perf: use term ordinal order when sorting by keys

term ordinals are sorted lexicographically. we can use that to sort instead of reading the terms from the dictionary.
introduce SegmentAggregationCollector trait
2026-05-14 23:30:41 +00:00 · 2023-02-09 16:27:18 +08:00 · 2023-01-30 14:30:40 +08:00 · 2023-01-26 18:25:03 +08:00 · 2023-01-26 14:15:39 +08:00 · 2023-01-26 14:14:08 +08:00
98 changed files with 2918 additions and 3758 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -55,11 +55,11 @@ measure_time = "0.8.2"
 async-trait = "0.1.53"
 arc-swap = "1.5.0"

+columnar = { version="0.1", path="./columnar", package ="tantivy-columnar" }
 sstable = { version="0.1", path="./sstable", package ="tantivy-sstable", optional = true }
 stacker = { version="0.1", path="./stacker", package ="tantivy-stacker" }
 tantivy-query-grammar = { version= "0.19.0", path="./query-grammar" }
 tantivy-bitpacker = 		{ version= "0.3", path="./bitpacker" }
-columnar = 		{ version= "0.1", path="./columnar", package="tantivy-columnar" }
 common = 								{ version= "0.5", path = "./common/", package = "tantivy-common" }
 tokenizer-api = { version="0.1", path="./tokenizer-api", package="tantivy-tokenizer-api" }

--- a/bitpacker/src/bitpacker.rs
+++ b/bitpacker/src/bitpacker.rs
@@ -19,7 +19,7 @@ impl BitPacker {
    }

    #[inline]
-    pub fn write<TWrite: io::Write + ?Sized>(
+    pub fn write<TWrite: io::Write>(
        &mut self,
        val: u64,
        num_bits: u8,
@@ -43,7 +43,7 @@ impl BitPacker {
        Ok(())
    }

-    pub fn flush<TWrite: io::Write + ?Sized>(&mut self, output: &mut TWrite) -> io::Result<()> {
+    pub fn flush<TWrite: io::Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
        if self.mini_buffer_written > 0 {
            let num_bytes = (self.mini_buffer_written + 7) / 8;
            let bytes = self.mini_buffer.to_le_bytes();
@@ -54,7 +54,7 @@ impl BitPacker {
        Ok(())
    }

-    pub fn close<TWrite: io::Write + ?Sized>(&mut self, output: &mut TWrite) -> io::Result<()> {
+    pub fn close<TWrite: io::Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
        self.flush(output)?;
        Ok(())
    }
--- a/columnar/Cargo.toml
+++ b/columnar/Cargo.toml
@@ -5,19 +5,18 @@ edition = "2021"
 license = "MIT"

 [dependencies]
+itertools = "0.10.5"
+log = "0.4.17"
+fnv = "1.0.7"
+fastdivide = "0.4.0"
+rand = { version = "0.8.5", optional = true }
+measure_time = { version = "0.8.2", optional = true }
+prettytable-rs = { version = "0.10.0", optional = true }
+
 stacker = { path = "../stacker", package="tantivy-stacker"}
-serde_json = "1"
-thiserror = "1"
-fnv = "1"
 sstable = { path = "../sstable", package = "tantivy-sstable" }
 common = { path = "../common", package = "tantivy-common" }
-itertools = "0.10"
-log = "0.4"
 tantivy-bitpacker = { version= "0.3", path = "../bitpacker/" }
-prettytable-rs = {version="0.10.0", optional= true}
-rand = {version="0.8.3", optional= true}
-fastdivide = "0.4"
-measure_time = { version="0.8.2", optional=true}

 [dev-dependencies]
 proptest = "1"
--- a/columnar/benches/bench.rs
+++ b/columnar/benches/bench.rs
@@ -0,0 +1,311 @@
+#![feature(test)]
+
+extern crate test;
+
+#[cfg(test)]
+mod tests {
+    use std::ops::RangeInclusive;
+    use std::sync::Arc;
+
+    use common::OwnedBytes;
+    use rand::prelude::*;
+    use tantivy_columnar::*;
+    use test::Bencher;
+
+    use super::*;
+
+    // Warning: this generates the same permutation at each call
+    fn generate_permutation() -> Vec<u64> {
+        let mut permutation: Vec<u64> = (0u64..100_000u64).collect();
+        permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
+        permutation
+    }
+
+    fn generate_random() -> Vec<u64> {
+        let mut permutation: Vec<u64> = (0u64..100_000u64)
+            .map(|el| el + random::<u16>() as u64)
+            .collect();
+        permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
+        permutation
+    }
+
+    // Warning: this generates the same permutation at each call
+    fn generate_permutation_gcd() -> Vec<u64> {
+        let mut permutation: Vec<u64> = (1u64..100_000u64).map(|el| el * 1000).collect();
+        permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
+        permutation
+    }
+
+    pub fn serialize_and_load<T: MonotonicallyMappableToU64 + Ord + Default>(
+        column: &[T],
+    ) -> Arc<dyn Column<T>> {
+        let mut buffer = Vec::new();
+        serialize(VecColumn::from(&column), &mut buffer, &ALL_CODEC_TYPES).unwrap();
+        open(OwnedBytes::new(buffer)).unwrap()
+    }
+
+    #[bench]
+    fn bench_intfastfield_jumpy_veclookup(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        let n = permutation.len();
+        b.iter(|| {
+            let mut a = 0u64;
+            for _ in 0..n {
+                a = permutation[a as usize];
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_jumpy_fflookup(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        let n = permutation.len();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&permutation);
+        b.iter(|| {
+            let mut a = 0u64;
+            for _ in 0..n {
+                a = column.get_val(a as u32);
+            }
+            a
+        });
+    }
+
+    const FIFTY_PERCENT_RANGE: RangeInclusive<u64> = 1..=50;
+    const SINGLE_ITEM: u64 = 90;
+    const SINGLE_ITEM_RANGE: RangeInclusive<u64> = 90..=90;
+    const ONE_PERCENT_ITEM_RANGE: RangeInclusive<u64> = 49..=49;
+    fn get_data_50percent_item() -> Vec<u128> {
+        let mut rng = StdRng::from_seed([1u8; 32]);
+
+        let mut data = vec![];
+        for _ in 0..300_000 {
+            let val = rng.gen_range(1..=100);
+            data.push(val);
+        }
+        data.push(SINGLE_ITEM);
+
+        data.shuffle(&mut rng);
+        let data = data.iter().map(|el| *el as u128).collect::<Vec<_>>();
+        data
+    }
+    fn get_u128_column_random() -> Arc<dyn Column<u128>> {
+        let permutation = generate_random();
+        let permutation = permutation.iter().map(|el| *el as u128).collect::<Vec<_>>();
+        get_u128_column_from_data(&permutation)
+    }
+
+    fn get_u128_column_from_data(data: &[u128]) -> Arc<dyn Column<u128>> {
+        let mut out = vec![];
+        let iter_gen = || data.iter().cloned();
+        serialize_u128(iter_gen, data.len() as u32, &mut out).unwrap();
+        let out = OwnedBytes::new(out);
+        open_u128::<u128>(out).unwrap()
+    }
+
+    // U64 RANGE START
+    #[bench]
+    fn bench_intfastfield_getrange_u64_50percent_hit(b: &mut Bencher) {
+        let data = get_data_50percent_item();
+        let data = data.iter().map(|el| *el as u64).collect::<Vec<_>>();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&data);
+
+        b.iter(|| {
+            let mut positions = Vec::new();
+            column.get_docids_for_value_range(
+                FIFTY_PERCENT_RANGE,
+                0..data.len() as u32,
+                &mut positions,
+            );
+            positions
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_getrange_u64_1percent_hit(b: &mut Bencher) {
+        let data = get_data_50percent_item();
+        let data = data.iter().map(|el| *el as u64).collect::<Vec<_>>();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&data);
+
+        b.iter(|| {
+            let mut positions = Vec::new();
+            column.get_docids_for_value_range(
+                ONE_PERCENT_ITEM_RANGE,
+                0..data.len() as u32,
+                &mut positions,
+            );
+            positions
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_getrange_u64_single_hit(b: &mut Bencher) {
+        let data = get_data_50percent_item();
+        let data = data.iter().map(|el| *el as u64).collect::<Vec<_>>();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&data);
+
+        b.iter(|| {
+            let mut positions = Vec::new();
+            column.get_docids_for_value_range(
+                SINGLE_ITEM_RANGE,
+                0..data.len() as u32,
+                &mut positions,
+            );
+            positions
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_getrange_u64_hit_all(b: &mut Bencher) {
+        let data = get_data_50percent_item();
+        let data = data.iter().map(|el| *el as u64).collect::<Vec<_>>();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&data);
+
+        b.iter(|| {
+            let mut positions = Vec::new();
+            column.get_docids_for_value_range(0..=u64::MAX, 0..data.len() as u32, &mut positions);
+            positions
+        });
+    }
+    // U64 RANGE END
+
+    // U128 RANGE START
+    #[bench]
+    fn bench_intfastfield_getrange_u128_50percent_hit(b: &mut Bencher) {
+        let data = get_data_50percent_item();
+        let column = get_u128_column_from_data(&data);
+
+        b.iter(|| {
+            let mut positions = Vec::new();
+            column.get_docids_for_value_range(
+                *FIFTY_PERCENT_RANGE.start() as u128..=*FIFTY_PERCENT_RANGE.end() as u128,
+                0..data.len() as u32,
+                &mut positions,
+            );
+            positions
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_getrange_u128_single_hit(b: &mut Bencher) {
+        let data = get_data_50percent_item();
+        let column = get_u128_column_from_data(&data);
+
+        b.iter(|| {
+            let mut positions = Vec::new();
+            column.get_docids_for_value_range(
+                *SINGLE_ITEM_RANGE.start() as u128..=*SINGLE_ITEM_RANGE.end() as u128,
+                0..data.len() as u32,
+                &mut positions,
+            );
+            positions
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_getrange_u128_hit_all(b: &mut Bencher) {
+        let data = get_data_50percent_item();
+        let column = get_u128_column_from_data(&data);
+
+        b.iter(|| {
+            let mut positions = Vec::new();
+            column.get_docids_for_value_range(0..=u128::MAX, 0..data.len() as u32, &mut positions);
+            positions
+        });
+    }
+    // U128 RANGE END
+
+    #[bench]
+    fn bench_intfastfield_scan_all_fflookup_u128(b: &mut Bencher) {
+        let column = get_u128_column_random();
+
+        b.iter(|| {
+            let mut a = 0u128;
+            for i in 0u64..column.num_vals() as u64 {
+                a += column.get_val(i as u32);
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_jumpy_stride5_u128(b: &mut Bencher) {
+        let column = get_u128_column_random();
+
+        b.iter(|| {
+            let n = column.num_vals();
+            let mut a = 0u128;
+            for i in (0..n / 5).map(|val| val * 5) {
+                a += column.get_val(i);
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_stride7_vec(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        let n = permutation.len();
+        b.iter(|| {
+            let mut a = 0u64;
+            for i in (0..n / 7).map(|val| val * 7) {
+                a += permutation[i as usize];
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_stride7_fflookup(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        let n = permutation.len();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&permutation);
+        b.iter(|| {
+            let mut a = 0;
+            for i in (0..n / 7).map(|val| val * 7) {
+                a += column.get_val(i as u32);
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_scan_all_fflookup(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        let n = permutation.len();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&permutation);
+        b.iter(|| {
+            let mut a = 0u64;
+            for i in 0u32..n as u32 {
+                a += column.get_val(i);
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_scan_all_fflookup_gcd(b: &mut Bencher) {
+        let permutation = generate_permutation_gcd();
+        let n = permutation.len();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&permutation);
+        b.iter(|| {
+            let mut a = 0u64;
+            for i in 0..n {
+                a += column.get_val(i as u32);
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_scan_all_vec(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        b.iter(|| {
+            let mut a = 0u64;
+            for i in 0..permutation.len() {
+                a += permutation[i as usize] as u64;
+            }
+            a
+        });
+    }
+}
--- a/columnar/benches/bench_u128.rs
+++ b/columnar/benches/bench_u128.rs
@@ -1,129 +0,0 @@
-#![feature(test)]
-
-use std::ops::RangeInclusive;
-use std::sync::Arc;
-
-use common::OwnedBytes;
-use rand::rngs::StdRng;
-use rand::seq::SliceRandom;
-use rand::{random, Rng, SeedableRng};
-use tantivy_columnar::ColumnValues;
-use test::Bencher;
-extern crate test;
-
-// TODO does this make sense for IPv6 ?
-fn generate_random() -> Vec<u64> {
-    let mut permutation: Vec<u64> = (0u64..100_000u64)
-        .map(|el| el + random::<u16>() as u64)
-        .collect();
-    permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
-    permutation
-}
-
-fn get_u128_column_random() -> Arc<dyn ColumnValues<u128>> {
-    let permutation = generate_random();
-    let permutation = permutation.iter().map(|el| *el as u128).collect::<Vec<_>>();
-    get_u128_column_from_data(&permutation)
-}
-
-fn get_u128_column_from_data(data: &[u128]) -> Arc<dyn ColumnValues<u128>> {
-    let mut out = vec![];
-    tantivy_columnar::column_values::serialize_column_values_u128(
-        &(|| data.iter().copied()),
-        data.len() as u32,
-        &mut out,
-    )
-    .unwrap();
-    let out = OwnedBytes::new(out);
-    tantivy_columnar::column_values::open_u128_mapped::<u128>(out).unwrap()
-}
-
-const FIFTY_PERCENT_RANGE: RangeInclusive<u64> = 1..=50;
-const SINGLE_ITEM: u64 = 90;
-const SINGLE_ITEM_RANGE: RangeInclusive<u64> = 90..=90;
-const ONE_PERCENT_ITEM_RANGE: RangeInclusive<u64> = 49..=49;
-fn get_data_50percent_item() -> Vec<u128> {
-    let mut rng = StdRng::from_seed([1u8; 32]);
-
-    let mut data = vec![];
-    for _ in 0..300_000 {
-        let val = rng.gen_range(1..=100);
-        data.push(val);
-    }
-    data.push(SINGLE_ITEM);
-    data.shuffle(&mut rng);
-    let data = data.iter().map(|el| *el as u128).collect::<Vec<_>>();
-    data
-}
-
-#[bench]
-fn bench_intfastfield_getrange_u128_50percent_hit(b: &mut Bencher) {
-    let data = get_data_50percent_item();
-    let column = get_u128_column_from_data(&data);
-
-    b.iter(|| {
-        let mut positions = Vec::new();
-        column.get_docids_for_value_range(
-            *FIFTY_PERCENT_RANGE.start() as u128..=*FIFTY_PERCENT_RANGE.end() as u128,
-            0..data.len() as u32,
-            &mut positions,
-        );
-        positions
-    });
-}
-
-#[bench]
-fn bench_intfastfield_getrange_u128_single_hit(b: &mut Bencher) {
-    let data = get_data_50percent_item();
-    let column = get_u128_column_from_data(&data);
-
-    b.iter(|| {
-        let mut positions = Vec::new();
-        column.get_docids_for_value_range(
-            *SINGLE_ITEM_RANGE.start() as u128..=*SINGLE_ITEM_RANGE.end() as u128,
-            0..data.len() as u32,
-            &mut positions,
-        );
-        positions
-    });
-}
-
-#[bench]
-fn bench_intfastfield_getrange_u128_hit_all(b: &mut Bencher) {
-    let data = get_data_50percent_item();
-    let column = get_u128_column_from_data(&data);
-
-    b.iter(|| {
-        let mut positions = Vec::new();
-        column.get_docids_for_value_range(0..=u128::MAX, 0..data.len() as u32, &mut positions);
-        positions
-    });
-}
-// U128 RANGE END
-
-#[bench]
-fn bench_intfastfield_scan_all_fflookup_u128(b: &mut Bencher) {
-    let column = get_u128_column_random();
-
-    b.iter(|| {
-        let mut a = 0u128;
-        for i in 0u64..column.num_vals() as u64 {
-            a += column.get_val(i as u32);
-        }
-        a
-    });
-}
-
-#[bench]
-fn bench_intfastfield_jumpy_stride5_u128(b: &mut Bencher) {
-    let column = get_u128_column_random();
-
-    b.iter(|| {
-        let n = column.num_vals();
-        let mut a = 0u128;
-        for i in (0..n / 5).map(|val| val * 5) {
-            a += column.get_val(i);
-        }
-        a
-    });
-}
--- a/columnar/benches/bench_u64.rs
+++ b/columnar/benches/bench_u64.rs
@@ -1,213 +0,0 @@
-#![feature(test)]
-extern crate test;
-
-use std::ops::RangeInclusive;
-use std::sync::Arc;
-
-use rand::prelude::*;
-use tantivy_columnar::column_values::{
-    serialize_and_load_u64_based_column_values, CodecType, ALL_U64_CODEC_TYPES,
-};
-use tantivy_columnar::*;
-use test::Bencher;
-
-// Warning: this generates the same permutation at each call
-fn generate_permutation() -> Vec<u64> {
-    let mut permutation: Vec<u64> = (0u64..100_000u64).collect();
-    permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
-    permutation
-}
-
-fn generate_random() -> Vec<u64> {
-    let mut permutation: Vec<u64> = (0u64..100_000u64)
-        .map(|el| el + random::<u16>() as u64)
-        .collect();
-    permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
-    permutation
-}
-
-// Warning: this generates the same permutation at each call
-fn generate_permutation_gcd() -> Vec<u64> {
-    let mut permutation: Vec<u64> = (1u64..100_000u64).map(|el| el * 1000).collect();
-    permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
-    permutation
-}
-
-pub fn serialize_and_load(column: &[u64], codec_type: CodecType) -> Arc<dyn ColumnValues<u64>> {
-    serialize_and_load_u64_based_column_values(&column, &[codec_type])
-}
-
-#[bench]
-fn bench_intfastfield_jumpy_veclookup(b: &mut Bencher) {
-    let permutation = generate_permutation();
-    let n = permutation.len();
-    b.iter(|| {
-        let mut a = 0u64;
-        for _ in 0..n {
-            a = permutation[a as usize];
-        }
-        a
-    });
-}
-
-#[bench]
-fn bench_intfastfield_jumpy_fflookup_bitpacked(b: &mut Bencher) {
-    let permutation = generate_permutation();
-    let n = permutation.len();
-    let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&permutation, CodecType::Bitpacked);
-    b.iter(|| {
-        let mut a = 0u64;
-        for _ in 0..n {
-            a = column.get_val(a as u32);
-        }
-        a
-    });
-}
-
-const FIFTY_PERCENT_RANGE: RangeInclusive<u64> = 1..=50;
-const SINGLE_ITEM: u64 = 90;
-const SINGLE_ITEM_RANGE: RangeInclusive<u64> = 90..=90;
-const ONE_PERCENT_ITEM_RANGE: RangeInclusive<u64> = 49..=49;
-fn get_data_50percent_item() -> Vec<u128> {
-    let mut rng = StdRng::from_seed([1u8; 32]);
-
-    let mut data = vec![];
-    for _ in 0..300_000 {
-        let val = rng.gen_range(1..=100);
-        data.push(val);
-    }
-    data.push(SINGLE_ITEM);
-
-    data.shuffle(&mut rng);
-    let data = data.iter().map(|el| *el as u128).collect::<Vec<_>>();
-    data
-}
-
-// U64 RANGE START
-#[bench]
-fn bench_intfastfield_getrange_u64_50percent_hit(b: &mut Bencher) {
-    let data = get_data_50percent_item();
-    let data = data.iter().map(|el| *el as u64).collect::<Vec<_>>();
-    let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&data, CodecType::Bitpacked);
-    b.iter(|| {
-        let mut positions = Vec::new();
-        column.get_docids_for_value_range(
-            FIFTY_PERCENT_RANGE,
-            0..data.len() as u32,
-            &mut positions,
-        );
-        positions
-    });
-}
-
-#[bench]
-fn bench_intfastfield_getrange_u64_1percent_hit(b: &mut Bencher) {
-    let data = get_data_50percent_item();
-    let data = data.iter().map(|el| *el as u64).collect::<Vec<_>>();
-    let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&data, CodecType::Bitpacked);
-
-    b.iter(|| {
-        let mut positions = Vec::new();
-        column.get_docids_for_value_range(
-            ONE_PERCENT_ITEM_RANGE,
-            0..data.len() as u32,
-            &mut positions,
-        );
-        positions
-    });
-}
-
-#[bench]
-fn bench_intfastfield_getrange_u64_single_hit(b: &mut Bencher) {
-    let data = get_data_50percent_item();
-    let data = data.iter().map(|el| *el as u64).collect::<Vec<_>>();
-    let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&data, CodecType::Bitpacked);
-
-    b.iter(|| {
-        let mut positions = Vec::new();
-        column.get_docids_for_value_range(SINGLE_ITEM_RANGE, 0..data.len() as u32, &mut positions);
-        positions
-    });
-}
-
-#[bench]
-fn bench_intfastfield_getrange_u64_hit_all(b: &mut Bencher) {
-    let data = get_data_50percent_item();
-    let data = data.iter().map(|el| *el as u64).collect::<Vec<_>>();
-    let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&data, CodecType::Bitpacked);
-
-    b.iter(|| {
-        let mut positions = Vec::new();
-        column.get_docids_for_value_range(0..=u64::MAX, 0..data.len() as u32, &mut positions);
-        positions
-    });
-}
-// U64 RANGE END
-
-#[bench]
-fn bench_intfastfield_stride7_vec(b: &mut Bencher) {
-    let permutation = generate_permutation();
-    let n = permutation.len();
-    b.iter(|| {
-        let mut a = 0u64;
-        for i in (0..n / 7).map(|val| val * 7) {
-            a += permutation[i as usize];
-        }
-        a
-    });
-}
-
-#[bench]
-fn bench_intfastfield_stride7_fflookup(b: &mut Bencher) {
-    let permutation = generate_permutation();
-    let n = permutation.len();
-    let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&permutation, CodecType::Bitpacked);
-    b.iter(|| {
-        let mut a = 0;
-        for i in (0..n / 7).map(|val| val * 7) {
-            a += column.get_val(i as u32);
-        }
-        a
-    });
-}
-
-#[bench]
-fn bench_intfastfield_scan_all_fflookup(b: &mut Bencher) {
-    let permutation = generate_permutation();
-    let n = permutation.len();
-    let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&permutation, CodecType::Bitpacked);
-    let column_ref = column.as_ref();
-    b.iter(|| {
-        let mut a = 0u64;
-        for i in 0u32..n as u32 {
-            a += column.get_val(i);
-        }
-        a
-    });
-}
-
-#[bench]
-fn bench_intfastfield_scan_all_fflookup_gcd(b: &mut Bencher) {
-    let permutation = generate_permutation_gcd();
-    let n = permutation.len();
-    let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&permutation, CodecType::Bitpacked);
-    b.iter(|| {
-        let mut a = 0u64;
-        for i in 0..n {
-            a += column.get_val(i as u32);
-        }
-        a
-    });
-}
-
-#[bench]
-fn bench_intfastfield_scan_all_vec(b: &mut Bencher) {
-    let permutation = generate_permutation();
-    b.iter(|| {
-        let mut a = 0u64;
-        for i in 0..permutation.len() {
-            a += permutation[i as usize] as u64;
-        }
-        a
-    });
-}
--- a/columnar/src/TODO.md
+++ b/columnar/src/TODO.md
@@ -1,12 +1,17 @@
 # zero to one
-* merges with non trivial mapping (deletes / sort)
-* emission of the sort mapping.
-+ muttivaued range queries restrat frm the beginning all of the time.
-* revisit line codec
-* removal of all rows of a column in the schema due to deletes
-* Plugging JSON
-replug examples
-
+* merges
+* full still needs a num_values
+* replug u128
+* add dictionary encoded stuff
+* fix multivalued
+* find a way to make columnar work with strict types
+* plug to tantivy
+    - indexing
+    - aggregations
+    - merge
+* replug facets
+* replug range queries
+ multivalued range queries restart from the beginning all of the time.

 # Perf and Size
 * re-add ZSTD compression for dictionaries
@@ -14,6 +19,7 @@ no systematic monotonic mapping
 consider removing multilinear
 f32?
 adhoc solution for bool?
+
 add metrics helper for aggregate. sum(row_id)
 review inline absence/presence
 improv perf of select using PDEP
@@ -21,8 +27,11 @@ compare with roaring bitmap/elias fano etc etc.
 SIMD range? (see blog post)
 Add alignment?
 Consider another codec to bridge the gap between few and 5k elements
+replug examples
+replug fast_field_codecs bench

 # Cleanup and rationalization
+remove the 6 bit limitation of columntype. use 4 + 4 bits instead.
 in benchmark, unify percent vs ratio, f32 vs f64.
 investigate if should have better errors? io::Error is overused at the moment.
 rename rank/select in unit tests
@@ -33,13 +42,16 @@ use the rank & select naming in unit tests branch.
 multi-linear -> blockwise
 linear codec -> simply a multiplication for the index column
 rename columnar to something more explicit, like column_dictionary or columnar_table
+remove old column from the fast field API.
+remove the Column traits alias.
 rename fastfield -> column
 document changes
 rationalization FastFieldValue, HasColumnType
-isolate u128_based and uniform naming
+

 # Other
 fix enhance column-cli

 # Santa claus
+
 autodetect datetime ipaddr, plug customizable tokenizer.
--- a/columnar/src/column/dictionary_encoded.rs
+++ b/columnar/src/column/dictionary_encoded.rs
@@ -62,12 +62,6 @@ impl From<BytesColumn> for StrColumn {
    }
 }

-impl From<StrColumn> for BytesColumn {
-    fn from(str_column: StrColumn) -> BytesColumn {
-        str_column.0
-    }
-}
-
 impl StrColumn {
    pub fn dictionary(&self) -> &Dictionary<VoidSSTable> {
        self.0.dictionary.as_ref()
--- a/columnar/src/column/mod.rs
+++ b/columnar/src/column/mod.rs
@@ -2,7 +2,6 @@ mod dictionary_encoded;
 mod serialize;

 use std::fmt::Debug;
-use std::io::Write;
 use std::ops::Deref;
 use std::sync::Arc;

@@ -14,33 +13,16 @@ pub use serialize::{
 };

 use crate::column_index::ColumnIndex;
-use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal;
-use crate::column_values::{monotonic_map_column, ColumnValues};
-use crate::{Cardinality, MonotonicallyMappableToU64, RowId};
+use crate::column_values::ColumnValues;
+use crate::{Cardinality, RowId};

 #[derive(Clone)]
-pub struct Column<T = u64> {
+pub struct Column<T> {
    pub idx: ColumnIndex,
    pub values: Arc<dyn ColumnValues<T>>,
 }

-impl<T: MonotonicallyMappableToU64> Column<T> {
-    pub fn to_u64_monotonic(self) -> Column<u64> {
-        let values = Arc::new(monotonic_map_column(
-            self.values,
-            StrictlyMonotonicMappingToInternal::<T>::new(),
-        ));
-        Column {
-            idx: self.idx,
-            values,
-        }
-    }
-}
-
 impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
-    pub fn get_cardinality(&self) -> Cardinality {
-        self.idx.get_cardinality()
-    }
    pub fn num_rows(&self) -> RowId {
        match &self.idx {
            ColumnIndex::Full => self.values.num_vals() as u32,
@@ -87,7 +69,7 @@ impl<T> Deref for Column<T> {
 }

 impl BinarySerializable for Cardinality {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> std::io::Result<()> {
+    fn serialize<W: std::io::Write>(&self, writer: &mut W) -> std::io::Result<()> {
        self.to_code().serialize(writer)
    }

--- a/columnar/src/column/serialize.rs
+++ b/columnar/src/column/serialize.rs
@@ -9,36 +9,43 @@ use sstable::Dictionary;
 use crate::column::{BytesColumn, Column};
 use crate::column_index::{serialize_column_index, SerializableColumnIndex};
 use crate::column_values::serialize::serialize_column_values_u128;
-use crate::column_values::u64_based::{serialize_u64_based_column_values, CodecType};
-use crate::column_values::{MonotonicallyMappableToU128, MonotonicallyMappableToU64};
-use crate::iterable::{map_iterable, Iterable};
+use crate::column_values::{
+    serialize_column_values, ColumnValues, FastFieldCodecType, MonotonicallyMappableToU128,
+    MonotonicallyMappableToU64,
+};

-pub fn serialize_column_mappable_to_u128<I, T: MonotonicallyMappableToU128>(
+pub fn serialize_column_mappable_to_u128<
+    F: Fn() -> I,
+    I: Iterator<Item = T>,
+    T: MonotonicallyMappableToU128,
+>(
    column_index: SerializableColumnIndex<'_>,
-    iterable: &dyn Fn() -> I,
+    column_values: F,
    num_vals: u32,
    output: &mut impl Write,
-) -> io::Result<()>
-where
-    I: Iterator<Item = T>,
-{
+) -> io::Result<()> {
    let column_index_num_bytes = serialize_column_index(column_index, output)?;
-    let u128_iterable = map_iterable(iterable, MonotonicallyMappableToU128::to_u128);
-    serialize_column_values_u128(&u128_iterable, num_vals, output)?;
+    serialize_column_values_u128(
+        || column_values().map(|val| val.to_u128()),
+        num_vals,
+        output,
+    )?;
    output.write_all(&column_index_num_bytes.to_le_bytes())?;
    Ok(())
 }

-pub fn serialize_column_mappable_to_u64<T: MonotonicallyMappableToU64 + Debug, I>(
+pub fn serialize_column_mappable_to_u64<T: MonotonicallyMappableToU64 + Debug>(
    column_index: SerializableColumnIndex<'_>,
-    column_values: &dyn Fn() -> I,
+    column_values: &impl ColumnValues<T>,
    output: &mut impl Write,
-) -> io::Result<()>
-where I: Iterator<Item=T> {
+) -> io::Result<()> {
    let column_index_num_bytes = serialize_column_index(column_index, output)?;
-    serialize_u64_based_column_values(
+    serialize_column_values(
        column_values,
-        &[CodecType::Bitpacked, CodecType::BlockwiseLinear],
+        &[
+            FastFieldCodecType::Bitpacked,
+            FastFieldCodecType::BlockwiseLinear,
+        ],
        output,
    )?;
    output.write_all(&column_index_num_bytes.to_le_bytes())?;
@@ -55,8 +62,7 @@ pub fn open_column_u64<T: MonotonicallyMappableToU64>(bytes: OwnedBytes) -> io::
    );
    let (column_index_data, column_values_data) = body.split(column_index_num_bytes as usize);
    let column_index = crate::column_index::open_column_index(column_index_data)?;
-    let column_values =
-        crate::column_values::u64_based::load_u64_based_column_values(column_values_data)?;
+    let column_values = crate::column_values::open_u64_mapped(column_values_data)?;
    Ok(Column {
        idx: column_index,
        values: column_values,
--- a/columnar/src/column_index/merge.rs
+++ b/columnar/src/column_index/merge.rs
@@ -1,174 +0,0 @@
-use std::iter;
-
-use crate::column_index::{
-    multivalued_index, serialize_column_index, SerializableColumnIndex, Set,
-};
-use crate::iterable::Iterable;
-use crate::{Cardinality, ColumnIndex, MergeRowOrder, RowId, StackMergeOrder};
-
-fn detect_cardinality(columns: &[Option<ColumnIndex>]) -> Cardinality {
-    columns
-        .iter()
-        .flatten()
-        .map(ColumnIndex::get_cardinality)
-        .max()
-        .unwrap_or(Cardinality::Full)
-}
-
-pub fn stack_column_index<'a>(
-    columns: &'a [Option<ColumnIndex>],
-    merge_row_order: &'a MergeRowOrder,
-) -> SerializableColumnIndex<'a> {
-    let MergeRowOrder::Stack(stack_merge_order) = merge_row_order else {
-        panic!("only supporting stacking at the moment.");
-    };
-    let cardinality = detect_cardinality(columns);
-    match cardinality {
-        Cardinality::Full => SerializableColumnIndex::Full,
-        Cardinality::Optional =>  {
-            let stacked_optional_index: StackedOptionalIndex<'a> = StackedOptionalIndex {
-                columns,
-                stack_merge_order,
-            };
-            SerializableColumnIndex::Optional {
-                non_null_row_ids: Box::new(move || Box::new(stacked_optional_index.iter())),
-                num_rows: stack_merge_order.num_rows(),
-            }
-        },
-        Cardinality::Multivalued => {
-            let stacked_multivalued_index = StackedMultivaluedIndex {
-                columns,
-                stack_merge_order,
-            };
-            SerializableColumnIndex::Multivalued(Box::new(move || stacked_multivalued_index.boxed_iter()))
-        }
-    }
-}
-
-struct StackedOptionalIndex<'a> {
-    columns: &'a [Option<ColumnIndex>],
-    stack_merge_order: &'a StackMergeOrder,
-}
-
-impl<'a> StackedOptionalIndex<'a> {
-    fn iter(&self) -> impl Iterator<Item=RowId> + 'a {
-        Box::new(
-            self.columns
-                .iter()
-                .enumerate()
-                .flat_map(|(columnar_id, column_index_opt)| {
-                    let columnar_row_range = self.stack_merge_order.columnar_range(columnar_id);
-                    let rows_it: Box<dyn Iterator<Item = RowId>> = match column_index_opt {
-                        Some(ColumnIndex::Full) => Box::new(columnar_row_range),
-                        Some(ColumnIndex::Optional(optional_index)) => Box::new(
-                            optional_index
-                                .iter_rows()
-                                .map(move |row_id: RowId| row_id + columnar_row_range.start),
-                        ),
-                        Some(ColumnIndex::Multivalued(_)) => {
-                            panic!("No multivalued index is allowed when stacking column index");
-                        }
-                        None => Box::new(std::iter::empty()),
-                    };
-                    rows_it
-                }),
-        )
-    }
-}
-
-#[derive(Clone, Copy)]
-struct StackedMultivaluedIndex<'a> {
-    columns: &'a [Option<ColumnIndex>],
-    stack_merge_order: &'a StackMergeOrder,
-}
-
-fn convert_column_opt_to_multivalued_index<'a>(
-    column_index_opt: Option<&'a ColumnIndex>,
-    num_rows: RowId,
-) -> Box<dyn Iterator<Item = RowId> + 'a> {
-    match column_index_opt {
-        None => Box::new(iter::repeat(0u32).take(num_rows as usize + 1)),
-        Some(ColumnIndex::Full) => Box::new(0..num_rows + 1),
-        Some(ColumnIndex::Optional(optional_index)) => {
-            Box::new(
-                (0..num_rows)
-                    // TODO optimize
-                    .map(|row_id| optional_index.rank(row_id))
-                    .chain(std::iter::once(optional_index.num_non_nulls())),
-            )
-        }
-        Some(ColumnIndex::Multivalued(multivalued_index)) => {
-            multivalued_index.start_index_column.iter()
-        }
-    }
-}
-
-impl<'a> StackedMultivaluedIndex<'a> {
-    fn boxed_iter(&self) -> Box<dyn Iterator<Item = RowId> + 'a> {
-        let multivalued_indexes =
-            self.columns
-                .iter()
-                .map(Option::as_ref)
-                .enumerate()
-                .map(|(columnar_id, column_opt)| {
-                    let num_rows =
-                        self.stack_merge_order.columnar_range(columnar_id).len() as RowId;
-                    convert_column_opt_to_multivalued_index(column_opt, num_rows)
-                });
-        stack_multivalued_indexes(multivalued_indexes)
-    }
-}
-
-// Refactor me
-fn stack_multivalued_indexes<'a>(
-    mut multivalued_indexes: impl Iterator<Item = Box<dyn Iterator<Item = RowId> + 'a>> + 'a,
-) -> Box<dyn Iterator<Item = RowId> + 'a> {
-    let mut offset = 0;
-    let mut last_row_id = 0;
-    let mut current_it = multivalued_indexes.next();
-    Box::new(std::iter::from_fn(move || loop {
-        let Some(multivalued_index) = current_it.as_mut() else {
-            return None;
-        };
-        if let Some(row_id) = multivalued_index.next() {
-            last_row_id = offset + row_id;
-            return Some(last_row_id);
-        }
-        offset = last_row_id;
-        loop {
-            current_it = multivalued_indexes.next();
-            if current_it.as_mut()?.next().is_some() {
-                break;
-            }
-        }
-    }))
-}
-
-fn stack_multivalued_index<'a>(
-    columns: &'a [Option<ColumnIndex>],
-    stack_merge_order: &StackMergeOrder,
-) -> Box<dyn Iterable<RowId> + 'a> {
-    todo!()
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::RowId;
-
-    fn it<'a>(row_ids: &'a [RowId]) -> Box<dyn Iterator<Item = RowId> + 'a> {
-        Box::new(row_ids.iter().copied())
-    }
-
-    #[test]
-    fn test_stack() {
-        let columns = [
-            it(&[0u32, 0u32]),
-            it(&[0u32, 1u32, 1u32, 4u32]),
-            it(&[0u32, 3u32, 5u32]),
-            it(&[0u32, 4u32]),
-        ]
-        .into_iter();
-        let start_offsets: Vec<RowId> = super::stack_multivalued_indexes(columns).collect();
-        assert_eq!(start_offsets, &[0, 0, 1, 1, 4, 7, 9, 13]);
-    }
-}
--- a/columnar/src/column_index/mod.rs
+++ b/columnar/src/column_index/mod.rs
@@ -1,12 +1,10 @@
-mod merge;
 mod multivalued_index;
 mod optional_index;
 mod serialize;

 use std::ops::Range;

-pub use merge::stack_column_index;
-pub use optional_index::{OptionalIndex, Set};
+pub use optional_index::{OptionalIndex, SerializableOptionalIndex, Set};
 pub use serialize::{open_column_index, serialize_column_index, SerializableColumnIndex};

 use crate::column_index::multivalued_index::MultiValueIndex;
--- a/columnar/src/column_index/multivalued_index.rs
+++ b/columnar/src/column_index/multivalued_index.rs
@@ -5,17 +5,16 @@ use std::sync::Arc;

 use common::OwnedBytes;

-use crate::column_values::u64_based::CodecType;
-use crate::column_values::ColumnValues;
+use crate::column_values::{ColumnValues, FastFieldCodecType};
 use crate::RowId;

-pub fn serialize_multivalued_index<'a>(
-    multivalued_index: &'a dyn Fn() -> Box<dyn Iterator<Item=RowId> + 'a>,
+pub fn serialize_multivalued_index(
+    multivalued_index: &dyn ColumnValues<RowId>,
    output: &mut impl Write,
 ) -> io::Result<()> {
-    crate::column_values::u64_based::serialize_u64_based_column_values(
-        multivalued_index,
-        &[CodecType::Bitpacked, CodecType::Linear],
+    crate::column_values::serialize_column_values(
+        &*multivalued_index,
+        &[FastFieldCodecType::Bitpacked, FastFieldCodecType::Linear],
        output,
    )?;
    Ok(())
@@ -23,7 +22,7 @@ pub fn serialize_multivalued_index<'a>(

 pub fn open_multivalued_index(bytes: OwnedBytes) -> io::Result<MultiValueIndex> {
    let start_index_column: Arc<dyn ColumnValues<RowId>> =
-        crate::column_values::u64_based::load_u64_based_column_values(bytes)?;
+        crate::column_values::open_u64_mapped(bytes)?;
    Ok(MultiValueIndex { start_index_column })
 }

@@ -31,7 +30,7 @@ pub fn open_multivalued_index(bytes: OwnedBytes) -> io::Result<MultiValueIndex>
 /// Index to resolve value range for given doc_id.
 /// Starts at 0.
 pub struct MultiValueIndex {
-    pub start_index_column: Arc<dyn crate::ColumnValues<RowId>>,
+    start_index_column: Arc<dyn crate::ColumnValues<RowId>>,
 }

 impl From<Arc<dyn ColumnValues<RowId>>> for MultiValueIndex {
--- a/columnar/src/column_index/optional_index/mod.rs
+++ b/columnar/src/column_index/optional_index/mod.rs
@@ -1,4 +1,5 @@
 use std::io::{self, Write};
+use std::ops::Range;
 use std::sync::Arc;

 mod set;
@@ -10,7 +11,6 @@ use set_block::{
    DenseBlock, DenseBlockCodec, SparseBlock, SparseBlockCodec, DENSE_BLOCK_NUM_BYTES,
 };

-use crate::iterable::Iterable;
 use crate::{InvalidData, RowId};

 /// The threshold for for number of elements after which we switch to dense block encoding.
@@ -96,12 +96,6 @@ impl OptionalIndex {
    pub fn num_non_nulls(&self) -> RowId {
        self.num_non_null_rows
    }
-
-    pub fn iter_rows<'a>(&'a self) -> impl Iterator<Item = RowId> + 'a {
-        // TODO optimize
-        let mut select_batch = self.select_cursor();
-        (0..self.num_non_null_rows).map(move |rank| select_batch.select(rank))
-    }
 }

 /// Splits a value address into lower and upper 16bits.
@@ -192,21 +186,6 @@ impl Set<RowId> for OptionalIndex {
        }
    }

-    #[inline]
-    fn rank(&self, row_id: RowId) -> RowId {
-        let RowAddr {
-            block_id,
-            in_block_row_id,
-        } = row_addr_from_row_id(row_id);
-        let block_meta = self.block_metas[block_id as usize];
-        let block = self.block(block_meta);
-        let block_offset_row_id = match block {
-            Block::Dense(dense_block) => dense_block.rank(in_block_row_id),
-            Block::Sparse(sparse_block) => sparse_block.rank(in_block_row_id),
-        } as u32;
-        block_meta.non_null_rows_before_block + block_offset_row_id
-    }
-
    #[inline]
    fn rank_if_exists(&self, row_id: RowId) -> Option<RowId> {
        let RowAddr {
@@ -321,7 +300,7 @@ impl OptionalIndexCodec {
 }

 impl BinarySerializable for OptionalIndexCodec {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_all(&[self.to_code()])
    }

@@ -343,13 +322,12 @@ fn serialize_optional_index_block(block_els: &[u16], out: &mut impl io::Write) -
 }

 pub fn serialize_optional_index<'a, W: io::Write>(
-    non_null_rows: &dyn Fn() -> Box<dyn Iterator<Item=RowId> + 'a>,
-    num_rows: RowId,
+    serializable_optional_index: &dyn SerializableOptionalIndex<'a>,
    output: &mut W,
 ) -> io::Result<()> {
-    VInt(num_rows as u64).serialize(output)?;
+    VInt(serializable_optional_index.num_rows() as u64).serialize(output)?;

-    let mut rows_it = non_null_rows();
+    let mut rows_it = serializable_optional_index.non_null_rows();
    let mut block_metadata: Vec<SerializedBlockMeta> = Vec::new();
    let mut current_block = Vec::new();

@@ -502,5 +480,19 @@ pub fn open_optional_index(bytes: OwnedBytes) -> io::Result<OptionalIndex> {
    Ok(optional_index)
 }

+pub trait SerializableOptionalIndex<'a> {
+    fn num_rows(&self) -> RowId;
+    fn non_null_rows(&self) -> Box<dyn Iterator<Item = RowId> + 'a>;
+}
+
+impl SerializableOptionalIndex<'static> for Range<u32> {
+    fn num_rows(&self) -> RowId {
+        self.end
+    }
+    fn non_null_rows(&self) -> Box<dyn Iterator<Item = RowId> + 'static> {
+        Box::new(self.clone())
+    }
+}
+
 #[cfg(test)]
 mod tests;
--- a/columnar/src/column_index/optional_index/set.rs
+++ b/columnar/src/column_index/optional_index/set.rs
@@ -28,10 +28,7 @@ pub trait Set<T> {
    /// Returns true if the elements is contained in the Set
    fn contains(&self, el: T) -> bool;

-    /// Returns the number of rows in the set that are < `el`
-    fn rank(&self, el: T) -> T;
-
-    /// If the set contains `el` returns the element rank.
+    /// If the set contains `el`, returns its position in the sorted set of elements.
    /// If the set does not contain the element, it returns `None`.
    fn rank_if_exists(&self, el: T) -> Option<T>;

--- a/columnar/src/column_index/optional_index/set_block/dense.rs
+++ b/columnar/src/column_index/optional_index/set_block/dense.rs
@@ -148,15 +148,6 @@ impl<'a> Set<u16> for DenseBlock<'a> {
        }
    }

-    #[inline(always)]
-    fn rank(&self, el: u16) -> u16 {
-        let block_pos = el / ELEMENTS_PER_MINI_BLOCK;
-        let index_block = self.mini_block(block_pos);
-        let pos_in_block_bit_vec = el % ELEMENTS_PER_MINI_BLOCK;
-        let ones_in_block = rank_u64(index_block.bitvec, pos_in_block_bit_vec);
-        index_block.rank + ones_in_block
-    }
-
    #[inline(always)]
    fn select(&self, rank: u16) -> u16 {
        let block_id = self.find_miniblock_containing_rank(rank, 0).unwrap();
--- a/columnar/src/column_index/optional_index/set_block/sparse.rs
+++ b/columnar/src/column_index/optional_index/set_block/sparse.rs
@@ -44,11 +44,6 @@ impl<'a> Set<u16> for SparseBlock<'a> {
        self.binary_search(el).ok()
    }

-    #[inline(always)]
-    fn rank(&self, el: u16) -> u16 {
-        self.binary_search(el).unwrap_or_else(|el| el)
-    }
-
    #[inline(always)]
    fn select(&self, rank: u16) -> u16 {
        let offset = rank as usize * 2;
--- a/columnar/src/column_index/optional_index/set_block/tests.rs
+++ b/columnar/src/column_index/optional_index/set_block/tests.rs
@@ -1,7 +1,8 @@
 use std::collections::HashMap;

-use crate::column_index::optional_index::set_block::dense::DENSE_BLOCK_NUM_BYTES;
-use crate::column_index::optional_index::set_block::{DenseBlockCodec, SparseBlockCodec};
+use crate::column_index::optional_index::set_block::{
+    DenseBlockCodec, SparseBlockCodec, DENSE_BLOCK_NUM_BYTES,
+};
 use crate::column_index::optional_index::{SelectCursor, Set, SetCodec};

 fn test_set_helper<C: SetCodec<Item = u16>>(vals: &[u16]) -> usize {
@@ -17,10 +18,6 @@ fn test_set_helper<C: SetCodec<Item = u16>>(vals: &[u16]) -> usize {
    for val in 0u16..=u16::MAX {
        assert_eq!(tested_set.contains(val), hash_set.contains_key(&val));
        assert_eq!(tested_set.rank_if_exists(val), hash_set.get(&val).copied());
-        assert_eq!(
-            tested_set.rank(val),
-            vals.iter().cloned().take_while(|v| *v < val).count() as u16
-        );
    }
    for rank in 0..vals.len() {
        assert_eq!(tested_set.select(rank as u16), vals[rank]);
--- a/columnar/src/column_index/optional_index/tests.rs
+++ b/columnar/src/column_index/optional_index/tests.rs
@@ -37,7 +37,7 @@ proptest! {
 fn test_with_random_sets_simple() {
    let vals = 10..BLOCK_SIZE * 2;
    let mut out: Vec<u8> = Vec::new();
-    serialize_optional_index(&vals.clone(), 100, &mut out).unwrap();
+    serialize_optional_index(&vals.clone(), &mut out).unwrap();
    let null_index = open_optional_index(OwnedBytes::new(out)).unwrap();
    let ranks: Vec<u32> = (65_472u32..65_473u32).collect();
    let els: Vec<u32> = ranks.iter().copied().map(|rank| rank + 10).collect();
@@ -66,8 +66,12 @@ fn test_optional_index_one_block_true() {
    test_null_index(&iter[..]);
 }

-impl<'a> Iterable<RowId> for &'a [bool] {
-    fn boxed_iter(&self) -> Box<dyn Iterator<Item = RowId> + 'a> {
+impl<'a> SerializableOptionalIndex<'a> for &'a [bool] {
+    fn num_rows(&self) -> RowId {
+        self.len() as u32
+    }
+
+    fn non_null_rows(&self) -> Box<dyn Iterator<Item = RowId> + 'a> {
        Box::new(
            self.iter()
                .cloned()
@@ -80,7 +84,7 @@ impl<'a> Iterable<RowId> for &'a [bool] {

 fn test_null_index(data: &[bool]) {
    let mut out: Vec<u8> = Vec::new();
-    serialize_optional_index(&data, data.len() as RowId, &mut out).unwrap();
+    serialize_optional_index(&data, &mut out).unwrap();
    let null_index = open_optional_index(OwnedBytes::new(out)).unwrap();
    let orig_idx_with_value: Vec<u32> = data
        .iter()
@@ -109,7 +113,7 @@ fn test_null_index(data: &[bool]) {
 fn test_optional_index_test_translation() {
    let mut out = vec![];
    let iter = &[true, false, true, false];
-    serialize_optional_index(&&iter[..], iter.len() as u32, &mut out).unwrap();
+    serialize_optional_index(&&iter[..], &mut out).unwrap();
    let null_index = open_optional_index(OwnedBytes::new(out)).unwrap();
    let mut select_cursor = null_index.select_cursor();
    assert_eq!(select_cursor.select(0), 0);
@@ -120,7 +124,7 @@ fn test_optional_index_test_translation() {
 fn test_optional_index_translate() {
    let mut out = vec![];
    let iter = &[true, false, true, false];
-    serialize_optional_index(&&iter[..], iter.len() as RowId, &mut out).unwrap();
+    serialize_optional_index(&&iter[..], &mut out).unwrap();
    let null_index = open_optional_index(OwnedBytes::new(out)).unwrap();
    assert_eq!(null_index.rank_if_exists(0), Some(0));
    assert_eq!(null_index.rank_if_exists(2), Some(1));
@@ -130,7 +134,7 @@ fn test_optional_index_translate() {
 fn test_optional_index_small() {
    let mut out = vec![];
    let iter = &[true, false, true, false];
-    serialize_optional_index(&&iter[..], iter.len() as RowId, &mut out).unwrap();
+    serialize_optional_index(&&iter[..], &mut out).unwrap();
    let null_index = open_optional_index(OwnedBytes::new(out)).unwrap();
    assert!(null_index.contains(0));
    assert!(!null_index.contains(1));
@@ -145,7 +149,7 @@ fn test_optional_index_large() {
    docs.extend((0..=1).map(|_idx| true));

    let mut out = vec![];
-    serialize_optional_index(&&docs[..], docs.len() as RowId, &mut out).unwrap();
+    serialize_optional_index(&&docs[..], &mut out).unwrap();
    let null_index = open_optional_index(OwnedBytes::new(out)).unwrap();
    assert!(!null_index.contains(0));
    assert!(!null_index.contains(100));
@@ -154,59 +158,6 @@ fn test_optional_index_large() {
    assert!(null_index.contains(ELEMENTS_PER_BLOCK + 1));
 }

-fn test_optional_index_iter_aux(row_ids: &[RowId], num_rows: RowId) {
-    let mut buffer: Vec<u8> = Vec::new();
-    serialize_optional_index(&row_ids, num_rows, &mut buffer).unwrap();
-    let null_index = open_optional_index(OwnedBytes::new(buffer)).unwrap();
-    assert_eq!(null_index.num_rows(), num_rows);
-    assert!(null_index.iter_rows().eq(row_ids.iter().copied()));
-}
-
-#[test]
-fn test_optional_index_iter_empty() {
-    test_optional_index_iter_aux(&[], 0u32);
-}
-
-fn test_optional_index_rank_aux(row_ids: &[RowId]) {
-    let mut buffer: Vec<u8> = Vec::new();
-    let num_rows = row_ids.last().copied().unwrap_or(0u32) + 1;
-    serialize_optional_index(&row_ids, num_rows, &mut buffer).unwrap();
-    let null_index = open_optional_index(OwnedBytes::new(buffer)).unwrap();
-    assert_eq!(null_index.num_rows(), num_rows);
-    for (row_id, row_val) in row_ids.iter().copied().enumerate() {
-        assert_eq!(null_index.rank(row_val), row_id as u32);
-        assert_eq!(null_index.rank_if_exists(row_val), Some(row_id as u32));
-        if row_val > 0 && !null_index.contains(&row_val - 1) {
-            assert_eq!(null_index.rank(row_val - 1), row_id as u32);
-        }
-        assert_eq!(null_index.rank(row_val + 1), row_id as u32 + 1);
-    }
-}
-
-#[test]
-fn test_optional_index_rank() {
-    test_optional_index_rank_aux(&[1u32]);
-    test_optional_index_rank_aux(&[0u32, 1u32]);
-    let mut block = Vec::new();
-    block.push(3u32);
-    block.extend((0..BLOCK_SIZE).map(|i| i + BLOCK_SIZE + 1));
-    test_optional_index_rank_aux(&block);
-}
-
-#[test]
-fn test_optional_index_iter_empty_one() {
-    test_optional_index_iter_aux(&[1], 2u32);
-    test_optional_index_iter_aux(&[100_000], 200_000u32);
-}
-
-#[test]
-fn test_optional_index_iter_dense_block() {
-    let mut block = Vec::new();
-    block.push(3u32);
-    block.extend((0..BLOCK_SIZE).map(|i| i + BLOCK_SIZE + 1));
-    test_optional_index_iter_aux(&block, 3 * BLOCK_SIZE);
-}
-
 #[cfg(all(test, feature = "unstable"))]
 mod bench {

--- a/columnar/src/column_index/serialize.rs
+++ b/columnar/src/column_index/serialize.rs
@@ -3,35 +3,32 @@ use std::io::Write;

 use common::{CountingWriter, OwnedBytes};

-use crate::column_index::multivalued_index::{serialize_multivalued_index, self};
+use crate::column_index::multivalued_index::serialize_multivalued_index;
 use crate::column_index::optional_index::serialize_optional_index;
-use crate::column_index::ColumnIndex;
-use crate::iterable::Iterable;
+use crate::column_index::{ColumnIndex, SerializableOptionalIndex};
+use crate::column_values::ColumnValues;
 use crate::{Cardinality, RowId};

 pub enum SerializableColumnIndex<'a> {
    Full,
-    Optional {
-        non_null_row_ids: Box<dyn Fn() -> Box<dyn Iterator<Item=RowId> + 'a> + 'a>,
-        num_rows: RowId,
-    },
+    Optional(Box<dyn SerializableOptionalIndex<'a> + 'a>),
    // TODO remove the Arc<dyn> apart from serialization this is not
    // dynamic at all.
-    Multivalued(&'a dyn Fn() -> Box<dyn Iterator<Item=RowId> + 'a>),
+    Multivalued(Box<dyn ColumnValues<RowId> + 'a>),
 }

 impl<'a> SerializableColumnIndex<'a> {
    pub fn get_cardinality(&self) -> Cardinality {
        match self {
            SerializableColumnIndex::Full => Cardinality::Full,
-            SerializableColumnIndex::Optional { .. } => Cardinality::Optional,
+            SerializableColumnIndex::Optional(_) => Cardinality::Optional,
            SerializableColumnIndex::Multivalued(_) => Cardinality::Multivalued,
        }
    }
 }

-pub fn serialize_column_index<'a>(
-    column_index: SerializableColumnIndex<'a>,
+pub fn serialize_column_index(
+    column_index: SerializableColumnIndex,
    output: &mut impl Write,
 ) -> io::Result<u32> {
    let mut output = CountingWriter::wrap(output);
@@ -39,13 +36,11 @@ pub fn serialize_column_index<'a>(
    output.write_all(&[cardinality])?;
    match column_index {
        SerializableColumnIndex::Full => {}
-        SerializableColumnIndex::Optional {
-            non_null_row_ids,
-            num_rows,
-        } => serialize_optional_index(non_null_row_ids.as_ref(), num_rows, &mut output)?,
+        SerializableColumnIndex::Optional(optional_index) => {
+            serialize_optional_index(&*optional_index, &mut output)?
+        }
        SerializableColumnIndex::Multivalued(multivalued_index) => {
-            let multivalued_index_ref: &'a dyn Fn() -> Box<dyn Iterator<Item=RowId> + 'a> = multivalued_index.as_ref();
-            serialize_multivalued_index(multivalued_index_ref, &mut output)?
+            serialize_multivalued_index(&*multivalued_index, &mut output)?
        }
    }
    let column_index_num_bytes = output.written_bytes() as u32;
--- a/columnar/src/column_values/bitpacked.rs
+++ b/columnar/src/column_values/bitpacked.rs
@@ -0,0 +1,115 @@
+use std::io::{self, Write};
+
+use common::OwnedBytes;
+use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
+
+use super::serialize::NormalizedHeader;
+use super::{ColumnValues, FastFieldCodec, FastFieldCodecType};
+
+/// Depending on the field type, a different
+/// fast field is required.
+#[derive(Clone)]
+pub struct BitpackedReader {
+    data: OwnedBytes,
+    bit_unpacker: BitUnpacker,
+    normalized_header: NormalizedHeader,
+}
+
+impl ColumnValues for BitpackedReader {
+    #[inline]
+    fn get_val(&self, doc: u32) -> u64 {
+        self.bit_unpacker.get(doc, &self.data)
+    }
+    #[inline]
+    fn min_value(&self) -> u64 {
+        // The BitpackedReader assumes a normalized vector.
+        0
+    }
+    #[inline]
+    fn max_value(&self) -> u64 {
+        self.normalized_header.max_value
+    }
+    #[inline]
+    fn num_vals(&self) -> u32 {
+        self.normalized_header.num_vals
+    }
+}
+
+pub struct BitpackedCodec;
+
+impl FastFieldCodec for BitpackedCodec {
+    /// The CODEC_TYPE is an enum value used for serialization.
+    const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::Bitpacked;
+
+    type Reader = BitpackedReader;
+
+    /// Opens a fast field given a file.
+    fn open_from_bytes(
+        data: OwnedBytes,
+        normalized_header: NormalizedHeader,
+    ) -> io::Result<Self::Reader> {
+        let num_bits = compute_num_bits(normalized_header.max_value);
+        let bit_unpacker = BitUnpacker::new(num_bits);
+        Ok(BitpackedReader {
+            data,
+            bit_unpacker,
+            normalized_header,
+        })
+    }
+
+    /// Serializes data with the BitpackedFastFieldSerializer.
+    ///
+    /// The bitpacker assumes that the column has been normalized.
+    /// i.e. It has already been shifted by its minimum value, so that its
+    /// current minimum value is 0.
+    ///
+    /// Ideally, the column was already shifted upstream so that `col.min_value() == 0`.
+    fn serialize(column: &dyn ColumnValues, write: &mut impl Write) -> io::Result<()> {
+        assert_eq!(column.min_value(), 0u64);
+        let num_bits = compute_num_bits(column.max_value());
+        let mut bit_packer = BitPacker::new();
+        for val in column.iter() {
+            bit_packer.write(val, num_bits, write)?;
+        }
+        bit_packer.close(write)?;
+        Ok(())
+    }
+
+    fn estimate(column: &dyn ColumnValues) -> Option<f32> {
+        let num_bits = compute_num_bits(column.max_value());
+        let num_bits_uncompressed = 64;
+        Some(num_bits as f32 / num_bits_uncompressed as f32)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::column_values::tests::create_and_validate;
+
+    fn create_and_validate_bitpacked_codec(data: &[u64], name: &str) {
+        create_and_validate::<BitpackedCodec>(data, name);
+    }
+
+    #[test]
+    fn test_with_codec_data_sets() {
+        let data_sets = crate::column_values::tests::get_codec_test_datasets();
+        for (mut data, name) in data_sets {
+            create_and_validate_bitpacked_codec(&data, name);
+            data.reverse();
+            create_and_validate::<BitpackedCodec>(&data, name);
+        }
+    }
+
+    #[test]
+    fn bitpacked_fast_field_rand() {
+        for _ in 0..500 {
+            let mut data = (0..1 + rand::random::<u8>() as usize)
+                .map(|_| rand::random::<i64>() as u64 / 2)
+                .collect::<Vec<_>>();
+            create_and_validate_bitpacked_codec(&data, "rand");
+            data.reverse();
+            create_and_validate::<BitpackedCodec>(&data, "rand");
+        }
+    }
+}
--- a/columnar/src/column_values/blockwise_linear.rs
+++ b/columnar/src/column_values/blockwise_linear.rs
@@ -0,0 +1,188 @@
+use std::sync::Arc;
+use std::{io, iter};
+
+use common::{BinarySerializable, CountingWriter, DeserializeFrom, OwnedBytes};
+use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
+
+use crate::column_values::line::Line;
+use crate::column_values::serialize::NormalizedHeader;
+use crate::column_values::{ColumnValues, FastFieldCodec, FastFieldCodecType, VecColumn};
+
+const CHUNK_SIZE: usize = 512;
+
+#[derive(Debug, Default)]
+struct Block {
+    line: Line,
+    bit_unpacker: BitUnpacker,
+    data_start_offset: usize,
+}
+
+impl BinarySerializable for Block {
+    fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
+        self.line.serialize(writer)?;
+        self.bit_unpacker.bit_width().serialize(writer)?;
+        Ok(())
+    }
+
+    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
+        let line = Line::deserialize(reader)?;
+        let bit_width = u8::deserialize(reader)?;
+        Ok(Block {
+            line,
+            bit_unpacker: BitUnpacker::new(bit_width),
+            data_start_offset: 0,
+        })
+    }
+}
+
+fn compute_num_blocks(num_vals: u32) -> usize {
+    (num_vals as usize + CHUNK_SIZE - 1) / CHUNK_SIZE
+}
+
+pub struct BlockwiseLinearCodec;
+
+impl FastFieldCodec for BlockwiseLinearCodec {
+    const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::BlockwiseLinear;
+    type Reader = BlockwiseLinearReader;
+
+    fn open_from_bytes(
+        bytes: common::OwnedBytes,
+        normalized_header: NormalizedHeader,
+    ) -> io::Result<Self::Reader> {
+        let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;
+        let footer_offset = bytes.len() - 4 - footer_len as usize;
+        let (data, mut footer) = bytes.split(footer_offset);
+        let num_blocks = compute_num_blocks(normalized_header.num_vals);
+        let mut blocks: Vec<Block> = iter::repeat_with(|| Block::deserialize(&mut footer))
+            .take(num_blocks)
+            .collect::<io::Result<_>>()?;
+
+        let mut start_offset = 0;
+        for block in &mut blocks {
+            block.data_start_offset = start_offset;
+            start_offset += (block.bit_unpacker.bit_width() as usize) * CHUNK_SIZE / 8;
+        }
+        Ok(BlockwiseLinearReader {
+            blocks: Arc::new(blocks),
+            data,
+            normalized_header,
+        })
+    }
+
+    // Estimate first_chunk and extrapolate
+    fn estimate(column: &dyn ColumnValues) -> Option<f32> {
+        if column.num_vals() < 10 * CHUNK_SIZE as u32 {
+            return None;
+        }
+        let mut first_chunk: Vec<u64> = column.iter().take(CHUNK_SIZE).collect();
+        let line = Line::train(&VecColumn::from(&first_chunk));
+        for (i, buffer_val) in first_chunk.iter_mut().enumerate() {
+            let interpolated_val = line.eval(i as u32);
+            *buffer_val = buffer_val.wrapping_sub(interpolated_val);
+        }
+        let estimated_bit_width = first_chunk
+            .iter()
+            .map(|el| ((el + 1) as f32 * 3.0) as u64)
+            .map(compute_num_bits)
+            .max()
+            .unwrap();
+
+        let metadata_per_block = {
+            let mut out = vec![];
+            Block::default().serialize(&mut out).unwrap();
+            out.len()
+        };
+        let num_bits = estimated_bit_width as u64 * column.num_vals() as u64
+            // function metadata per block
+            + metadata_per_block as u64 * (column.num_vals() as u64 / CHUNK_SIZE as u64);
+        let num_bits_uncompressed = 64 * column.num_vals();
+        Some(num_bits as f32 / num_bits_uncompressed as f32)
+    }
+
+    fn serialize(column: &dyn ColumnValues, wrt: &mut impl io::Write) -> io::Result<()> {
+        // The BlockwiseLinearReader assumes a normalized vector.
+        assert_eq!(column.min_value(), 0);
+        let mut buffer = Vec::with_capacity(CHUNK_SIZE);
+        let num_vals = column.num_vals();
+
+        let num_blocks = compute_num_blocks(num_vals);
+        let mut blocks = Vec::with_capacity(num_blocks);
+
+        let mut vals = column.iter();
+
+        let mut bit_packer = BitPacker::new();
+
+        for _ in 0..num_blocks {
+            buffer.clear();
+            buffer.extend((&mut vals).take(CHUNK_SIZE));
+            let line = Line::train(&VecColumn::from(&buffer));
+
+            assert!(!buffer.is_empty());
+
+            for (i, buffer_val) in buffer.iter_mut().enumerate() {
+                let interpolated_val = line.eval(i as u32);
+                *buffer_val = buffer_val.wrapping_sub(interpolated_val);
+            }
+            let bit_width = buffer.iter().copied().map(compute_num_bits).max().unwrap();
+
+            for &buffer_val in &buffer {
+                bit_packer.write(buffer_val, bit_width, wrt)?;
+            }
+
+            blocks.push(Block {
+                line,
+                bit_unpacker: BitUnpacker::new(bit_width),
+                data_start_offset: 0,
+            });
+        }
+
+        bit_packer.close(wrt)?;
+
+        assert_eq!(blocks.len(), compute_num_blocks(num_vals));
+
+        let mut counting_wrt = CountingWriter::wrap(wrt);
+        for block in &blocks {
+            block.serialize(&mut counting_wrt)?;
+        }
+        let footer_len = counting_wrt.written_bytes();
+        (footer_len as u32).serialize(&mut counting_wrt)?;
+
+        Ok(())
+    }
+}
+
+#[derive(Clone)]
+pub struct BlockwiseLinearReader {
+    blocks: Arc<Vec<Block>>,
+    normalized_header: NormalizedHeader,
+    data: OwnedBytes,
+}
+
+impl ColumnValues for BlockwiseLinearReader {
+    #[inline(always)]
+    fn get_val(&self, idx: u32) -> u64 {
+        let block_id = (idx / CHUNK_SIZE as u32) as usize;
+        let idx_within_block = idx % (CHUNK_SIZE as u32);
+        let block = &self.blocks[block_id];
+        let interpoled_val: u64 = block.line.eval(idx_within_block);
+        let block_bytes = &self.data[block.data_start_offset..];
+        let bitpacked_diff = block.bit_unpacker.get(idx_within_block, block_bytes);
+        interpoled_val.wrapping_add(bitpacked_diff)
+    }
+
+    #[inline(always)]
+    fn min_value(&self) -> u64 {
+        // The BlockwiseLinearReader assumes a normalized vector.
+        0u64
+    }
+
+    #[inline(always)]
+    fn max_value(&self) -> u64 {
+        self.normalized_header.max_value
+    }
+
+    #[inline(always)]
+    fn num_vals(&self) -> u32 {
+        self.normalized_header.num_vals
+    }
+}
--- a/columnar/src/column_values/column.rs
+++ b/columnar/src/column_values/column.rs
@@ -1,17 +1,15 @@
 use std::fmt::Debug;
 use std::marker::PhantomData;
 use std::ops::{Range, RangeInclusive};
-use std::sync::Arc;

 use tantivy_bitpacker::minmax;

 use crate::column_values::monotonic_mapping::StrictlyMonotonicFn;
-use crate::iterable::Iterable;

 /// `ColumnValues` provides access to a dense field column.
 ///
 /// `Column` are just a wrapper over `ColumnValues` and a `ColumnIndex`.
-pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync {
+pub trait ColumnValues<T: PartialOrd + Debug = u64>: Send + Sync {
    /// Return the value associated with the given idx.
    ///
    /// This accessor should return as fast as possible.
@@ -29,7 +27,7 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync {
    ///
    /// Must panic if `start + output.len()` is greater than
    /// the segment's `maxdoc`.
-    #[inline(always)]
+    #[inline]
    fn get_range(&self, start: u64, output: &mut [T]) {
        for (out, idx) in output.iter_mut().zip(start..) {
            *out = self.get_val(idx as u32);
@@ -39,7 +37,7 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync {
    /// Get the positions of values which are in the provided value range.
    ///
    /// Note that position == docid for single value fast fields
-    #[inline(always)]
+    #[inline]
    fn get_docids_for_value_range(
        &self,
        value_range: RangeInclusive<T>,
@@ -80,33 +78,27 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync {
    }
 }

-impl<T: Copy + PartialOrd + Debug> ColumnValues<T> for Arc<dyn ColumnValues<T>> {
-    #[inline(always)]
+impl<T: Copy + PartialOrd + Debug> ColumnValues<T> for std::sync::Arc<dyn ColumnValues<T>> {
    fn get_val(&self, idx: u32) -> T {
        self.as_ref().get_val(idx)
    }

-    #[inline(always)]
    fn min_value(&self) -> T {
        self.as_ref().min_value()
    }

-    #[inline(always)]
    fn max_value(&self) -> T {
        self.as_ref().max_value()
    }

-    #[inline(always)]
    fn num_vals(&self) -> u32 {
        self.as_ref().num_vals()
    }

-    #[inline(always)]
    fn iter<'b>(&'b self) -> Box<dyn Iterator<Item = T> + 'b> {
        self.as_ref().iter()
    }

-    #[inline(always)]
    fn get_range(&self, start: u64, output: &mut [T]) {
        self.as_ref().get_range(start, output)
    }
--- a/columnar/src/column_values/compact_space/mod.rs
+++ b/columnar/src/column_values/compact_space/mod.rs
@@ -55,7 +55,7 @@ impl RangeMapping {
 }

 impl BinarySerializable for CompactSpace {
-    fn serialize<W: io::Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
        VInt(self.ranges_mapping.len() as u64).serialize(writer)?;

        let mut prev_value = 0;
@@ -247,7 +247,7 @@ pub struct CompactSpaceDecompressor {
 }

 impl BinarySerializable for IPCodecParams {
-    fn serialize<W: io::Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
        // header flags for future optional dictionary encoding
        let footer_flags = 0u64;
        footer_flags.serialize(writer)?;
--- a/columnar/src/column_values/gcd.rs
+++ b/columnar/src/column_values/gcd.rs
@@ -0,0 +1,75 @@
+use std::num::NonZeroU64;
+
+use fastdivide::DividerU64;
+
+/// Compute the gcd of two non-zero numbers.
+///
+/// It is recommended, but not required, to feed values such that `large >= small`.
+fn compute_gcd(mut large: NonZeroU64, mut small: NonZeroU64) -> NonZeroU64 {
+    loop {
+        let rem: u64 = large.get() % small;
+        if let Some(new_small) = NonZeroU64::new(rem) {
+            (large, small) = (small, new_small);
+        } else {
+            return small;
+        }
+    }
+}
+
+/// Finds the GCD of the non-zero values yielded by `numbers`; `None` if there are none.
+pub fn find_gcd(numbers: impl Iterator<Item = u64>) -> Option<NonZeroU64> {
+    let mut numbers = numbers.flat_map(NonZeroU64::new);
+    let mut gcd: NonZeroU64 = numbers.next()?;
+    if gcd.get() == 1 {
+        return Some(gcd);
+    }
+
+    let mut gcd_divider = DividerU64::divide_by(gcd.get());
+    for val in numbers {
+        let remainder = val.get() - (gcd_divider.divide(val.get())) * gcd.get();
+        if remainder == 0 {
+            continue;
+        }
+        gcd = compute_gcd(val, gcd);
+        if gcd.get() == 1 {
+            return Some(gcd);
+        }
+
+        gcd_divider = DividerU64::divide_by(gcd.get());
+    }
+    Some(gcd)
+}
+
+#[cfg(test)]
+mod tests {
+    use std::num::NonZeroU64;
+
+    use crate::column_values::gcd::{compute_gcd, find_gcd};
+
+    #[test]
+    fn test_compute_gcd() {
+        let test_compute_gcd_aux = |large, small, expected| {
+            let large = NonZeroU64::new(large).unwrap();
+            let small = NonZeroU64::new(small).unwrap();
+            let expected = NonZeroU64::new(expected).unwrap();
+            assert_eq!(compute_gcd(small, large), expected);
+            assert_eq!(compute_gcd(large, small), expected);
+        };
+        test_compute_gcd_aux(1, 4, 1);
+        test_compute_gcd_aux(2, 4, 2);
+        test_compute_gcd_aux(10, 25, 5);
+        test_compute_gcd_aux(25, 25, 25);
+    }
+
+    #[test]
+    fn find_gcd_test() {
+        assert_eq!(find_gcd([0].into_iter()), None);
+        assert_eq!(find_gcd([0, 10].into_iter()), NonZeroU64::new(10));
+        assert_eq!(find_gcd([10, 0].into_iter()), NonZeroU64::new(10));
+        assert_eq!(find_gcd([].into_iter()), None);
+        assert_eq!(find_gcd([15, 30, 5, 10].into_iter()), NonZeroU64::new(5));
+        assert_eq!(find_gcd([15, 16, 10].into_iter()), NonZeroU64::new(1));
+        assert_eq!(find_gcd([0, 5, 5, 5].into_iter()), NonZeroU64::new(5));
+        assert_eq!(find_gcd([0, 0].into_iter()), None);
+    }
+}
--- a/columnar/src/column_values/u64_based/line.rs
+++ b/columnar/src/column_values/u64_based/line.rs
@@ -17,8 +17,8 @@ const MID_POINT: u64 = (1u64 << 32) - 1u64;
 /// `y = m * x >> 32 + b`
 #[derive(Debug, Clone, Copy, Default)]
 pub struct Line {
-    pub(crate) slope: u64,
-    pub(crate) intercept: u64,
+    slope: u64,
+    intercept: u64,
 }

 /// Compute the line slope.
@@ -81,7 +81,7 @@ impl Line {
    }

    // Intercept is only computed from provided positions
-    pub fn train_from(
+    fn train_from(
        first_val: u64,
        last_val: u64,
        num_vals: u32,
@@ -145,7 +145,6 @@ impl Line {
    ///
    /// This function is only invariable by translation if all of the
    /// `ys` are packaged into half of the space. (See heuristic below)
-    /// TODO USE array
    pub fn train(ys: &dyn ColumnValues) -> Self {
        let first_val = ys.iter().next().unwrap();
        let last_val = ys.iter().nth(ys.num_vals() as usize - 1).unwrap();
@@ -159,7 +158,7 @@ impl Line {
 }

 impl BinarySerializable for Line {
-    fn serialize<W: io::Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
        VInt(self.slope).serialize(writer)?;
        VInt(self.intercept).serialize(writer)?;
        Ok(())
--- a/columnar/src/column_values/linear.rs
+++ b/columnar/src/column_values/linear.rs
@@ -0,0 +1,230 @@
+use std::io::{self, Write};
+
+use common::{BinarySerializable, OwnedBytes};
+use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
+
+use super::line::Line;
+use super::serialize::NormalizedHeader;
+use super::{ColumnValues, FastFieldCodec, FastFieldCodecType};
+
+/// Reader for a linearly interpolated column: each value is the line
+/// evaluated at its index, plus a bitpacked offset read from `data`.
+#[derive(Clone)]
+pub struct LinearReader {
+    data: OwnedBytes,
+    linear_params: LinearParams,
+    header: NormalizedHeader,
+}
+
+impl ColumnValues for LinearReader {
+    #[inline]
+    fn get_val(&self, doc: u32) -> u64 {
+        let interpoled_val: u64 = self.linear_params.line.eval(doc);
+        let bitpacked_diff = self.linear_params.bit_unpacker.get(doc, &self.data);
+        interpoled_val.wrapping_add(bitpacked_diff)
+    }
+
+    #[inline(always)]
+    fn min_value(&self) -> u64 {
+        // The LinearReader assumes a normalized vector.
+        0u64
+    }
+
+    #[inline(always)]
+    fn max_value(&self) -> u64 {
+        self.header.max_value
+    }
+
+    #[inline]
+    fn num_vals(&self) -> u32 {
+        self.header.num_vals
+    }
+}
+
+/// Fastfield serializer, which tries to guess values by linear interpolation
+/// and stores the difference bitpacked.
+pub struct LinearCodec;
+
+#[derive(Debug, Clone)]
+struct LinearParams {
+    line: Line,
+    bit_unpacker: BitUnpacker,
+}
+
+impl BinarySerializable for LinearParams {
+    fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
+        self.line.serialize(writer)?;
+        self.bit_unpacker.bit_width().serialize(writer)?;
+        Ok(())
+    }
+
+    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
+        let line = Line::deserialize(reader)?;
+        let bit_width = u8::deserialize(reader)?;
+        Ok(Self {
+            line,
+            bit_unpacker: BitUnpacker::new(bit_width),
+        })
+    }
+}
+
+impl FastFieldCodec for LinearCodec {
+    const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::Linear;
+
+    type Reader = LinearReader;
+
+    /// Opens a fast field given a file.
+    fn open_from_bytes(mut data: OwnedBytes, header: NormalizedHeader) -> io::Result<Self::Reader> {
+        let linear_params = LinearParams::deserialize(&mut data)?;
+        Ok(LinearReader {
+            data,
+            linear_params,
+            header,
+        })
+    }
+
+    /// Serializes `column`: the line parameters first, then the bitpacked offsets.
+    fn serialize(column: &dyn ColumnValues, write: &mut impl Write) -> io::Result<()> {
+        assert_eq!(column.min_value(), 0);
+        let line = Line::train(column);
+
+        let max_offset_from_line = column
+            .iter()
+            .enumerate()
+            .map(|(pos, actual_value)| {
+                let calculated_value = line.eval(pos as u32);
+                actual_value.wrapping_sub(calculated_value)
+            })
+            .max()
+            .unwrap();
+
+        let num_bits = compute_num_bits(max_offset_from_line);
+        let linear_params = LinearParams {
+            line,
+            bit_unpacker: BitUnpacker::new(num_bits),
+        };
+        linear_params.serialize(write)?;
+
+        let mut bit_packer = BitPacker::new();
+        for (pos, actual_value) in column.iter().enumerate() {
+            let calculated_value = line.eval(pos as u32);
+            let offset = actual_value.wrapping_sub(calculated_value);
+            bit_packer.write(offset, num_bits, write)?;
+        }
+        bit_packer.close(write)?;
+
+        Ok(())
+    }
+
+    /// Estimation for linear interpolation is hard because you don't know
+    /// where the local maxima for the deviation of the calculated value are and
+    /// the offset to shift all values to >=0 is also unknown.
+    #[allow(clippy::question_mark)]
+    fn estimate(column: &dyn ColumnValues) -> Option<f32> {
+        if column.num_vals() < 3 {
+            return None; // disable compressor for this case
+        }
+
+        let limit_num_vals = column.num_vals().min(100_000);
+
+        let num_samples = 100;
+        let step_size = (limit_num_vals / num_samples).max(1); // targets about `num_samples` samples
+        let mut sample_positions_and_values: Vec<_> = Vec::new();
+        for (pos, val) in column.iter().enumerate().step_by(step_size as usize) {
+            sample_positions_and_values.push((pos as u64, val));
+        }
+
+        let line = Line::estimate(&sample_positions_and_values);
+
+        let estimated_bit_width = sample_positions_and_values
+            .into_iter()
+            .map(|(pos, actual_value)| {
+                let interpolated_val = line.eval(pos as u32);
+                actual_value.wrapping_sub(interpolated_val)
+            })
+            .map(|diff| ((diff as f32 * 1.5) * 2.0) as u64)
+            .map(compute_num_bits)
+            .max()
+            .unwrap_or(0);
+
+        // Extrapolate to whole column
+        let num_bits = (estimated_bit_width as u64 * column.num_vals() as u64) + 64;
+        let num_bits_uncompressed = 64 * column.num_vals();
+        Some(num_bits as f32 / num_bits_uncompressed as f32)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use rand::RngCore;
+
+    use super::*;
+    use crate::column_values::tests;
+
+    fn create_and_validate(data: &[u64], name: &str) -> Option<(f32, f32)> {
+        tests::create_and_validate::<LinearCodec>(data, name)
+    }
+
+    #[test]
+    fn test_compression() {
+        let data = (10..=6_000_u64).collect::<Vec<_>>();
+        let (estimate, actual_compression) =
+            create_and_validate(&data, "simple monotonically large").unwrap();
+
+        assert_le!(actual_compression, 0.001);
+        assert_le!(estimate, 0.02);
+    }
+
+    #[test]
+    fn test_with_codec_datasets() {
+        let data_sets = tests::get_codec_test_datasets();
+        for (mut data, name) in data_sets {
+            create_and_validate(&data, name);
+            data.reverse();
+            create_and_validate(&data, name);
+        }
+    }
+    #[test]
+    fn linear_interpol_fast_field_test_large_amplitude() {
+        let data = vec![
+            i64::MAX as u64 / 2,
+            i64::MAX as u64 / 3,
+            i64::MAX as u64 / 2,
+        ];
+
+        create_and_validate(&data, "large amplitude");
+    }
+
+    #[test]
+    fn overflow_error_test() {
+        let data = vec![1572656989877777, 1170935903116329, 720575940379279, 0];
+        create_and_validate(&data, "overflow test");
+    }
+
+    #[test]
+    fn linear_interpol_fast_concave_data() {
+        let data = vec![0, 1, 2, 5, 8, 10, 20, 50];
+        create_and_validate(&data, "concave data");
+    }
+    #[test]
+    fn linear_interpol_fast_convex_data() {
+        let data = vec![0, 40, 60, 70, 75, 77];
+        create_and_validate(&data, "convex data");
+    }
+    #[test]
+    fn linear_interpol_fast_field_test_simple() {
+        let data = (10..=20_u64).collect::<Vec<_>>();
+        create_and_validate(&data, "simple monotonically");
+    }
+
+    #[test]
+    fn linear_interpol_fast_field_rand() {
+        let mut rng = rand::thread_rng();
+        for _ in 0..50 {
+            let mut data = (0..10_000).map(|_| rng.next_u64()).collect::<Vec<_>>();
+            create_and_validate(&data, "random");
+            data.reverse();
+            create_and_validate(&data, "random");
+        }
+    }
+}
--- a/columnar/src/column_values/mod.rs
+++ b/columnar/src/column_values/mod.rs
@@ -7,6 +7,9 @@
 //! - Encode data in different codecs.
 //! - Monotonically map values to u64/u128

+#[cfg(test)]
+mod tests;
+
 use std::fmt::Debug;
 use std::io;
 use std::io::Write;
@@ -15,27 +18,75 @@ use std::sync::Arc;
 use common::{BinarySerializable, OwnedBytes};
 use compact_space::CompactSpaceDecompressor;
 pub use monotonic_mapping::{MonotonicallyMappableToU64, StrictlyMonotonicFn};
-use monotonic_mapping::{StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternal};
+use monotonic_mapping::{
+    StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternal,
+    StrictlyMonotonicMappingToInternalBaseval, StrictlyMonotonicMappingToInternalGCDBaseval,
+};
 pub use monotonic_mapping_u128::MonotonicallyMappableToU128;
-use serialize::U128Header;
+use serialize::{Header, U128Header};

+mod bitpacked;
+mod blockwise_linear;
 mod compact_space;
+mod line;
+mod linear;
 pub(crate) mod monotonic_mapping;
 pub(crate) mod monotonic_mapping_u128;
-mod stats;
-pub(crate) mod u64_based;

 mod column;
+mod gcd;
 pub mod serialize;

-pub use serialize::serialize_column_values_u128;
-pub use stats::Stats;
-pub use u64_based::{
-    load_u64_based_column_values, serialize_and_load_u64_based_column_values,
-    serialize_u64_based_column_values, CodecType, ALL_U64_CODEC_TYPES,
-};
-
 pub use self::column::{monotonic_map_column, ColumnValues, IterColumn, VecColumn};
+#[cfg(test)]
+pub use self::serialize::tests::serialize_and_load;
+pub use self::serialize::{serialize_column_values, NormalizedHeader};
+use crate::column_values::bitpacked::BitpackedCodec;
+use crate::column_values::blockwise_linear::BlockwiseLinearCodec;
+use crate::column_values::linear::LinearCodec;
+
+#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
+#[repr(u8)]
+/// Available codecs to use to encode the u64 (via [`MonotonicallyMappableToU64`]) converted data.
+pub enum FastFieldCodecType {
+    /// Bitpack all values in the value range. The number of bits is defined by the amplitude
+    /// `column.max_value() - column.min_value()`
+    Bitpacked = 1,
+    /// Linear interpolation puts a line between the first and last value and then bitpacks the
+    /// values by the offset from the line. The number of bits is defined by the max deviation from
+    /// the line.
+    Linear = 2,
+    /// Same as [`FastFieldCodecType::Linear`], but encodes in blocks of 512 elements.
+    BlockwiseLinear = 3,
+}
+
+impl BinarySerializable for FastFieldCodecType {
+    fn serialize<W: Write>(&self, wrt: &mut W) -> io::Result<()> {
+        self.to_code().serialize(wrt)
+    }
+
+    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
+        let code = u8::deserialize(reader)?;
+        // `format!` is required: a plain literal would emit the text `{code}` verbatim.
+        let err = || io::Error::new(io::ErrorKind::InvalidData, format!("Unknown code `{code}.`"));
+        Self::from_code(code).ok_or_else(err)
+    }
+}
+
+impl FastFieldCodecType {
+    pub(crate) fn to_code(self) -> u8 {
+        self as u8
+    }
+
+    pub(crate) fn from_code(code: u8) -> Option<Self> {
+        match code {
+            1 => Some(Self::Bitpacked),
+            2 => Some(Self::Linear),
+            3 => Some(Self::BlockwiseLinear),
+            _ => None,
+        }
+    }
+}

 #[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
 #[repr(u8)]
@@ -47,7 +98,7 @@ pub enum U128FastFieldCodecType {
 }

 impl BinarySerializable for U128FastFieldCodecType {
-    fn serialize<W: Write + ?Sized>(&self, wrt: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, wrt: &mut W) -> io::Result<()> {
        self.to_code().serialize(wrt)
    }

@@ -85,6 +136,68 @@ pub fn open_u128_mapped<T: MonotonicallyMappableToU128 + Debug>(
    Ok(Arc::new(monotonic_map_column(reader, inverted)))
 }

+/// Returns the correct codec reader wrapped in the `Arc` for the data.
+pub fn open_u64_mapped<T: MonotonicallyMappableToU64 + Debug>(
+    mut bytes: OwnedBytes,
+) -> io::Result<Arc<dyn ColumnValues<T>>> {
+    let header = Header::deserialize(&mut bytes)?;
+    match header.codec_type {
+        FastFieldCodecType::Bitpacked => open_specific_codec::<BitpackedCodec, _>(bytes, &header),
+        FastFieldCodecType::Linear => open_specific_codec::<LinearCodec, _>(bytes, &header),
+        FastFieldCodecType::BlockwiseLinear => {
+            open_specific_codec::<BlockwiseLinearCodec, _>(bytes, &header)
+        }
+    }
+}
+
+fn open_specific_codec<C: FastFieldCodec, Item: MonotonicallyMappableToU64 + Debug>(
+    bytes: OwnedBytes,
+    header: &Header,
+) -> io::Result<Arc<dyn ColumnValues<Item>>> {
+    let normalized_header = header.normalized();
+    let reader = C::open_from_bytes(bytes, normalized_header)?;
+    let min_value = header.min_value;
+    if let Some(gcd) = header.gcd {
+        let mapping = StrictlyMonotonicMappingInverter::from(
+            StrictlyMonotonicMappingToInternalGCDBaseval::new(gcd.get(), min_value),
+        );
+        Ok(Arc::new(monotonic_map_column(reader, mapping)))
+    } else {
+        let mapping = StrictlyMonotonicMappingInverter::from(
+            StrictlyMonotonicMappingToInternalBaseval::new(min_value),
+        );
+        Ok(Arc::new(monotonic_map_column(reader, mapping)))
+    }
+}
+
+/// The `FastFieldCodec` trait is required on all variants
+/// of fast field compressions, to decide which one to choose.
+pub(crate) trait FastFieldCodec: 'static {
+    /// A codec needs to provide a unique codec type, which is
+    /// used for debugging and de/serialization.
+    const CODEC_TYPE: FastFieldCodecType;
+
+    type Reader: ColumnValues<u64> + 'static;
+
+    /// Reads the metadata and returns the CodecReader
+    fn open_from_bytes(bytes: OwnedBytes, header: NormalizedHeader) -> io::Result<Self::Reader>;
+
+    /// Serializes the data using the serializer into write.
+    ///
+    /// The column iterator should be preferred over using column `get_val` method for
+    /// performance reasons.
+    fn serialize(column: &dyn ColumnValues, write: &mut impl Write) -> io::Result<()>;
+
+    /// Returns an estimate of the compression ratio.
+    /// If the codec is not applicable, returns `None`.
+    ///
+    /// The baseline is uncompressed 64bit data.
+    ///
+    /// It could make sense to also return a value representing
+    /// computational complexity.
+    fn estimate(column: &dyn ColumnValues) -> Option<f32>;
+}
+
 #[cfg(all(test, feature = "unstable"))]
 mod bench {
    use std::sync::Arc;
--- a/columnar/src/column_values/serialize.rs
+++ b/columnar/src/column_values/serialize.rs
@@ -5,17 +5,18 @@ use std::num::NonZeroU64;
 use common::{BinarySerializable, VInt};
 use log::warn;

+use super::bitpacked::BitpackedCodec;
+use super::blockwise_linear::BlockwiseLinearCodec;
+use super::linear::LinearCodec;
 use super::monotonic_mapping::{
    StrictlyMonotonicFn, StrictlyMonotonicMappingToInternal,
    StrictlyMonotonicMappingToInternalGCDBaseval,
 };
 use super::{
-    monotonic_map_column, u64_based, ColumnValues, MonotonicallyMappableToU64,
-    U128FastFieldCodecType,
+    monotonic_map_column, ColumnValues, FastFieldCodec, FastFieldCodecType,
+    MonotonicallyMappableToU64, U128FastFieldCodecType,
 };
 use crate::column_values::compact_space::CompactSpaceCompressor;
-use crate::column_values::u64_based::CodecType;
-use crate::iterable::Iterable;

 /// The normalized header gives some parameters after applying the following
 /// normalization of the vector:
@@ -30,6 +31,53 @@ pub struct NormalizedHeader {
    pub max_value: u64,
 }

+#[derive(Debug, Copy, Clone)]
+pub(crate) struct Header {
+    pub num_vals: u32,
+    pub min_value: u64,
+    pub max_value: u64,
+    pub gcd: Option<NonZeroU64>,
+    pub codec_type: FastFieldCodecType,
+}
+
+impl Header {
+    pub fn normalized(self) -> NormalizedHeader {
+        let gcd = self.gcd.map(|gcd| gcd.get()).unwrap_or(1);
+        let gcd_min_val_mapping =
+            StrictlyMonotonicMappingToInternalGCDBaseval::new(gcd, self.min_value);
+
+        let max_value = gcd_min_val_mapping.mapping(self.max_value);
+        NormalizedHeader {
+            num_vals: self.num_vals,
+            max_value,
+        }
+    }
+
+    pub(crate) fn normalize_column<C: ColumnValues>(&self, from_column: C) -> impl ColumnValues {
+        normalize_column(from_column, self.min_value, self.gcd)
+    }
+
+    pub fn compute_header(
+        column: impl ColumnValues<u64>,
+        codecs: &[FastFieldCodecType],
+    ) -> Option<Header> {
+        let num_vals = column.num_vals();
+        let min_value = column.min_value();
+        let max_value = column.max_value();
+        let gcd = super::gcd::find_gcd(column.iter().map(|val| val - min_value))
+            .filter(|gcd| gcd.get() > 1u64);
+        let normalized_column = normalize_column(column, min_value, gcd);
+        let codec_type = detect_codec(normalized_column, codecs)?;
+        Some(Header {
+            num_vals,
+            min_value,
+            max_value,
+            gcd,
+            codec_type,
+        })
+    }
+}
+
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub(crate) struct U128Header {
    pub num_vals: u32,
@@ -37,7 +85,7 @@ pub(crate) struct U128Header {
 }

 impl BinarySerializable for U128Header {
-    fn serialize<W: io::Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
        VInt(self.num_vals as u64).serialize(writer)?;
        self.codec_type.serialize(writer)?;
        Ok(())
@@ -63,9 +111,40 @@ fn normalize_column<C: ColumnValues>(
    monotonic_map_column(from_column, mapping)
 }

+impl BinarySerializable for Header {
+    fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
+        VInt(self.num_vals as u64).serialize(writer)?;
+        VInt(self.min_value).serialize(writer)?;
+        VInt(self.max_value - self.min_value).serialize(writer)?;
+        if let Some(gcd) = self.gcd {
+            VInt(gcd.get()).serialize(writer)?;
+        } else {
+            VInt(0u64).serialize(writer)?;
+        }
+        self.codec_type.serialize(writer)?;
+        Ok(())
+    }
+
+    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
+        let num_vals = VInt::deserialize(reader)?.0 as u32;
+        let min_value = VInt::deserialize(reader)?.0;
+        let amplitude = VInt::deserialize(reader)?.0;
+        let max_value = min_value + amplitude;
+        let gcd_u64 = VInt::deserialize(reader)?.0;
+        let codec_type = FastFieldCodecType::deserialize(reader)?;
+        Ok(Header {
+            num_vals,
+            min_value,
+            max_value,
+            gcd: NonZeroU64::new(gcd_u64),
+            codec_type,
+        })
+    }
+}
+
 /// Serializes u128 values with the compact space codec.
-pub fn serialize_column_values_u128<I: Iterator<Item = u128>>(
-    iterable: &dyn Fn() -> I,
+pub fn serialize_column_values_u128<F: Fn() -> I, I: Iterator<Item = u128>>(
+    iter_gen: F,
    num_vals: u32,
    output: &mut impl io::Write,
 ) -> io::Result<()> {
@@ -74,20 +153,105 @@ pub fn serialize_column_values_u128<I: Iterator<Item = u128>>(
        codec_type: U128FastFieldCodecType::CompactSpace,
    };
    header.serialize(output)?;
-    let compressor = CompactSpaceCompressor::train_from(iterable(), num_vals);
-    compressor.compress_into(iterable(), output)?;
+    let compressor = CompactSpaceCompressor::train_from(iter_gen(), num_vals);
+    compressor.compress_into(iter_gen(), output)?;

    Ok(())
 }

+/// Serializes the column with the codec with the best estimate on the data.
+pub fn serialize_column_values<T: MonotonicallyMappableToU64 + Debug>(
+    typed_column: impl ColumnValues<T>,
+    codecs: &[FastFieldCodecType],
+    output: &mut impl io::Write,
+) -> io::Result<()> {
+    let column = monotonic_map_column(typed_column, StrictlyMonotonicMappingToInternal::<T>::new());
+    let header = Header::compute_header(&column, codecs).ok_or_else(|| {
+        io::Error::new(
+            io::ErrorKind::InvalidInput,
+            format!(
+                "Data cannot be serialized with this list of codec. {:?}",
+                codecs
+            ),
+        )
+    })?;
+    header.serialize(output)?;
+    let normalized_column = header.normalize_column(column);
+    assert_eq!(normalized_column.min_value(), 0u64);
+    serialize_given_codec(normalized_column, header.codec_type, output)?;
+    Ok(())
+}
+
+fn detect_codec(
+    column: impl ColumnValues<u64>,
+    codecs: &[FastFieldCodecType],
+) -> Option<FastFieldCodecType> {
+    let mut estimations = Vec::new();
+    for &codec in codecs {
+        let estimation_opt = match codec {
+            FastFieldCodecType::Bitpacked => BitpackedCodec::estimate(&column),
+            FastFieldCodecType::Linear => LinearCodec::estimate(&column),
+            FastFieldCodecType::BlockwiseLinear => BlockwiseLinearCodec::estimate(&column),
+        };
+        if let Some(estimation) = estimation_opt {
+            estimations.push((estimation, codec));
+        }
+    }
+    if let Some(broken_estimation) = estimations.iter().find(|estimation| estimation.0.is_nan()) {
+        warn!(
+            "broken estimation for fast field codec {:?}",
+            broken_estimation.1
+        );
+    }
+    // removing nan values for codecs with broken calculations, and max values which disables
+    // codecs
+    estimations.retain(|estimation| !estimation.0.is_nan() && estimation.0 != f32::MAX);
+    estimations.sort_by(|(score_left, _), (score_right, _)| score_left.total_cmp(score_right));
+    Some(estimations.first()?.1)
+}
+
+pub(crate) fn serialize_given_codec(
+    column: impl ColumnValues<u64>,
+    codec_type: FastFieldCodecType,
+    output: &mut impl io::Write,
+) -> io::Result<()> {
+    match codec_type {
+        FastFieldCodecType::Bitpacked => {
+            BitpackedCodec::serialize(&column, output)?;
+        }
+        FastFieldCodecType::Linear => {
+            LinearCodec::serialize(&column, output)?;
+        }
+        FastFieldCodecType::BlockwiseLinear => {
+            BlockwiseLinearCodec::serialize(&column, output)?;
+        }
+    }
+    Ok(())
+}
+
 #[cfg(test)]
 pub mod tests {
-    use super::*;
-    use crate::column_values::u64_based::{
-        self, serialize_and_load_u64_based_column_values, serialize_u64_based_column_values,
-        ALL_U64_CODEC_TYPES,
-    };
+    use std::sync::Arc;

+    use common::OwnedBytes;
+
+    use super::*;
+    use crate::column_values::{open_u64_mapped, VecColumn};
+
+    const ALL_CODEC_TYPES: [FastFieldCodecType; 3] = [
+        FastFieldCodecType::Bitpacked,
+        FastFieldCodecType::Linear,
+        FastFieldCodecType::BlockwiseLinear,
+    ];
+
+    /// Helper function to serialize a column (autodetect from all codecs) and then open it
+    pub fn serialize_and_load<T: MonotonicallyMappableToU64 + Ord + Default>(
+        column: &[T],
+    ) -> Arc<dyn ColumnValues<T>> {
+        let mut buffer = Vec::new();
+        serialize_column_values(&VecColumn::from(&column), &ALL_CODEC_TYPES, &mut buffer).unwrap();
+        open_u64_mapped(OwnedBytes::new(buffer)).unwrap()
+    }
    #[test]
    fn test_serialize_deserialize_u128_header() {
        let original = U128Header {
@@ -103,22 +267,15 @@ pub mod tests {
    #[test]
    fn test_serialize_deserialize() {
        let original = [1u64, 5u64, 10u64];
-        let restored: Vec<u64> =
-            serialize_and_load_u64_based_column_values(&&original[..], &ALL_U64_CODEC_TYPES)
-                .iter()
-                .collect();
+        let restored: Vec<u64> = serialize_and_load(&original[..]).iter().collect();
        assert_eq!(&restored, &original[..]);
    }

    #[test]
    fn test_fastfield_bool_size_bitwidth_1() {
        let mut buffer = Vec::new();
-        serialize_u64_based_column_values(
-            || [false, true].into_iter(),
-            &ALL_U64_CODEC_TYPES,
-            &mut buffer,
-        )
-        .unwrap();
+        let col = VecColumn::from(&[false, true][..]);
+        serialize_column_values(&col, &ALL_CODEC_TYPES, &mut buffer).unwrap();
        // TODO put the header as a footer so that it serves as a padding.
        // 5 bytes of header, 1 byte of value, 7 bytes of padding.
        assert_eq!(buffer.len(), 5 + 1);
@@ -127,27 +284,19 @@ pub mod tests {
    #[test]
    fn test_fastfield_bool_bit_size_bitwidth_0() {
        let mut buffer = Vec::new();
-        serialize_u64_based_column_values(
-            || [false, true].into_iter(),
-            &ALL_U64_CODEC_TYPES,
-            &mut buffer,
-        )
-        .unwrap();
-        // 6 bytes of header, 0 bytes of value, 7 bytes of padding.
-        assert_eq!(buffer.len(), 6);
+        let col = VecColumn::from(&[true][..]);
+        serialize_column_values(&col, &ALL_CODEC_TYPES, &mut buffer).unwrap();
+        // 5 bytes of header, 0 bytes of value, no padding.
+        assert_eq!(buffer.len(), 5);
    }

    #[test]
    fn test_fastfield_gcd() {
        let mut buffer = Vec::new();
        let vals: Vec<u64> = (0..80).map(|val| (val % 7) * 1_000u64).collect();
-        serialize_u64_based_column_values(
-            || vals.iter().cloned(),
-            &[CodecType::Bitpacked],
-            &mut buffer,
-        )
-        .unwrap();
+        let col = VecColumn::from(&vals[..]);
+        serialize_column_values(&col, &[FastFieldCodecType::Bitpacked], &mut buffer).unwrap();
        // Values are stored over 3 bits.
-        assert_eq!(buffer.len(), 6 + (3 * 80 / 8));
+        assert_eq!(buffer.len(), 7 + (3 * 80 / 8));
    }
 }
--- a/columnar/src/column_values/stats.rs
+++ b/columnar/src/column_values/stats.rs
@@ -1,96 +0,0 @@
-use std::io;
-use std::io::Write;
-use std::num::NonZeroU64;
-
-use common::{BinarySerializable, VInt};
-
-use crate::RowId;
-
-#[derive(Debug, Clone, Eq, PartialEq)]
-pub struct Stats {
-    pub gcd: NonZeroU64,
-    pub min_value: u64,
-    pub max_value: u64,
-    pub num_rows: RowId,
-}
-
-impl Stats {
-    pub fn amplitude(&self) -> u64 {
-        self.max_value - self.min_value
-    }
-}
-
-impl BinarySerializable for Stats {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
-        VInt(self.min_value).serialize(writer)?;
-        VInt(self.gcd.get()).serialize(writer)?;
-        VInt(self.amplitude() / self.gcd).serialize(writer)?;
-        VInt(self.num_rows as u64).serialize(writer)?;
-        Ok(())
-    }
-
-    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
-        let min_value = VInt::deserialize(reader)?.0;
-        let gcd = VInt::deserialize(reader)?.0;
-        let gcd = NonZeroU64::new(gcd)
-            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "GCD of 0 is forbidden"))?;
-        let amplitude = VInt::deserialize(reader)?.0 * gcd.get();
-        let max_value = min_value + amplitude;
-        let num_rows = VInt::deserialize(reader)?.0 as RowId;
-        Ok(Stats {
-            min_value,
-            max_value,
-            num_rows,
-            gcd,
-        })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use std::num::NonZeroU64;
-
-    use common::BinarySerializable;
-
-    use crate::column_values::Stats;
-
-    #[track_caller]
-    fn test_stats_ser_deser_aux(stats: &Stats, num_bytes: usize) {
-        let mut buffer: Vec<u8> = Vec::new();
-        stats.serialize(&mut buffer).unwrap();
-        assert_eq!(buffer.len(), num_bytes);
-        let deser_stats = Stats::deserialize(&mut &buffer[..]).unwrap();
-        assert_eq!(stats, &deser_stats);
-    }
-
-    #[test]
-    fn test_stats_serialization() {
-        test_stats_ser_deser_aux(
-            &(Stats {
-                gcd: NonZeroU64::new(3).unwrap(),
-                min_value: 1,
-                max_value: 3001,
-                num_rows: 10,
-            }),
-            5,
-        );
-        test_stats_ser_deser_aux(
-            &(Stats {
-                gcd: NonZeroU64::new(1_000).unwrap(),
-                min_value: 1,
-                max_value: 3001,
-                num_rows: 10,
-            }),
-            5,
-        );
-        test_stats_ser_deser_aux(
-            &(Stats {
-                gcd: NonZeroU64::new(1).unwrap(),
-                min_value: 0,
-                max_value: 0,
-                num_rows: 0,
-            }),
-            4,
-        );
-    }
-}
--- a/columnar/src/column_values/u64_based/tests.rs
+++ b/columnar/src/column_values/u64_based/tests.rs
@@ -2,90 +2,53 @@ use proptest::prelude::*;
 use proptest::strategy::Strategy;
 use proptest::{prop_oneof, proptest};

-#[test]
-fn test_serialize_and_load_simple() {
-    let mut buffer = Vec::new();
-    let vals = &[1u64, 2u64, 5u64];
-    serialize_u64_based_column_values(
-        || vals.iter().cloned(),
-        &[CodecType::Bitpacked, CodecType::BlockwiseLinear],
-        &mut buffer,
-    )
-    .unwrap();
-    assert_eq!(buffer.len(), 7);
-    let col = load_u64_based_column_values::<u64>(OwnedBytes::new(buffer)).unwrap();
-    assert_eq!(col.num_vals(), 3);
-    assert_eq!(col.get_val(0), 1);
-    assert_eq!(col.get_val(1), 2);
-    assert_eq!(col.get_val(2), 5);
-}
-pub(crate) fn create_and_validate<TColumnCodec: ColumnCodec>(
-    vals: &[u64],
+use super::bitpacked::BitpackedCodec;
+use super::blockwise_linear::BlockwiseLinearCodec;
+use super::linear::LinearCodec;
+use super::serialize::Header;
+
+pub(crate) fn create_and_validate<Codec: FastFieldCodec>(
+    data: &[u64],
    name: &str,
 ) -> Option<(f32, f32)> {
-    let mut stats_collector = StatsCollector::default();
-    let mut codec_estimator: TColumnCodec::Estimator = Default::default();
+    let col = &VecColumn::from(data);
+    let header = Header::compute_header(col, &[Codec::CODEC_TYPE])?;
+    let normalized_col = header.normalize_column(col);
+    let estimation = Codec::estimate(&normalized_col)?;

-    for val in vals.boxed_iter() {
-        stats_collector.collect(val);
-        codec_estimator.collect(val);
-    }
-    codec_estimator.finalize();
-    let stats = stats_collector.stats();
-    let estimation = codec_estimator.estimate(&stats)?;
+    let mut out = Vec::new();
+    let col = VecColumn::from(data);
+    serialize_column_values(&col, &[Codec::CODEC_TYPE], &mut out).unwrap();

-    let mut buffer = Vec::new();
-    codec_estimator
-        .serialize(&stats, vals.boxed_iter().as_mut(), &mut buffer)
-        .unwrap();
+    let actual_compression = out.len() as f32 / (data.len() as f32 * 8.0);

-    let actual_compression = buffer.len() as u64;
-
-    let reader = TColumnCodec::load(OwnedBytes::new(buffer)).unwrap();
-    assert_eq!(reader.num_vals(), vals.len() as u32);
-    for (doc, orig_val) in vals.iter().copied().enumerate() {
+    let reader = super::open_u64_mapped::<u64>(OwnedBytes::new(out)).unwrap();
+    assert_eq!(reader.num_vals(), data.len() as u32);
+    for (doc, orig_val) in data.iter().copied().enumerate() {
        let val = reader.get_val(doc as u32);
        assert_eq!(
            val, orig_val,
-            "val `{val}` does not match orig_val {orig_val:?}, in data set {name}, data `{vals:?}`",
+            "val `{val}` does not match orig_val {orig_val:?}, in data set {name}, data `{data:?}`",
        );
    }

-    if !vals.is_empty() {
-        let test_rand_idx = rand::thread_rng().gen_range(0..=vals.len() - 1);
-        let expected_positions: Vec<u32> = vals
+    if !data.is_empty() {
+        let test_rand_idx = rand::thread_rng().gen_range(0..=data.len() - 1);
+        let expected_positions: Vec<u32> = data
            .iter()
            .enumerate()
-            .filter(|(_, el)| **el == vals[test_rand_idx])
+            .filter(|(_, el)| **el == data[test_rand_idx])
            .map(|(pos, _)| pos as u32)
            .collect();
        let mut positions = Vec::new();
        reader.get_docids_for_value_range(
-            vals[test_rand_idx]..=vals[test_rand_idx],
-            0..vals.len() as u32,
+            data[test_rand_idx]..=data[test_rand_idx],
+            0..data.len() as u32,
            &mut positions,
        );
        assert_eq!(expected_positions, positions);
    }
-    dbg!(estimation);
-    dbg!(actual_compression);
-    if actual_compression > 20 {
-        assert!(relative_difference(estimation, actual_compression) < 0.10f32);
-    }
-    Some((
-        compression_rate(estimation, stats.num_rows),
-        compression_rate(actual_compression, stats.num_rows),
-    ))
-}
-
-fn compression_rate(num_bytes: u64, num_values: u32) -> f32 {
-    num_bytes as f32 / (num_values as f32 * 8.0)
-}
-
-fn relative_difference(left: u64, right: u64) -> f32 {
-    let left = left as f32;
-    let right = right as f32;
-    2.0f32 * (left - right).abs() / (left + right)
+    Some((estimation, actual_compression))
 }

 proptest! {
@@ -155,8 +118,8 @@ pub fn get_codec_test_datasets() -> Vec<(Vec<u64>, &'static str)> {
    data_and_names
 }

-fn test_codec<C: ColumnCodec>() {
-    let codec_name = std::any::type_name::<C>();
+fn test_codec<C: FastFieldCodec>() {
+    let codec_name = format!("{:?}", C::CODEC_TYPE);
    for (data, dataset_name) in get_codec_test_datasets() {
        let estimate_actual_opt: Option<(f32, f32)> =
            tests::create_and_validate::<C>(&data, dataset_name);
@@ -183,48 +146,53 @@ fn test_codec_multi_interpolation() {

 use super::*;

-fn estimate<C: ColumnCodec>(vals: &[u64]) -> Option<f32> {
-    let mut stats_collector = StatsCollector::default();
-    let mut estimator = C::Estimator::default();
-    for &val in vals {
-        stats_collector.collect(val);
-        estimator.collect(val);
-    }
-    estimator.finalize();
-    let stats = stats_collector.stats();
-    let num_bytes = estimator.estimate(&stats)?;
-    if stats.num_rows == 0 {
-        return None;
-    }
-    Some(num_bytes as f32 / (8.0 * stats.num_rows as f32))
-}
-
 #[test]
 fn estimation_good_interpolation_case() {
    let data = (10..=20000_u64).collect::<Vec<_>>();
+    let data: VecColumn = data.as_slice().into();

-    let linear_interpol_estimation = estimate::<LinearCodec>(&data).unwrap();
+    let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap();
    assert_le!(linear_interpol_estimation, 0.01);

-    let multi_linear_interpol_estimation = estimate::<BlockwiseLinearCodec>(&data).unwrap();
+    let multi_linear_interpol_estimation = BlockwiseLinearCodec::estimate(&data).unwrap();
    assert_le!(multi_linear_interpol_estimation, 0.2);
    assert_lt!(linear_interpol_estimation, multi_linear_interpol_estimation);

-    let bitpacked_estimation = estimate::<BitpackedCodec>(&data).unwrap();
+    let bitpacked_estimation = BitpackedCodec::estimate(&data).unwrap();
    assert_lt!(linear_interpol_estimation, bitpacked_estimation);
 }
+#[test]
+fn estimation_test_bad_interpolation_case() {
+    let data: &[u64] = &[200, 10, 10, 10, 10, 1000, 20];
+
+    let data: VecColumn = data.into();
+    let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap();
+    assert_le!(linear_interpol_estimation, 0.34);
+
+    let bitpacked_estimation = BitpackedCodec::estimate(&data).unwrap();
+    assert_lt!(bitpacked_estimation, linear_interpol_estimation);
+}
+
+#[test]
+fn estimation_prefer_bitpacked() {
+    let data = VecColumn::from(&[10, 10, 10, 10]);
+    let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap();
+    let bitpacked_estimation = BitpackedCodec::estimate(&data).unwrap();
+    assert_lt!(bitpacked_estimation, linear_interpol_estimation);
+}

 #[test]
 fn estimation_test_bad_interpolation_case_monotonically_increasing() {
    let mut data: Vec<u64> = (201..=20000_u64).collect();
    data.push(1_000_000);
+    let data: VecColumn = data.as_slice().into();

    // in this case the linear interpolation can't in fact not be worse than bitpacking,
    // but the estimator adds some threshold, which leads to estimated worse behavior
-    let linear_interpol_estimation = estimate::<LinearCodec>(&data[..]).unwrap();
+    let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap();
    assert_le!(linear_interpol_estimation, 0.35);

-    let bitpacked_estimation = estimate::<BitpackedCodec>(&data).unwrap();
+    let bitpacked_estimation = BitpackedCodec::estimate(&data).unwrap();
    assert_le!(bitpacked_estimation, 0.32);
    assert_le!(bitpacked_estimation, linear_interpol_estimation);
 }
@@ -233,7 +201,7 @@ fn estimation_test_bad_interpolation_case_monotonically_increasing() {
 fn test_fast_field_codec_type_to_code() {
    let mut count_codec = 0;
    for code in 0..=255 {
-        if let Some(codec_type) = CodecType::try_from_code(code) {
+        if let Some(codec_type) = FastFieldCodecType::from_code(code) {
            assert_eq!(codec_type.to_code(), code);
            count_codec += 1;
        }
@@ -241,16 +209,19 @@ fn test_fast_field_codec_type_to_code() {
    assert_eq!(count_codec, 3);
 }

-fn test_fastfield_gcd_i64_with_codec(codec_type: CodecType, num_vals: usize) -> io::Result<()> {
+fn test_fastfield_gcd_i64_with_codec(
+    codec_type: FastFieldCodecType,
+    num_vals: usize,
+) -> io::Result<()> {
    let mut vals: Vec<i64> = (-4..=(num_vals as i64) - 5).map(|val| val * 1000).collect();
    let mut buffer: Vec<u8> = Vec::new();
-    crate::column_values::serialize_u64_based_column_values(
-        || vals.iter().cloned(),
+    crate::column_values::serialize_column_values(
+        &VecColumn::from(&vals),
        &[codec_type],
        &mut buffer,
    )?;
    let buffer = OwnedBytes::new(buffer);
-    let column = crate::column_values::load_u64_based_column_values::<i64>(buffer.clone())?;
+    let column = crate::column_values::open_u64_mapped::<i64>(buffer.clone())?;
    assert_eq!(column.get_val(0), -4000i64);
    assert_eq!(column.get_val(1), -3000i64);
    assert_eq!(column.get_val(2), -2000i64);
@@ -261,8 +232,8 @@ fn test_fastfield_gcd_i64_with_codec(codec_type: CodecType, num_vals: usize) ->
    let mut buffer_without_gcd = Vec::new();
    vals.pop();
    vals.push(1001i64);
-    crate::column_values::serialize_u64_based_column_values(
-        || vals.iter().cloned(),
+    crate::column_values::serialize_column_values(
+        &VecColumn::from(&vals),
        &[codec_type],
        &mut buffer_without_gcd,
    )?;
@@ -275,25 +246,28 @@ fn test_fastfield_gcd_i64_with_codec(codec_type: CodecType, num_vals: usize) ->
 #[test]
 fn test_fastfield_gcd_i64() -> io::Result<()> {
    for &codec_type in &[
-        CodecType::Bitpacked,
-        CodecType::BlockwiseLinear,
-        CodecType::Linear,
+        FastFieldCodecType::Bitpacked,
+        FastFieldCodecType::BlockwiseLinear,
+        FastFieldCodecType::Linear,
    ] {
        test_fastfield_gcd_i64_with_codec(codec_type, 5500)?;
    }
    Ok(())
 }

-fn test_fastfield_gcd_u64_with_codec(codec_type: CodecType, num_vals: usize) -> io::Result<()> {
+fn test_fastfield_gcd_u64_with_codec(
+    codec_type: FastFieldCodecType,
+    num_vals: usize,
+) -> io::Result<()> {
    let mut vals: Vec<u64> = (1..=num_vals).map(|i| i as u64 * 1000u64).collect();
    let mut buffer: Vec<u8> = Vec::new();
-    crate::column_values::serialize_u64_based_column_values(
-        || vals.iter().cloned(),
+    crate::column_values::serialize_column_values(
+        &VecColumn::from(&vals),
        &[codec_type],
        &mut buffer,
    )?;
    let buffer = OwnedBytes::new(buffer);
-    let column = crate::column_values::load_u64_based_column_values::<u64>(buffer.clone())?;
+    let column = crate::column_values::open_u64_mapped::<u64>(buffer.clone())?;
    assert_eq!(column.get_val(0), 1000u64);
    assert_eq!(column.get_val(1), 2000u64);
    assert_eq!(column.get_val(2), 3000u64);
@@ -304,8 +278,8 @@ fn test_fastfield_gcd_u64_with_codec(codec_type: CodecType, num_vals: usize) ->
    let mut buffer_without_gcd = Vec::new();
    vals.pop();
    vals.push(1001u64);
-    crate::column_values::serialize_u64_based_column_values(
-        || vals.iter().cloned(),
+    crate::column_values::serialize_column_values(
+        &VecColumn::from(&vals),
        &[codec_type],
        &mut buffer_without_gcd,
    )?;
@@ -317,9 +291,9 @@ fn test_fastfield_gcd_u64_with_codec(codec_type: CodecType, num_vals: usize) ->
 #[test]
 fn test_fastfield_gcd_u64() -> io::Result<()> {
    for &codec_type in &[
-        CodecType::Bitpacked,
-        CodecType::BlockwiseLinear,
-        CodecType::Linear,
+        FastFieldCodecType::Bitpacked,
+        FastFieldCodecType::BlockwiseLinear,
+        FastFieldCodecType::Linear,
    ] {
        test_fastfield_gcd_u64_with_codec(codec_type, 5500)?;
    }
@@ -328,10 +302,7 @@ fn test_fastfield_gcd_u64() -> io::Result<()> {

 #[test]
 pub fn test_fastfield2() {
-    let test_fastfield = crate::column_values::serialize_and_load_u64_based_column_values::<u64>(
-        &&[100u64, 200u64, 300u64][..],
-        &ALL_U64_CODEC_TYPES,
-    );
+    let test_fastfield = crate::column_values::serialize_and_load(&[100u64, 200u64, 300u64]);
    assert_eq!(test_fastfield.get_val(0), 100);
    assert_eq!(test_fastfield.get_val(1), 200);
    assert_eq!(test_fastfield.get_val(2), 300);
--- a/columnar/src/column_values/u64_based/bitpacked.rs
+++ b/columnar/src/column_values/u64_based/bitpacked.rs
@@ -1,127 +0,0 @@
-use std::io::{self, Write};
-
-use common::{BinarySerializable, OwnedBytes};
-use fastdivide::DividerU64;
-use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
-
-use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, Stats};
-use crate::{ColumnValues, RowId};
-
-/// Depending on the field type, a different
-/// fast field is required.
-#[derive(Clone)]
-pub struct BitpackedReader {
-    data: OwnedBytes,
-    bit_unpacker: BitUnpacker,
-    stats: Stats,
-}
-
-impl ColumnValues for BitpackedReader {
-    #[inline(always)]
-    fn get_val(&self, doc: u32) -> u64 {
-        self.stats.min_value + self.stats.gcd.get() * self.bit_unpacker.get(doc, &self.data)
-    }
-
-    #[inline]
-    fn min_value(&self) -> u64 {
-        self.stats.min_value
-    }
-    #[inline]
-    fn max_value(&self) -> u64 {
-        self.stats.max_value
-    }
-    #[inline]
-    fn num_vals(&self) -> RowId {
-        self.stats.num_rows
-    }
-}
-
-fn num_bits(stats: &Stats) -> u8 {
-    compute_num_bits(stats.amplitude() / stats.gcd)
-}
-
-#[derive(Default)]
-pub struct BitpackedCodecEstimator;
-
-impl ColumnCodecEstimator for BitpackedCodecEstimator {
-    fn collect(&mut self, _value: u64) {}
-
-    fn estimate(&self, stats: &Stats) -> Option<u64> {
-        let num_bits_per_value = num_bits(stats);
-        Some(stats.num_bytes() + (stats.num_rows as u64 * (num_bits_per_value as u64) + 7) / 8)
-    }
-
-    fn serialize(
-        &self,
-        stats: &Stats,
-        vals: &mut dyn Iterator<Item = u64>,
-        wrt: &mut dyn Write,
-    ) -> io::Result<()> {
-        stats.serialize(wrt)?;
-        let num_bits = num_bits(stats);
-        let mut bit_packer = BitPacker::new();
-        let divider = DividerU64::divide_by(stats.gcd.get());
-        for val in vals {
-            bit_packer.write(divider.divide(val - stats.min_value), num_bits, wrt)?;
-        }
-        bit_packer.close(wrt)?;
-        Ok(())
-    }
-}
-
-pub struct BitpackedCodec;
-
-impl ColumnCodec for BitpackedCodec {
-    type Reader = BitpackedReader;
-    type Estimator = BitpackedCodecEstimator;
-
-    /// Opens a fast field given a file.
-    fn load(mut data: OwnedBytes) -> io::Result<Self::Reader> {
-        let stats = Stats::deserialize(&mut data)?;
-        let num_bits = num_bits(&stats);
-        let bit_unpacker = BitUnpacker::new(num_bits);
-        Ok(BitpackedReader {
-            data,
-            bit_unpacker,
-            stats,
-        })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::column_values::u64_based::tests::create_and_validate;
-
-    #[test]
-    fn test_with_codec_data_sets_simple() {
-        create_and_validate::<BitpackedCodec>(&[4, 3, 12], "name");
-    }
-
-    #[test]
-    fn test_with_codec_data_sets_simple_gcd() {
-        create_and_validate::<BitpackedCodec>(&[1000, 2000, 3000], "name");
-    }
-
-    #[test]
-    fn test_with_codec_data_sets() {
-        let data_sets = crate::column_values::u64_based::tests::get_codec_test_datasets();
-        for (mut data, name) in data_sets {
-            create_and_validate::<BitpackedCodec>(&data, name);
-            data.reverse();
-            create_and_validate::<BitpackedCodec>(&data, name);
-        }
-    }
-
-    #[test]
-    fn bitpacked_fast_field_rand() {
-        for _ in 0..500 {
-            let mut data = (0..1 + rand::random::<u8>() as usize)
-                .map(|_| rand::random::<i64>() as u64 / 2)
-                .collect::<Vec<_>>();
-            create_and_validate::<BitpackedCodec>(&data, "rand");
-            data.reverse();
-            create_and_validate::<BitpackedCodec>(&data, "rand");
-        }
-    }
-}
--- a/columnar/src/column_values/u64_based/blockwise_linear.rs
+++ b/columnar/src/column_values/u64_based/blockwise_linear.rs
@@ -1,281 +0,0 @@
-use std::io::Write;
-use std::sync::Arc;
-use std::{io, iter};
-
-use common::{BinarySerializable, CountingWriter, DeserializeFrom, OwnedBytes};
-use fastdivide::DividerU64;
-use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
-
-use crate::column_values::u64_based::line::Line;
-use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, Stats};
-use crate::column_values::{ColumnValues, VecColumn};
-use crate::MonotonicallyMappableToU64;
-
-const BLOCK_SIZE: u32 = 512u32;
-
-#[derive(Debug, Default)]
-struct Block {
-    line: Line,
-    bit_unpacker: BitUnpacker,
-    data_start_offset: usize,
-}
-
-impl BinarySerializable for Block {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
-        self.line.serialize(writer)?;
-        self.bit_unpacker.bit_width().serialize(writer)?;
-        Ok(())
-    }
-
-    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
-        let line = Line::deserialize(reader)?;
-        let bit_width = u8::deserialize(reader)?;
-        Ok(Block {
-            line,
-            bit_unpacker: BitUnpacker::new(bit_width),
-            data_start_offset: 0,
-        })
-    }
-}
-
-fn compute_num_blocks(num_vals: u32) -> u32 {
-    (num_vals + BLOCK_SIZE - 1) / BLOCK_SIZE
-}
-
-pub struct BlockwiseLinearEstimator {
-    block: Vec<u64>,
-    values_num_bytes: u64,
-    meta_num_bytes: u64,
-}
-
-impl Default for BlockwiseLinearEstimator {
-    fn default() -> Self {
-        Self {
-            block: Vec::with_capacity(BLOCK_SIZE as usize),
-            values_num_bytes: 0u64,
-            meta_num_bytes: 0u64,
-        }
-    }
-}
-
-impl BlockwiseLinearEstimator {
-    fn flush_block_estimate(&mut self) {
-        if self.block.is_empty() {
-            return;
-        }
-        let line = Line::train(&VecColumn::from(&self.block));
-        let mut max_value = 0u64;
-        for (i, buffer_val) in self.block.iter().enumerate() {
-            let interpolated_val = line.eval(i as u32);
-            let val = buffer_val.wrapping_sub(interpolated_val);
-            max_value = val.max(max_value);
-        }
-        let bit_width = compute_num_bits(max_value) as usize;
-        self.values_num_bytes += (bit_width * self.block.len() + 7) as u64 / 8;
-        self.meta_num_bytes += 1 + line.num_bytes();
-    }
-}
-
-impl ColumnCodecEstimator for BlockwiseLinearEstimator {
-    fn collect(&mut self, value: u64) {
-        self.block.push(value);
-        if self.block.len() == BLOCK_SIZE as usize {
-            self.flush_block_estimate();
-            self.block.clear();
-        }
-    }
-    fn estimate(&self, stats: &Stats) -> Option<u64> {
-        let mut estimate = 4 + stats.num_bytes() + self.meta_num_bytes + self.values_num_bytes;
-        if stats.gcd.get() > 1 {
-            let estimate_gain_from_gcd =
-                (stats.gcd.get() as f32).log2().floor() * stats.num_rows as f32 / 8.0f32;
-            estimate = estimate.saturating_sub(estimate_gain_from_gcd as u64);
-        }
-        Some(estimate)
-    }
-
-    fn finalize(&mut self) {
-        self.flush_block_estimate();
-    }
-
-    fn serialize(
-        &self,
-        stats: &Stats,
-        mut vals: &mut dyn Iterator<Item = u64>,
-        wrt: &mut dyn Write,
-    ) -> io::Result<()> {
-        stats.serialize(wrt)?;
-        let mut buffer = Vec::with_capacity(BLOCK_SIZE as usize);
-        let num_blocks = compute_num_blocks(stats.num_rows) as usize;
-        let mut blocks = Vec::with_capacity(num_blocks);
-
-        let mut bit_packer = BitPacker::new();
-
-        let gcd_divider = DividerU64::divide_by(stats.gcd.get());
-
-        for _ in 0..num_blocks {
-            buffer.clear();
-            buffer.extend(
-                (&mut vals)
-                    .map(MonotonicallyMappableToU64::to_u64)
-                    .take(BLOCK_SIZE as usize),
-            );
-
-            for buffer_val in buffer.iter_mut() {
-                *buffer_val = gcd_divider.divide(*buffer_val - stats.min_value);
-            }
-
-            let mut line = Line::train(&VecColumn::from(&buffer));
-
-            assert!(!buffer.is_empty());
-
-            for (i, buffer_val) in buffer.iter_mut().enumerate() {
-                let interpolated_val = line.eval(i as u32);
-                *buffer_val = buffer_val.wrapping_sub(interpolated_val);
-            }
-
-            let bit_width = buffer.iter().copied().map(compute_num_bits).max().unwrap();
-
-            for &buffer_val in &buffer {
-                bit_packer.write(buffer_val, bit_width, wrt)?;
-            }
-
-            blocks.push(Block {
-                line,
-                bit_unpacker: BitUnpacker::new(bit_width),
-                data_start_offset: 0,
-            });
-        }
-
-        bit_packer.close(wrt)?;
-
-        assert_eq!(blocks.len(), num_blocks);
-
-        let mut counting_wrt = CountingWriter::wrap(wrt);
-        for block in &blocks {
-            block.serialize(&mut counting_wrt)?;
-        }
-        let footer_len = counting_wrt.written_bytes();
-        (footer_len as u32).serialize(&mut counting_wrt)?;
-
-        Ok(())
-    }
-}
-
-pub struct BlockwiseLinearCodec;
-
-impl ColumnCodec<u64> for BlockwiseLinearCodec {
-    type Reader = BlockwiseLinearReader;
-
-    type Estimator = BlockwiseLinearEstimator;
-
-    fn load(mut bytes: OwnedBytes) -> io::Result<Self::Reader> {
-        let stats = Stats::deserialize(&mut bytes)?;
-        let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;
-        let footer_offset = bytes.len() - 4 - footer_len as usize;
-        let (data, mut footer) = bytes.split(footer_offset);
-        let num_blocks = compute_num_blocks(stats.num_rows);
-        let mut blocks: Vec<Block> = iter::repeat_with(|| Block::deserialize(&mut footer))
-            .take(num_blocks as usize)
-            .collect::<io::Result<_>>()?;
-        let mut start_offset = 0;
-        for block in &mut blocks {
-            block.data_start_offset = start_offset;
-            start_offset += (block.bit_unpacker.bit_width() as usize) * BLOCK_SIZE as usize / 8;
-        }
-        Ok(BlockwiseLinearReader {
-            blocks: blocks.into_boxed_slice().into(),
-            data,
-            stats,
-        })
-    }
-}
-
-#[derive(Clone)]
-pub struct BlockwiseLinearReader {
-    blocks: Arc<[Block]>,
-    data: OwnedBytes,
-    stats: Stats,
-}
-
-impl ColumnValues for BlockwiseLinearReader {
-    #[inline(always)]
-    fn get_val(&self, idx: u32) -> u64 {
-        let block_id = (idx / BLOCK_SIZE as u32) as usize;
-        let idx_within_block = idx % (BLOCK_SIZE as u32);
-        let block = &self.blocks[block_id];
-        let interpoled_val: u64 = block.line.eval(idx_within_block);
-        let block_bytes = &self.data[block.data_start_offset..];
-        let bitpacked_diff = block.bit_unpacker.get(idx_within_block, block_bytes);
-        // TODO optimize me! the line parameters could be tweaked to include the multiplication and
-        // remove the dependency.
-        self.stats.min_value
-            + self
-                .stats
-                .gcd
-                .get()
-                .wrapping_mul(interpoled_val.wrapping_add(bitpacked_diff))
-    }
-
-    #[inline(always)]
-    fn min_value(&self) -> u64 {
-        self.stats.min_value
-    }
-
-    #[inline(always)]
-    fn max_value(&self) -> u64 {
-        self.stats.max_value
-    }
-
-    #[inline(always)]
-    fn num_vals(&self) -> u32 {
-        self.stats.num_rows
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::column_values::u64_based::tests::create_and_validate;
-
-    #[test]
-    fn test_with_codec_data_sets_simple() {
-        create_and_validate::<BlockwiseLinearCodec>(
-            &[11, 20, 40, 20, 10, 10, 10, 10, 10, 10],
-            "simple test",
-        )
-        .unwrap();
-    }
-
-    #[test]
-    fn test_with_codec_data_sets_simple_gcd() {
-        let (_, actual_compression_rate) = create_and_validate::<BlockwiseLinearCodec>(
-            &[10, 20, 40, 20, 10, 10, 10, 10, 10, 10],
-            "name",
-        )
-        .unwrap();
-        assert_eq!(actual_compression_rate, 0.175);
-    }
-
-    #[test]
-    fn test_with_codec_data_sets() {
-        let data_sets = crate::column_values::u64_based::tests::get_codec_test_datasets();
-        for (mut data, name) in data_sets {
-            create_and_validate::<BlockwiseLinearCodec>(&data, name);
-            data.reverse();
-            create_and_validate::<BlockwiseLinearCodec>(&data, name);
-        }
-    }
-
-    #[test]
-    fn test_blockwise_linear_fast_field_rand() {
-        for _ in 0..500 {
-            let mut data = (0..1 + rand::random::<u8>() as usize)
-                .map(|_| rand::random::<i64>() as u64 / 2)
-                .collect::<Vec<_>>();
-            create_and_validate::<BlockwiseLinearCodec>(&data, "rand");
-            data.reverse();
-            create_and_validate::<BlockwiseLinearCodec>(&data, "rand");
-        }
-    }
-}
--- a/columnar/src/column_values/u64_based/linear.rs
+++ b/columnar/src/column_values/u64_based/linear.rs
@@ -1,277 +0,0 @@
-use std::io;
-
-use common::{BinarySerializable, OwnedBytes};
-use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
-
-use super::line::Line;
-use super::ColumnValues;
-use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, Stats};
-use crate::column_values::VecColumn;
-use crate::{MonotonicallyMappableToU64, RowId};
-
-const HALF_SPACE: u64 = u64::MAX / 2;
-const LINE_ESTIMATION_BLOCK_LEN: usize = 512;
-
-/// Depending on the field type, a different
-/// fast field is required.
-#[derive(Clone)]
-pub struct LinearReader {
-    data: OwnedBytes,
-    linear_params: LinearParams,
-    stats: Stats,
-}
-
-impl ColumnValues for LinearReader {
-    #[inline]
-    fn get_val(&self, doc: u32) -> u64 {
-        let interpoled_val: u64 = self.linear_params.line.eval(doc);
-        let bitpacked_diff = self.linear_params.bit_unpacker.get(doc, &self.data);
-        interpoled_val.wrapping_add(bitpacked_diff)
-    }
-
-    #[inline(always)]
-    fn min_value(&self) -> u64 {
-        self.stats.min_value
-    }
-
-    #[inline(always)]
-    fn max_value(&self) -> u64 {
-        self.stats.max_value
-    }
-
-    #[inline]
-    fn num_vals(&self) -> u32 {
-        self.stats.num_rows
-    }
-}
-
-/// Fastfield serializer, which tries to guess values by linear interpolation
-/// and stores the difference bitpacked.
-pub struct LinearCodec;
-
-#[derive(Debug, Clone)]
-struct LinearParams {
-    line: Line,
-    bit_unpacker: BitUnpacker,
-}
-
-impl BinarySerializable for LinearParams {
-    fn serialize<W: io::Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
-        self.line.serialize(writer)?;
-        self.bit_unpacker.bit_width().serialize(writer)?;
-        Ok(())
-    }
-
-    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
-        let line = Line::deserialize(reader)?;
-        let bit_width = u8::deserialize(reader)?;
-        Ok(Self {
-            line,
-            bit_unpacker: BitUnpacker::new(bit_width),
-        })
-    }
-}
-
-pub struct LinearCodecEstimator {
-    block: Vec<u64>,
-    line: Option<Line>,
-    row_id: RowId,
-    min_deviation: u64,
-    max_deviation: u64,
-    first_val: u64,
-    last_val: u64,
-}
-
-impl Default for LinearCodecEstimator {
-    fn default() -> LinearCodecEstimator {
-        LinearCodecEstimator {
-            block: Vec::with_capacity(LINE_ESTIMATION_BLOCK_LEN),
-            line: None,
-            row_id: 0,
-            min_deviation: u64::MAX,
-            max_deviation: u64::MIN,
-            first_val: 0u64,
-            last_val: 0u64,
-        }
-    }
-}
-
-impl ColumnCodecEstimator for LinearCodecEstimator {
-    fn finalize(&mut self) {
-        if let Some(line) = self.line.as_mut() {
-            line.intercept = line
-                .intercept
-                .wrapping_add(self.min_deviation)
-                .wrapping_sub(HALF_SPACE);
-        }
-    }
-
-    fn estimate(&self, stats: &Stats) -> Option<u64> {
-        let line = self.line?;
-        let amplitude = self.max_deviation - self.min_deviation;
-        let num_bits = compute_num_bits(amplitude);
-        let linear_params = LinearParams {
-            line,
-            bit_unpacker: BitUnpacker::new(num_bits),
-        };
-        Some(
-            stats.num_bytes()
-                + linear_params.num_bytes()
-                + (num_bits as u64 * stats.num_rows as u64 + 7) / 8,
-        )
-    }
-
-    fn serialize(
-        &self,
-        stats: &Stats,
-        vals: &mut dyn Iterator<Item = u64>,
-        wrt: &mut dyn io::Write,
-    ) -> io::Result<()> {
-        stats.serialize(wrt)?;
-        let line = self.line.unwrap();
-        let amplitude = self.max_deviation - self.min_deviation;
-        let num_bits = compute_num_bits(amplitude);
-        let linear_params = LinearParams {
-            line,
-            bit_unpacker: BitUnpacker::new(num_bits),
-        };
-        linear_params.serialize(wrt)?;
-        let mut bit_packer = BitPacker::new();
-        for (pos, value) in vals.enumerate() {
-            let calculated_value = line.eval(pos as u32);
-            let offset = value.wrapping_sub(calculated_value);
-            bit_packer.write(offset, num_bits, wrt)?;
-        }
-        bit_packer.close(wrt)?;
-        Ok(())
-    }
-
-    fn collect(&mut self, value: u64) {
-        if let Some(line) = self.line {
-            self.collect_after_line_estimation(&line, value);
-        } else {
-            self.collect_before_line_estimation(value);
-        }
-    }
-}
-
-impl LinearCodecEstimator {
-    #[inline]
-    fn collect_after_line_estimation(&mut self, line: &Line, value: u64) {
-        let interpoled_val: u64 = line.eval(self.row_id);
-        let deviation = value.wrapping_add(HALF_SPACE).wrapping_sub(interpoled_val);
-        self.min_deviation = self.min_deviation.min(deviation);
-        self.max_deviation = self.max_deviation.max(deviation);
-        if self.row_id == 0 {
-            self.first_val = value;
-        }
-        self.last_val = value;
-        self.row_id += 1u32;
-    }
-
-    #[inline]
-    fn collect_before_line_estimation(&mut self, value: u64) {
-        self.block.push(value);
-        if self.block.len() == LINE_ESTIMATION_BLOCK_LEN {
-            let line = Line::train(&VecColumn::from(&self.block));
-            let block = std::mem::take(&mut self.block);
-            for val in block {
-                self.collect_after_line_estimation(&line, val);
-            }
-            self.line = Some(line);
-        }
-    }
-}
-
-impl ColumnCodec for LinearCodec {
-    type Reader = LinearReader;
-
-    type Estimator = LinearCodecEstimator;
-
-    fn load(mut data: OwnedBytes) -> io::Result<Self::Reader> {
-        let stats = Stats::deserialize(&mut data)?;
-        let linear_params = LinearParams::deserialize(&mut data)?;
-        Ok(LinearReader {
-            stats,
-            linear_params,
-            data,
-        })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use rand::RngCore;
-
-    use super::*;
-    use crate::column_values::u64_based::tests::{create_and_validate, get_codec_test_datasets};
-
-    #[test]
-    fn test_compression_simple() {
-        let vals = (100u64..)
-            .take(super::LINE_ESTIMATION_BLOCK_LEN)
-            .collect::<Vec<_>>();
-        create_and_validate::<LinearCodec>(&vals, "simple monotonically large").unwrap();
-    }
-
-    #[test]
-    fn test_compression() {
-        let data = (10..=6_000_u64).collect::<Vec<_>>();
-        let (estimate, actual_compression) =
-            create_and_validate::<LinearCodec>(&data, "simple monotonically large").unwrap();
-        assert_le!(actual_compression, 0.001);
-        assert_le!(estimate, 0.02);
-    }
-
-    #[test]
-    fn test_with_codec_datasets() {
-        let data_sets = get_codec_test_datasets();
-        for (mut data, name) in data_sets {
-            create_and_validate::<LinearCodec>(&data, name);
-            data.reverse();
-            create_and_validate::<LinearCodec>(&data, name);
-        }
-    }
-    #[test]
-    fn linear_interpol_fast_field_test_large_amplitude() {
-        let data = vec![
-            i64::MAX as u64 / 2,
-            i64::MAX as u64 / 3,
-            i64::MAX as u64 / 2,
-        ];
-        create_and_validate::<LinearCodec>(&data, "large amplitude");
-    }
-
-    #[test]
-    fn overflow_error_test() {
-        let data = vec![1572656989877777, 1170935903116329, 720575940379279, 0];
-        create_and_validate::<LinearCodec>(&data, "overflow test");
-    }
-
-    #[test]
-    fn linear_interpol_fast_concave_data() {
-        let data = vec![0, 1, 2, 5, 8, 10, 20, 50];
-        create_and_validate::<LinearCodec>(&data, "concave data");
-    }
-    #[test]
-    fn linear_interpol_fast_convex_data() {
-        let data = vec![0, 40, 60, 70, 75, 77];
-        create_and_validate::<LinearCodec>(&data, "convex data");
-    }
-    #[test]
-    fn linear_interpol_fast_field_test_simple() {
-        let data = (10..=20_u64).collect::<Vec<_>>();
-        create_and_validate::<LinearCodec>(&data, "simple monotonically");
-    }
-
-    #[test]
-    fn linear_interpol_fast_field_rand() {
-        let mut rng = rand::thread_rng();
-        for _ in 0..50 {
-            let mut data = (0..10_000).map(|_| rng.next_u64()).collect::<Vec<_>>();
-            create_and_validate::<LinearCodec>(&data, "random");
-            data.reverse();
-            create_and_validate::<LinearCodec>(&data, "random");
-        }
-    }
-}
--- a/columnar/src/column_values/u64_based/mod.rs
+++ b/columnar/src/column_values/u64_based/mod.rs
@@ -1,186 +0,0 @@
-mod bitpacked;
-mod blockwise_linear;
-mod line;
-mod linear;
-mod stats_collector;
-
-use std::io;
-use std::io::Write;
-use std::sync::Arc;
-
-use common::{BinarySerializable, OwnedBytes};
-
-use crate::column_values::monotonic_mapping::{
-    StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternal,
-};
-use crate::column_values::u64_based::bitpacked::BitpackedCodec;
-use crate::column_values::u64_based::blockwise_linear::BlockwiseLinearCodec;
-use crate::column_values::u64_based::linear::LinearCodec;
-use crate::column_values::u64_based::stats_collector::StatsCollector;
-use crate::column_values::{monotonic_map_column, Stats};
-use crate::iterable::Iterable;
-use crate::{ColumnValues, MonotonicallyMappableToU64};
-
-pub trait ColumnCodecEstimator<T = u64>: 'static {
-    fn collect(&mut self, value: u64);
-    fn estimate(&self, stats: &Stats) -> Option<u64>;
-    fn finalize(&mut self) {}
-    fn serialize(
-        &self,
-        stats: &Stats,
-        vals: &mut dyn Iterator<Item = T>,
-        wrt: &mut dyn io::Write,
-    ) -> io::Result<()>;
-}
-
-pub trait ColumnCodec<T: PartialOrd = u64> {
-    type Reader: ColumnValues<T> + 'static;
-    type Estimator: ColumnCodecEstimator + Default;
-
-    fn load(bytes: OwnedBytes) -> io::Result<Self::Reader>;
-
-    fn estimator() -> Self::Estimator {
-        Self::Estimator::default()
-    }
-    fn boxed_estimator() -> Box<dyn ColumnCodecEstimator> {
-        Box::new(Self::estimator())
-    }
-}
-
-/// Available codecs to use to encode the u64 (via [`MonotonicallyMappableToU64`]) converted data.
-#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
-#[repr(u8)]
-pub enum CodecType {
-    /// Bitpack all values in the value range. The number of bits is defined by the amplitude
-    /// `column.max_value() - column.min_value()`
-    Bitpacked = 0u8,
-    /// Linear interpolation puts a line between the first and last value and then bitpacks the
-    /// values by the offset from the line. The number of bits is defined by the max deviation from
-    /// the line.
-    Linear = 1u8,
-    /// Same as [`CodecType::Linear`], but encodes in blocks of 512 elements.
-    BlockwiseLinear = 2u8,
-}
-
-pub const ALL_U64_CODEC_TYPES: [CodecType; 3] = [
-    CodecType::Bitpacked,
-    CodecType::Linear,
-    CodecType::BlockwiseLinear,
-];
-
-impl CodecType {
-    fn to_code(self) -> u8 {
-        self as u8
-    }
-
-    fn try_from_code(code: u8) -> Option<CodecType> {
-        match code {
-            0u8 => Some(CodecType::Bitpacked),
-            1u8 => Some(CodecType::Linear),
-            2u8 => Some(CodecType::BlockwiseLinear),
-            _ => None,
-        }
-    }
-
-    fn load<T: MonotonicallyMappableToU64>(
-        &self,
-        bytes: OwnedBytes,
-    ) -> io::Result<Arc<dyn ColumnValues<T>>> {
-        match self {
-            CodecType::Bitpacked => load_specific_codec::<BitpackedCodec, T>(bytes),
-            CodecType::Linear => load_specific_codec::<LinearCodec, T>(bytes),
-            CodecType::BlockwiseLinear => load_specific_codec::<BlockwiseLinearCodec, T>(bytes),
-        }
-    }
-}
-
-fn load_specific_codec<C: ColumnCodec, T: MonotonicallyMappableToU64>(
-    bytes: OwnedBytes,
-) -> io::Result<Arc<dyn ColumnValues<T>>> {
-    let reader = C::load(bytes)?;
-    let reader_typed = monotonic_map_column(
-        reader,
-        StrictlyMonotonicMappingInverter::from(StrictlyMonotonicMappingToInternal::<T>::new()),
-    );
-    Ok(Arc::new(reader_typed))
-}
-
-impl CodecType {
-    pub fn estimator(&self) -> Box<dyn ColumnCodecEstimator> {
-        match self {
-            CodecType::Bitpacked => BitpackedCodec::boxed_estimator(),
-            CodecType::Linear => LinearCodec::boxed_estimator(),
-            CodecType::BlockwiseLinear => BlockwiseLinearCodec::boxed_estimator(),
-        }
-    }
-}
-
-pub fn serialize_u64_based_column_values<T: MonotonicallyMappableToU64, F, I>(
-    vals: F,
-    codec_types: &[CodecType],
-    wrt: &mut dyn Write,
-) -> io::Result<()>
-where
-    I: Iterator<Item = T>,
-    F: Fn() -> I,
-{
-    let mut stats_collector = StatsCollector::default();
-    let mut estimators: Vec<(CodecType, Box<dyn ColumnCodecEstimator>)> =
-        Vec::with_capacity(codec_types.len());
-    for &codec_type in codec_types {
-        estimators.push((codec_type, codec_type.estimator()));
-    }
-    for val in vals() {
-        let val_u64 = val.to_u64();
-        stats_collector.collect(val_u64);
-        for (_, estimator) in &mut estimators {
-            estimator.collect(val_u64);
-        }
-    }
-    for (_, estimator) in &mut estimators {
-        estimator.finalize();
-    }
-    let stats = stats_collector.stats();
-    let (_, best_codec, best_codec_estimator) = estimators
-        .into_iter()
-        .flat_map(|(codec_type, estimator)| {
-            let num_bytes = estimator.estimate(&stats)?;
-            Some((num_bytes, codec_type, estimator))
-        })
-        .min_by_key(|(num_bytes, _, _)| *num_bytes)
-        .ok_or_else(|| {
-            io::Error::new(io::ErrorKind::InvalidData, "No available applicable codec.")
-        })?;
-    best_codec.to_code().serialize(wrt)?;
-    best_codec_estimator.serialize(
-        &stats,
-        &mut vals().map(MonotonicallyMappableToU64::to_u64),
-        wrt,
-    )?;
-    Ok(())
-}
-
-pub fn load_u64_based_column_values<T: MonotonicallyMappableToU64>(
-    mut bytes: OwnedBytes,
-) -> io::Result<Arc<dyn ColumnValues<T>>> {
-    let codec_type: CodecType = bytes
-        .get(0)
-        .copied()
-        .and_then(CodecType::try_from_code)
-        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Failed to read codec type"))?;
-    bytes.advance(1);
-    codec_type.load(bytes)
-}
-
-/// Helper function to serialize a column (autodetect from all codecs) and then open it
-pub fn serialize_and_load_u64_based_column_values<T: MonotonicallyMappableToU64>(
-    vals: &dyn Iterable,
-    codec_types: &[CodecType],
-) -> Arc<dyn ColumnValues<T>> {
-    let mut buffer = Vec::new();
-    serialize_u64_based_column_values(|| vals.boxed_iter(), codec_types, &mut buffer).unwrap();
-    load_u64_based_column_values::<T>(OwnedBytes::new(buffer)).unwrap()
-}
-
-#[cfg(test)]
-mod tests;
--- a/columnar/src/column_values/u64_based/stats_collector.rs
+++ b/columnar/src/column_values/u64_based/stats_collector.rs
@@ -1,200 +0,0 @@
-use std::num::NonZeroU64;
-
-use fastdivide::DividerU64;
-
-use crate::column_values::Stats;
-use crate::RowId;
-
-/// Compute the gcd of two non null numbers.
-///
-/// It is recommended, but not required, to feed values such that `large >= small`.
-fn compute_gcd(mut large: NonZeroU64, mut small: NonZeroU64) -> NonZeroU64 {
-    loop {
-        let rem: u64 = large.get() % small;
-        if let Some(new_small) = NonZeroU64::new(rem) {
-            (large, small) = (small, new_small);
-        } else {
-            return small;
-        }
-    }
-}
-
-#[derive(Default)]
-pub struct StatsCollector {
-    min_max_opt: Option<(u64, u64)>,
-    num_rows: RowId,
-    // We measure the GCD of the difference between the values and the minimal value.
-    // This is the same as computing the difference between the values and the first value.
-    //
-    // This way, we can compress i64-converted-to-u64 (e.g. timestamp that were supplied in
-    // seconds, only to be converted in microseconds).
-    increment_gcd_opt: Option<(NonZeroU64, DividerU64)>,
-    first_value_opt: Option<u64>,
-}
-
-impl StatsCollector {
-    pub fn stats(&self) -> Stats {
-        let (min_value, max_value) = self.min_max_opt.unwrap_or((0u64, 0u64));
-        let increment_gcd = if let Some((increment_gcd, _)) = self.increment_gcd_opt {
-            increment_gcd
-        } else {
-            NonZeroU64::new(1u64).unwrap()
-        };
-        Stats {
-            min_value,
-            max_value,
-            num_rows: self.num_rows,
-            gcd: increment_gcd,
-        }
-    }
-
-    #[inline]
-    fn update_increment_gcd(&mut self, value: u64) {
-        let Some(first_value) = self.first_value_opt else {
-            // We set the first value and just quit.
-            self.first_value_opt = Some(value);
-            return;
-        };
-        let Some(non_zero_value) = NonZeroU64::new(value.abs_diff(first_value)) else {
-            // We can simply skip 0 values.
-            return;
-        };
-        let Some((gcd, gcd_divider)) = self.increment_gcd_opt else {
-            self.set_increment_gcd(non_zero_value);
-            return;
-        };
-        if gcd.get() == 1 {
-            // It won't see any update now.
-            return;
-        }
-        let remainder =
-            non_zero_value.get() - (gcd_divider.divide(non_zero_value.get())) * gcd.get();
-        if remainder == 0 {
-            return;
-        }
-        let new_gcd = compute_gcd(non_zero_value, gcd);
-        self.set_increment_gcd(new_gcd);
-    }
-
-    fn set_increment_gcd(&mut self, gcd: NonZeroU64) {
-        let new_divider = DividerU64::divide_by(gcd.get());
-        self.increment_gcd_opt = Some((gcd, new_divider));
-    }
-
-    pub fn collect(&mut self, value: u64) {
-        self.min_max_opt = Some(if let Some((min, max)) = self.min_max_opt {
-            (min.min(value), max.max(value))
-        } else {
-            (value, value)
-        });
-        self.num_rows += 1;
-        self.update_increment_gcd(value);
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use std::num::NonZeroU64;
-
-    use crate::column_values::u64_based::stats_collector::{compute_gcd, StatsCollector};
-    use crate::column_values::u64_based::Stats;
-
-    fn compute_stats(vals: impl Iterator<Item = u64>) -> Stats {
-        let mut stats_collector = StatsCollector::default();
-        for val in vals {
-            stats_collector.collect(val);
-        }
-        stats_collector.stats()
-    }
-
-    fn find_gcd(vals: impl Iterator<Item = u64>) -> u64 {
-        compute_stats(vals).gcd.get()
-    }
-
-    #[test]
-    fn test_compute_gcd() {
-        let test_compute_gcd_aux = |large, small, expected| {
-            let large = NonZeroU64::new(large).unwrap();
-            let small = NonZeroU64::new(small).unwrap();
-            let expected = NonZeroU64::new(expected).unwrap();
-            assert_eq!(compute_gcd(small, large), expected);
-            assert_eq!(compute_gcd(large, small), expected);
-        };
-        test_compute_gcd_aux(1, 4, 1);
-        test_compute_gcd_aux(2, 4, 2);
-        test_compute_gcd_aux(10, 25, 5);
-        test_compute_gcd_aux(25, 25, 25);
-    }
-
-    #[test]
-    fn test_gcd() {
-        assert_eq!(find_gcd([0].into_iter()), 1);
-        assert_eq!(find_gcd([0, 10].into_iter()), 10);
-        assert_eq!(find_gcd([10, 0].into_iter()), 10);
-        assert_eq!(find_gcd([].into_iter()), 1);
-        assert_eq!(find_gcd([15, 30, 5, 10].into_iter()), 5);
-        assert_eq!(find_gcd([15, 16, 10].into_iter()), 1);
-        assert_eq!(find_gcd([0, 5, 5, 5].into_iter()), 5);
-        assert_eq!(find_gcd([0, 0].into_iter()), 1);
-        assert_eq!(find_gcd([1, 10, 4, 1, 7, 10].into_iter()), 3);
-        assert_eq!(find_gcd([1, 10, 0, 4, 1, 7, 10].into_iter()), 1);
-    }
-
-    #[test]
-    fn test_stats() {
-        assert_eq!(
-            compute_stats([].into_iter()),
-            Stats {
-                gcd: NonZeroU64::new(1).unwrap(),
-                min_value: 0,
-                max_value: 0,
-                num_rows: 0
-            }
-        );
-        assert_eq!(
-            compute_stats([0, 1].into_iter()),
-            Stats {
-                gcd: NonZeroU64::new(1).unwrap(),
-                min_value: 0,
-                max_value: 1,
-                num_rows: 2
-            }
-        );
-        assert_eq!(
-            compute_stats([0, 1].into_iter()),
-            Stats {
-                gcd: NonZeroU64::new(1).unwrap(),
-                min_value: 0,
-                max_value: 1,
-                num_rows: 2
-            }
-        );
-        assert_eq!(
-            compute_stats([10, 20, 30].into_iter()),
-            Stats {
-                gcd: NonZeroU64::new(10).unwrap(),
-                min_value: 10,
-                max_value: 30,
-                num_rows: 3
-            }
-        );
-        assert_eq!(
-            compute_stats([10, 50, 10, 30].into_iter()),
-            Stats {
-                gcd: NonZeroU64::new(20).unwrap(),
-                min_value: 10,
-                max_value: 50,
-                num_rows: 4
-            }
-        );
-        assert_eq!(
-            compute_stats([10, 0, 30].into_iter()),
-            Stats {
-                gcd: NonZeroU64::new(10).unwrap(),
-                min_value: 0,
-                max_value: 30,
-                num_rows: 3
-            }
-        );
-    }
-}
--- a/columnar/src/columnar/column_type.rs
+++ b/columnar/src/columnar/column_type.rs
@@ -4,24 +4,22 @@ use std::net::Ipv6Addr;
 use crate::value::NumericalType;
 use crate::InvalidData;

-/// The column type represents the column type and can fit on 6-bits.
-///
-/// - bits[0..3]: Column category type.
-/// - bits[3..6]: Numerical type if necessary.
+/// The column type represents the column type.
+/// Any changes need to be propagated to `COLUMN_TYPES`.
 #[derive(Hash, Eq, PartialEq, Debug, Clone, Copy, Ord, PartialOrd)]
 #[repr(u8)]
 pub enum ColumnType {
    I64 = 0u8,
    U64 = 1u8,
    F64 = 2u8,
-    Bytes = 10u8,
-    Str = 14u8,
-    Bool = 18u8,
-    IpAddr = 22u8,
-    DateTime = 26u8,
+    Bytes = 3u8,
+    Str = 4u8,
+    Bool = 5u8,
+    IpAddr = 6u8,
+    DateTime = 7u8,
 }

-#[cfg(test)]
+// The order needs to match _exactly_ the order in the enum
 const COLUMN_TYPES: [ColumnType; 8] = [
    ColumnType::I64,
    ColumnType::U64,
@@ -39,18 +37,7 @@ impl ColumnType {
    }

    pub(crate) fn try_from_code(code: u8) -> Result<ColumnType, InvalidData> {
-        use ColumnType::*;
-        match code {
-            0u8 => Ok(I64),
-            1u8 => Ok(U64),
-            2u8 => Ok(F64),
-            10u8 => Ok(Bytes),
-            14u8 => Ok(Str),
-            18u8 => Ok(Bool),
-            22u8 => Ok(IpAddr),
-            26u8 => Ok(Self::DateTime),
-            _ => Err(InvalidData),
-        }
+        COLUMN_TYPES.get(code as usize).copied().ok_or(InvalidData)
    }
 }

@@ -143,70 +130,20 @@ impl HasAssociatedColumnType for Ipv6Addr {
    }
 }

-/// Column types are grouped into different categories that
-/// corresponds to the different types of `JsonValue` types.
-///
-/// The columnar writer will apply coercion rules to make sure that
-/// at most one column exist per `ColumnTypeCategory`.
-///
-/// See also [README.md].
-#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, Debug)]
-#[repr(u8)]
-pub enum ColumnTypeCategory {
-    Bool,
-    Str,
-    Numerical,
-    DateTime,
-    Bytes,
-    IpAddr,
-}
-
-impl From<ColumnType> for ColumnTypeCategory {
-    fn from(column_type: ColumnType) -> Self {
-        match column_type {
-            ColumnType::I64 => ColumnTypeCategory::Numerical,
-            ColumnType::U64 => ColumnTypeCategory::Numerical,
-            ColumnType::F64 => ColumnTypeCategory::Numerical,
-            ColumnType::Bytes => ColumnTypeCategory::Bytes,
-            ColumnType::Str => ColumnTypeCategory::Str,
-            ColumnType::Bool => ColumnTypeCategory::Bool,
-            ColumnType::IpAddr => ColumnTypeCategory::IpAddr,
-            ColumnType::DateTime => ColumnTypeCategory::DateTime,
-        }
-    }
-}
-
 #[cfg(test)]
 mod tests {
-    use std::collections::HashSet;
-
    use super::*;
    use crate::Cardinality;

    #[test]
    fn test_column_type_to_code() {
-        let mut column_type_set: HashSet<ColumnType> = HashSet::new();
-        for code in u8::MIN..=u8::MAX {
-            if let Ok(column_type) = ColumnType::try_from_code(code) {
-                assert_eq!(column_type.to_code(), code);
-                assert!(column_type_set.insert(column_type));
+        for (code, expected_column_type) in super::COLUMN_TYPES.iter().copied().enumerate() {
+            if let Ok(column_type) = ColumnType::try_from_code(code as u8) {
+                assert_eq!(column_type, expected_column_type);
            }
        }
-        assert_eq!(column_type_set.len(), super::COLUMN_TYPES.len());
-    }
-
-    #[test]
-    fn test_column_category_sort_consistent_with_column_type_sort() {
-        // This is a very important property because we
-        // we need to serialize colunmn in the right order.
-        let mut column_types: Vec<ColumnType> = super::COLUMN_TYPES.iter().copied().collect();
-        column_types.sort_by_key(|col| col.to_code());
-        let column_categories: Vec<ColumnTypeCategory> = column_types
-            .into_iter()
-            .map(ColumnTypeCategory::from)
-            .collect();
-        for (prev, next) in column_categories.iter().zip(column_categories.iter()) {
-            assert!(prev <= next);
+        for code in COLUMN_TYPES.len() as u8..=u8::MAX {
+            assert!(ColumnType::try_from_code(code as u8).is_err());
        }
    }

--- a/columnar/src/columnar/merge.rs
+++ b/columnar/src/columnar/merge.rs
@@ -0,0 +1,208 @@
+use std::collections::HashMap;
+use std::io;
+
+use crate::columnar::ColumnarReader;
+use crate::dynamic_column::DynamicColumn;
+use crate::ColumnType;
+
+pub enum MergeDocOrder {
+    /// Columnar tables are simply stacked one above the other.
+    /// If the i-th columnar_readers has n_rows_i rows, then
+    /// in the resulting columnar,
+    /// rows [0..n_row_0) contain the rows of columnar_readers[0], in order
+    /// rows [n_row_0..n_row_0 + n_row_1) contain the rows of columnar_readers[1], in order.
+    /// ..
+    Stack,
+    /// Some more complex mapping, that can interleave rows from the different readers and
+    /// possibly drop rows.
+    Complex(()),
+}
+
+pub fn merge_columnar(
+    _columnar_readers: &[ColumnarReader],
+    mapping: MergeDocOrder,
+    _output: &mut impl io::Write,
+) -> io::Result<()> {
+    match mapping {
+        MergeDocOrder::Stack => {
+            // implement me :)
+            todo!();
+        }
+        MergeDocOrder::Complex(_) => {
+            // for later
+            todo!();
+        }
+    }
+}
+
+/// Column types are grouped into different categories.
+/// After merge, all columns belonging to the same category are coerced to
+/// the same column type.
+///
+/// In practice, only Numerical columns are coerced into one type today.
+///
+/// See also [README.md].
+#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
+#[repr(u8)]
+enum ColumnTypeCategory {
+    Bool,
+    Str,
+    Numerical,
+    DateTime,
+    Bytes,
+    IpAddr,
+}
+
+impl From<ColumnType> for ColumnTypeCategory {
+    fn from(column_type: ColumnType) -> Self {
+        match column_type {
+            ColumnType::I64 => ColumnTypeCategory::Numerical,
+            ColumnType::U64 => ColumnTypeCategory::Numerical,
+            ColumnType::F64 => ColumnTypeCategory::Numerical,
+            ColumnType::Bytes => ColumnTypeCategory::Bytes,
+            ColumnType::Str => ColumnTypeCategory::Str,
+            ColumnType::Bool => ColumnTypeCategory::Bool,
+            ColumnType::IpAddr => ColumnTypeCategory::IpAddr,
+            ColumnType::DateTime => ColumnTypeCategory::DateTime,
+        }
+    }
+}
+
+fn collect_columns(
+    columnar_readers: &[&ColumnarReader],
+) -> io::Result<HashMap<String, HashMap<ColumnTypeCategory, Vec<DynamicColumn>>>> {
+    // Each column name may have multiple types of column associated.
+    // For merging we are interested in the same column type category since they can be merged.
+    let mut field_name_to_group: HashMap<String, HashMap<ColumnTypeCategory, Vec<DynamicColumn>>> =
+        HashMap::new();
+
+    for columnar_reader in columnar_readers {
+        let column_name_and_handle = columnar_reader.list_columns()?;
+        for (column_name, handle) in column_name_and_handle {
+            let column_type_to_handles = field_name_to_group
+                .entry(column_name.to_string())
+                .or_default();
+
+            let columns = column_type_to_handles
+                .entry(handle.column_type().into())
+                .or_default();
+            columns.push(handle.open()?);
+        }
+    }
+
+    normalize_columns(&mut field_name_to_group);
+
+    Ok(field_name_to_group)
+}
+
+/// Coerce numerical type columns to the same type
+/// TODO rename to `coerce_columns`
+fn normalize_columns(map: &mut HashMap<String, HashMap<ColumnTypeCategory, Vec<DynamicColumn>>>) {
+    for (_field_name, type_category_to_columns) in map.iter_mut() {
+        for (type_category, columns) in type_category_to_columns {
+            if type_category == &ColumnTypeCategory::Numerical {
+                let casted_columns = cast_to_common_numerical_column(&columns);
+                *columns = casted_columns;
+            }
+        }
+    }
+}
+
+/// Receives a list of columns of numerical types (u64, i64, f64)
+///
+/// Returns a list of `DynamicColumn` which are all of the same numerical type
+fn cast_to_common_numerical_column(columns: &[DynamicColumn]) -> Vec<DynamicColumn> {
+    assert!(columns
+        .iter()
+        .all(|column| column.column_type().numerical_type().is_some()));
+    let coerce_to_i64: Vec<_> = columns
+        .iter()
+        .map(|column| column.clone().coerce_to_i64())
+        .collect();
+
+    if coerce_to_i64.iter().all(|column| column.is_some()) {
+        return coerce_to_i64
+            .into_iter()
+            .map(|column| column.unwrap())
+            .collect();
+    }
+
+    let coerce_to_u64: Vec<_> = columns
+        .iter()
+        .map(|column| column.clone().coerce_to_u64())
+        .collect();
+
+    if coerce_to_u64.iter().all(|column| column.is_some()) {
+        return coerce_to_u64
+            .into_iter()
+            .map(|column| column.unwrap())
+            .collect();
+    }
+
+    columns
+        .iter()
+        .map(|column| {
+            column
+                .clone()
+                .coerce_to_f64()
+                .expect("couldn't cast column to f64")
+        })
+        .collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::ColumnarWriter;
+
+    #[test]
+    fn test_column_coercion() {
+        // i64 type
+        let columnar1 = {
+            let mut dataframe_writer = ColumnarWriter::default();
+            dataframe_writer.record_numerical(1u32, "numbers", 1i64);
+            let mut buffer: Vec<u8> = Vec::new();
+            dataframe_writer.serialize(2, &mut buffer).unwrap();
+            ColumnarReader::open(buffer).unwrap()
+        };
+        // u64 type
+        let columnar2 = {
+            let mut dataframe_writer = ColumnarWriter::default();
+            dataframe_writer.record_numerical(1u32, "numbers", u64::MAX - 100);
+            let mut buffer: Vec<u8> = Vec::new();
+            dataframe_writer.serialize(2, &mut buffer).unwrap();
+            ColumnarReader::open(buffer).unwrap()
+        };
+
+        // f64 type
+        let columnar3 = {
+            let mut dataframe_writer = ColumnarWriter::default();
+            dataframe_writer.record_numerical(1u32, "numbers", 30.5);
+            let mut buffer: Vec<u8> = Vec::new();
+            dataframe_writer.serialize(2, &mut buffer).unwrap();
+            ColumnarReader::open(buffer).unwrap()
+        };
+
+        let column_map = collect_columns(&[&columnar1, &columnar2, &columnar3]).unwrap();
+        assert_eq!(column_map.len(), 1);
+        let cat_to_columns = column_map.get("numbers").unwrap();
+        assert_eq!(cat_to_columns.len(), 1);
+
+        let numerical = cat_to_columns.get(&ColumnTypeCategory::Numerical).unwrap();
+        assert!(numerical.iter().all(|column| column.is_f64()));
+
+        let column_map = collect_columns(&[&columnar1, &columnar1]).unwrap();
+        assert_eq!(column_map.len(), 1);
+        let cat_to_columns = column_map.get("numbers").unwrap();
+        assert_eq!(cat_to_columns.len(), 1);
+        let numerical = cat_to_columns.get(&ColumnTypeCategory::Numerical).unwrap();
+        assert!(numerical.iter().all(|column| column.is_i64()));
+
+        let column_map = collect_columns(&[&columnar2, &columnar2]).unwrap();
+        assert_eq!(column_map.len(), 1);
+        let cat_to_columns = column_map.get("numbers").unwrap();
+        assert_eq!(cat_to_columns.len(), 1);
+        let numerical = cat_to_columns.get(&ColumnTypeCategory::Numerical).unwrap();
+        assert!(numerical.iter().all(|column| column.is_u64()));
+    }
+}
--- a/columnar/src/columnar/merge/merge_dict_column.rs
+++ b/columnar/src/columnar/merge/merge_dict_column.rs
@@ -1,114 +0,0 @@
-use std::io::{self, Write};
-
-use common::CountingWriter;
-use itertools::Itertools;
-use sstable::{SSTable, TermOrdinal};
-
-use super::term_merger::TermMerger;
-use crate::column_index::{serialize_column_index, SerializableColumnIndex};
-use crate::column_values::{serialize_u64_based_column_values, CodecType};
-use crate::BytesColumn;
-
-// Serialize [Dictionary, Column, dictionary num bytes U32::LE]
-// Column: [Column Index, Column Values, column index num bytes U32::LE]
-pub fn merge_bytes_or_str_column(
-    column_index: SerializableColumnIndex<'_>,
-    bytes_columns: &[BytesColumn],
-    output: &mut impl Write,
-) -> io::Result<()> {
-    // Serialize dict and generate mapping for values
-    let mut output = CountingWriter::wrap(output);
-    let term_ord_mapping = serialize_merged_dict(bytes_columns, &mut output)?;
-    let dictionary_num_bytes: u32 = output.written_bytes() as u32;
-    let output = output.finish();
-
-    serialize_bytes_or_str_column(column_index, bytes_columns, &term_ord_mapping, output)?;
-
-    output.write_all(&dictionary_num_bytes.to_le_bytes())?;
-    Ok(())
-}
-
-fn serialize_bytes_or_str_column(
-    column_index: SerializableColumnIndex<'_>,
-    bytes_columns: &[BytesColumn],
-    term_ord_mapping: &TermOrdinalMapping,
-    output: &mut impl Write,
-) -> io::Result<()> {
-    let column_index_num_bytes = serialize_column_index(column_index, output)?;
-
-    let column_values = move || {
-        let iter = bytes_columns
-            .iter()
-            .enumerate()
-            .flat_map(|(segment_ord, byte_column)| {
-                let segment_ord = term_ord_mapping.get_segment(segment_ord);
-                byte_column
-                    .ords()
-                    .values
-                    .iter()
-                    .map(move |term_ord| segment_ord[term_ord as usize])
-            });
-        iter
-    };
-
-    serialize_u64_based_column_values(
-        column_values,
-        &[CodecType::Bitpacked, CodecType::BlockwiseLinear],
-        output,
-    )?;
-
-    output.write_all(&column_index_num_bytes.to_le_bytes())?;
-
-    Ok(())
-}
-
-fn serialize_merged_dict(
-    bytes_columns: &[BytesColumn],
-    output: &mut impl Write,
-) -> io::Result<TermOrdinalMapping> {
-    let mut term_ord_mapping = TermOrdinalMapping::default();
-
-    let mut field_term_streams = Vec::new();
-    for column in bytes_columns {
-        term_ord_mapping.add_segment(column.dictionary.num_terms());
-        let terms = column.dictionary.stream()?;
-        field_term_streams.push(terms);
-    }
-
-    let mut merged_terms = TermMerger::new(field_term_streams);
-    let mut sstable_builder = sstable::VoidSSTable::writer(output);
-
-    let mut current_term_ord = 0;
-    while merged_terms.advance() {
-        let term_bytes: &[u8] = merged_terms.key();
-
-        sstable_builder.insert(term_bytes, &())?;
-        for (segment_ord, from_term_ord) in merged_terms.matching_segments() {
-            term_ord_mapping.register_from_to(segment_ord, from_term_ord, current_term_ord);
-        }
-        current_term_ord += 1;
-    }
-    sstable_builder.finish()?;
-
-    Ok(term_ord_mapping)
-}
-
-#[derive(Default)]
-struct TermOrdinalMapping {
-    per_segment_new_term_ordinals: Vec<Vec<TermOrdinal>>,
-}
-
-impl TermOrdinalMapping {
-    fn add_segment(&mut self, max_term_ord: usize) {
-        self.per_segment_new_term_ordinals
-            .push(vec![TermOrdinal::default(); max_term_ord as usize]);
-    }
-
-    fn register_from_to(&mut self, segment_ord: usize, from_ord: TermOrdinal, to_ord: TermOrdinal) {
-        self.per_segment_new_term_ordinals[segment_ord][from_ord as usize] = to_ord;
-    }
-
-    fn get_segment(&self, segment_ord: usize) -> &[TermOrdinal] {
-        &(self.per_segment_new_term_ordinals[segment_ord])[..]
-    }
-}
--- a/columnar/src/columnar/merge/merge_mapping.rs
+++ b/columnar/src/columnar/merge/merge_mapping.rs
@@ -1,60 +0,0 @@
-use std::ops::Range;
-
-use crate::{column, ColumnarReader, RowId};
-
-pub struct StackMergeOrder {
-    // This does not start at 0. The first row is the number of
-    // rows in the first columnar.
-    cumulated_row_ids: Vec<RowId>,
-}
-
-impl StackMergeOrder {
-    pub fn from_columnars(columnars: &[&ColumnarReader]) -> StackMergeOrder {
-        let mut cumulated_row_ids: Vec<RowId> = Vec::with_capacity(columnars.len());
-        let mut cumulated_row_id = 0;
-        for columnar in columnars {
-            cumulated_row_id += columnar.num_rows();
-            cumulated_row_ids.push(cumulated_row_id);
-        }
-        StackMergeOrder { cumulated_row_ids }
-    }
-
-    pub fn num_rows(&self) -> RowId {
-        self.cumulated_row_ids.last().copied().unwrap_or(0)
-    }
-
-    pub fn offset(&self, columnar_id: usize) -> RowId {
-        if columnar_id == 0 {
-            return 0;
-        }
-        self.cumulated_row_ids[columnar_id - 1]
-    }
-
-    pub fn columnar_range(&self, columnar_id: usize) -> Range<RowId> {
-        self.offset(columnar_id)..self.offset(columnar_id + 1)
-    }
-}
-
-pub enum MergeRowOrder {
-    /// Columnar tables are simply stacked one above the other.
-    /// If the i-th columnar_readers has n_rows_i rows, then
-    /// in the resulting columnar,
-    /// rows [r0..n_row_0) contains the row of columnar_readers[0], in ordder
-    /// rows [n_row_0..n_row_0 + n_row_1 contains the row of columnar_readers[1], in order.
-    /// ..
-    Stack(StackMergeOrder),
-    /// Some more complex mapping, that can interleaves rows from the different readers and
-    /// possibly drop rows.
-    Complex(()),
-}
-
-impl MergeRowOrder {
-    pub fn num_rows(&self) -> RowId {
-        match self {
-            MergeRowOrder::Stack(stack_row_order) => stack_row_order.num_rows(),
-            MergeRowOrder::Complex(_) => {
-                todo!()
-            }
-        }
-    }
-}
--- a/columnar/src/columnar/merge/mod.rs
+++ b/columnar/src/columnar/merge/mod.rs
@@ -1,231 +0,0 @@
-mod merge_dict_column;
-mod merge_mapping;
-mod term_merger;
-
-// mod sorted_doc_id_column;
-
-use std::collections::{BTreeMap, HashMap, HashSet};
-use std::io;
-use std::net::Ipv6Addr;
-use std::sync::Arc;
-
-pub use merge_mapping::{MergeRowOrder, StackMergeOrder};
-
-use super::writer::ColumnarSerializer;
-use crate::column::{serialize_column_mappable_to_u128, serialize_column_mappable_to_u64};
-use crate::columnar::column_type::ColumnTypeCategory;
-use crate::columnar::merge::merge_dict_column::merge_bytes_or_str_column;
-use crate::columnar::writer::CompatibleNumericalTypes;
-use crate::columnar::ColumnarReader;
-use crate::dynamic_column::DynamicColumn;
-use crate::{
-    BytesColumn, Column, ColumnIndex, ColumnType, ColumnValues,
-    NumericalType, NumericalValue,
-};
-
-pub fn merge_columnar(
-    columnar_readers: &[&ColumnarReader],
-    mapping: MergeRowOrder,
-    output: &mut impl io::Write,
-) -> io::Result<()> {
-    let mut serializer = ColumnarSerializer::new(output);
-
-    let columns_to_merge = group_columns_for_merge(columnar_readers)?;
-    for ((column_name, column_type), columns) in columns_to_merge {
-        let mut column_serializer =
-            serializer.serialize_column(column_name.as_bytes(), column_type);
-        merge_column(column_type, columns, &mapping, &mut column_serializer)?;
-    }
-    serializer.finalize(mapping.num_rows())?;
-
-    Ok(())
-}
-
-fn dynamic_column_to_u64_monotonic(dynamic_column: DynamicColumn) -> Option<Column<u64>> {
-    match dynamic_column {
-        DynamicColumn::Bool(column) => Some(column.to_u64_monotonic()),
-        DynamicColumn::I64(column) => Some(column.to_u64_monotonic()),
-        DynamicColumn::U64(column) => Some(column.to_u64_monotonic()),
-        DynamicColumn::F64(column) => Some(column.to_u64_monotonic()),
-        DynamicColumn::DateTime(column) => Some(column.to_u64_monotonic()),
-        DynamicColumn::IpAddr(_) | DynamicColumn::Bytes(_) | DynamicColumn::Str(_) => None,
-    }
-}
-
-pub fn merge_column(
-    column_type: ColumnType,
-    columns: Vec<Option<DynamicColumn>>,
-    merge_row_order: &MergeRowOrder,
-    wrt: &mut impl io::Write,
-) -> io::Result<()> {
-    match column_type {
-        ColumnType::I64
-        | ColumnType::U64
-        | ColumnType::F64
-        | ColumnType::DateTime
-        | ColumnType::Bool => {
-            let mut column_indexes: Vec<Option<ColumnIndex>> = Vec::with_capacity(columns.len());
-            let mut column_values: Vec<Arc<dyn ColumnValues>> = Vec::with_capacity(columns.len());
-            for dynamic_column_opt in columns {
-                if let Some(Column { idx, values }) =
-                    dynamic_column_opt.and_then(dynamic_column_to_u64_monotonic)
-                {
-                    column_indexes.push(Some(idx));
-                    column_values.push(values);
-                } else {
-                    column_indexes.push(None);
-                }
-            }
-            let merged_column_index =
-                crate::column_index::stack_column_index(&column_indexes[..], merge_row_order);
-            let stacked_columns_iterable = || column_values
-                .iter()
-                .flat_map(|column| column.iter());
-            serialize_column_mappable_to_u64(merged_column_index, &stacked_columns_iterable, wrt)?;
-        }
-        ColumnType::IpAddr => {
-            let mut column_indexes: Vec<Option<ColumnIndex>> = Vec::with_capacity(columns.len());
-            let mut column_values: Vec<Arc<dyn ColumnValues<Ipv6Addr>>> =
-                Vec::with_capacity(columns.len());
-            let mut num_values = 0;
-            for dynamic_column_opt in columns {
-                if let Some(DynamicColumn::IpAddr(Column { idx, values })) = dynamic_column_opt {
-                    num_values += values.num_vals();
-                    column_indexes.push(Some(idx));
-                    column_values.push(values);
-                } else {
-                    column_indexes.push(None);
-                }
-            }
-            let merged_column_index =
-                crate::column_index::stack_column_index(&column_indexes[..], merge_row_order);
-            serialize_column_mappable_to_u128(
-                merged_column_index,
-                &|| {
-                    column_values
-                        .iter()
-                        .flat_map(|column_value| column_value.iter())
-                },
-                num_values,
-                wrt,
-            )?;
-        }
-        ColumnType::Bytes | ColumnType::Str => {
-            let mut column_indexes: Vec<Option<ColumnIndex>> = Vec::with_capacity(columns.len());
-            let mut bytes_columns: Vec<BytesColumn> = Vec::with_capacity(columns.len());
-            for dynamic_column_opt in columns {
-                match dynamic_column_opt {
-                    Some(DynamicColumn::Str(str_column)) => {
-                        column_indexes.push(Some(str_column.term_ord_column.idx.clone()));
-                        bytes_columns.push(str_column.into());
-                    }
-                    Some(DynamicColumn::Bytes(bytes_column)) => {
-                        column_indexes.push(Some(bytes_column.term_ord_column.idx.clone()));
-                        bytes_columns.push(bytes_column);
-                    }
-                    _ => column_indexes.push(None),
-                }
-            }
-            let merged_column_index =
-                crate::column_index::stack_column_index(&column_indexes[..], merge_row_order);
-            merge_bytes_or_str_column(merged_column_index, &bytes_columns, wrt)?;
-        }
-    }
-    Ok(())
-}
-
-fn group_columns_for_merge(
-    columnar_readers: &[&ColumnarReader],
-) -> io::Result<BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>>> {
-    // Each column name may have multiple types of column associated.
-    // For merging we are interested in the same column type category since they can be merged.
-    let mut columns_grouped: HashMap<(String, ColumnTypeCategory), Vec<Option<DynamicColumn>>> =
-        HashMap::new();
-
-    let num_columnars = columnar_readers.len();
-
-    for (columnar_id, columnar_reader) in columnar_readers.iter().enumerate() {
-        let column_name_and_handle = columnar_reader.list_columns()?;
-        for (column_name, handle) in column_name_and_handle {
-            let column_type_category: ColumnTypeCategory = handle.column_type().into();
-            let columns = columns_grouped
-                .entry((column_name, column_type_category))
-                .or_insert_with(|| vec![None; num_columnars]);
-            let column = handle.open()?;
-            columns[columnar_id] = Some(column);
-        }
-    }
-
-    let mut merge_columns: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
-        BTreeMap::default();
-
-    for ((column_name, col_category), mut columns) in columns_grouped {
-        if col_category == ColumnTypeCategory::Numerical {
-            coerce_numerical_columns_to_same_type(&mut columns);
-        }
-        let column_type = columns
-            .iter()
-            .flatten()
-            .map(|col| col.column_type())
-            .next()
-            .unwrap();
-        merge_columns.insert((column_name, column_type), columns);
-    }
-
-    Ok(merge_columns)
-}
-
-/// Coerce a set of numerical columns to the same type.
-///
-/// If all columns are already from the same type, keep this type
-/// (even if they could all be coerced to i64).
-fn coerce_numerical_columns_to_same_type(columns: &mut [Option<DynamicColumn>]) {
-    let mut column_types: HashSet<NumericalType> = HashSet::default();
-    let mut compatible_numerical_types = CompatibleNumericalTypes::default();
-    for column in columns.iter().flatten() {
-        let min_value: NumericalValue;
-        let max_value: NumericalValue;
-        match column {
-            DynamicColumn::I64(column) => {
-                min_value = column.min_value().into();
-                max_value = column.max_value().into();
-            }
-            DynamicColumn::U64(column) => {
-                min_value = column.min_value().into();
-                max_value = column.min_value().into();
-            }
-            DynamicColumn::F64(column) => {
-                min_value = column.min_value().into();
-                max_value = column.min_value().into();
-            }
-            DynamicColumn::Bool(_)
-            | DynamicColumn::IpAddr(_)
-            | DynamicColumn::DateTime(_)
-            | DynamicColumn::Bytes(_)
-            | DynamicColumn::Str(_) => {
-                panic!("We expected only numerical columns.");
-            }
-        }
-        column_types.insert(column.column_type().numerical_type().unwrap());
-        compatible_numerical_types.accept_value(min_value);
-        compatible_numerical_types.accept_value(max_value);
-    }
-    if column_types.len() <= 1 {
-        // No need to do anything. The columns are already all from the same type.
-        // This is necessary to let use force a given type.
-
-        // TODO This works in a world where we do not allow a change of schema,
-        // but in the future, we will have to pass some kind of schema to enforce
-        // the logic.
-        return;
-    }
-    let coerce_type = compatible_numerical_types.to_numerical_type();
-    for column_opt in columns.iter_mut() {
-        if let Some(column) = column_opt.take() {
-            *column_opt = column.coerce_numerical(coerce_type);
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests;
--- a/columnar/src/columnar/merge/term_merger.rs
+++ b/columnar/src/columnar/merge/term_merger.rs
@@ -1,107 +0,0 @@
-use std::cmp::Ordering;
-use std::collections::BinaryHeap;
-
-use sstable::TermOrdinal;
-
-use crate::Streamer;
-
-pub struct HeapItem<'a> {
-    pub streamer: Streamer<'a>,
-    pub segment_ord: usize,
-}
-
-impl<'a> PartialEq for HeapItem<'a> {
-    fn eq(&self, other: &Self) -> bool {
-        self.segment_ord == other.segment_ord
-    }
-}
-
-impl<'a> Eq for HeapItem<'a> {}
-
-impl<'a> PartialOrd for HeapItem<'a> {
-    fn partial_cmp(&self, other: &HeapItem<'a>) -> Option<Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl<'a> Ord for HeapItem<'a> {
-    fn cmp(&self, other: &HeapItem<'a>) -> Ordering {
-        (&other.streamer.key(), &other.segment_ord).cmp(&(&self.streamer.key(), &self.segment_ord))
-    }
-}
-
-/// Given a list of sorted term streams,
-/// returns an iterator over sorted unique terms.
-///
-/// The item yield is actually a pair with
-/// - the term
-/// - a slice with the ordinal of the segments containing
-/// the terms.
-pub struct TermMerger<'a> {
-    heap: BinaryHeap<HeapItem<'a>>,
-    current_streamers: Vec<HeapItem<'a>>,
-}
-
-impl<'a> TermMerger<'a> {
-    /// Stream of merged term dictionary
-    pub fn new(streams: Vec<Streamer<'a>>) -> TermMerger<'a> {
-        TermMerger {
-            heap: BinaryHeap::new(),
-            current_streamers: streams
-                .into_iter()
-                .enumerate()
-                .map(|(ord, streamer)| HeapItem {
-                    streamer,
-                    segment_ord: ord,
-                })
-                .collect(),
-        }
-    }
-
-    pub(crate) fn matching_segments<'b: 'a>(
-        &'b self,
-    ) -> impl 'b + Iterator<Item = (usize, TermOrdinal)> {
-        self.current_streamers
-            .iter()
-            .map(|heap_item| (heap_item.segment_ord, heap_item.streamer.term_ord()))
-    }
-
-    fn advance_segments(&mut self) {
-        let streamers = &mut self.current_streamers;
-        let heap = &mut self.heap;
-        for mut heap_item in streamers.drain(..) {
-            if heap_item.streamer.advance() {
-                heap.push(heap_item);
-            }
-        }
-    }
-
-    /// Advance the term iterator to the next term.
-    /// Returns true if there is indeed another term
-    /// False if there is none.
-    pub fn advance(&mut self) -> bool {
-        self.advance_segments();
-        if let Some(head) = self.heap.pop() {
-            self.current_streamers.push(head);
-            while let Some(next_streamer) = self.heap.peek() {
-                if self.current_streamers[0].streamer.key() != next_streamer.streamer.key() {
-                    break;
-                }
-                let next_heap_it = self.heap.pop().unwrap(); // safe : we peeked beforehand
-                self.current_streamers.push(next_heap_it);
-            }
-            true
-        } else {
-            false
-        }
-    }
-
-    /// Returns the current term.
-    ///
-    /// This method may be called
-    /// if and only if advance() has been called before
-    /// and "true" was returned.
-    pub fn key(&self) -> &[u8] {
-        self.current_streamers[0].streamer.key()
-    }
-}
--- a/columnar/src/columnar/merge/tests.rs
+++ b/columnar/src/columnar/merge/tests.rs
@@ -1,252 +0,0 @@
-use super::*;
-use crate::{Cardinality, ColumnarWriter, HasAssociatedColumnType, RowId};
-
-fn make_columnar<T: Into<NumericalValue> + HasAssociatedColumnType + Copy>(
-    column_name: &str,
-    vals: &[T],
-) -> ColumnarReader {
-    let mut dataframe_writer = ColumnarWriter::default();
-    dataframe_writer.record_column_type(column_name, T::column_type(), false);
-    for (row_id, val) in vals.iter().copied().enumerate() {
-        dataframe_writer.record_numerical(row_id as RowId, column_name, val.into());
-    }
-    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer
-        .serialize(vals.len() as RowId, &mut buffer)
-        .unwrap();
-    ColumnarReader::open(buffer).unwrap()
-}
-
-#[test]
-fn test_column_coercion_to_u64() {
-    // i64 type
-    let columnar1 = make_columnar("numbers", &[1i64]);
-    // u64 type
-    let columnar2 = make_columnar("numbers", &[u64::MAX]);
-    let column_map: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
-        group_columns_for_merge(&[&columnar1, &columnar2]).unwrap();
-    assert_eq!(column_map.len(), 1);
-    assert!(column_map.contains_key(&("numbers".to_string(), ColumnType::U64)));
-}
-
-#[test]
-fn test_column_no_coercion_if_all_the_same() {
-    let columnar1 = make_columnar("numbers", &[1u64]);
-    let columnar2 = make_columnar("numbers", &[2u64]);
-    let column_map: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
-        group_columns_for_merge(&[&columnar1, &columnar2]).unwrap();
-    assert_eq!(column_map.len(), 1);
-    assert!(column_map.contains_key(&("numbers".to_string(), ColumnType::U64)));
-}
-
-#[test]
-fn test_column_coercion_to_i64() {
-    let columnar1 = make_columnar("numbers", &[-1i64]);
-    let columnar2 = make_columnar("numbers", &[2u64]);
-    let column_map: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
-        group_columns_for_merge(&[&columnar1, &columnar2]).unwrap();
-    assert_eq!(column_map.len(), 1);
-    assert!(column_map.contains_key(&("numbers".to_string(), ColumnType::I64)));
-}
-
-#[test]
-fn test_missing_column() {
-    let columnar1 = make_columnar("numbers", &[-1i64]);
-    let columnar2 = make_columnar("numbers2", &[2u64]);
-    let column_map: BTreeMap<(String, ColumnType), Vec<Option<DynamicColumn>>> =
-        group_columns_for_merge(&[&columnar1, &columnar2]).unwrap();
-    assert_eq!(column_map.len(), 2);
-    assert!(column_map.contains_key(&("numbers".to_string(), ColumnType::I64)));
-    {
-        let columns = column_map
-            .get(&("numbers".to_string(), ColumnType::I64))
-            .unwrap();
-        assert!(columns[0].is_some());
-        assert!(columns[1].is_none());
-    }
-    {
-        let columns = column_map
-            .get(&("numbers2".to_string(), ColumnType::U64))
-            .unwrap();
-        assert!(columns[0].is_none());
-        assert!(columns[1].is_some());
-    }
-}
-
-fn make_numerical_columnar_multiple_columns(
-    columns: &[(&str, &[&[NumericalValue]])],
-) -> ColumnarReader {
-    let mut dataframe_writer = ColumnarWriter::default();
-    for (column_name, column_values) in columns {
-        for (row_id, vals) in column_values.iter().enumerate() {
-            for val in vals.iter() {
-                dataframe_writer.record_numerical(row_id as u32, column_name, *val);
-            }
-        }
-    }
-    let num_rows = columns
-        .iter()
-        .map(|(_, val_rows)| val_rows.len() as RowId)
-        .max()
-        .unwrap_or(0u32);
-    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer.serialize(num_rows, &mut buffer).unwrap();
-    ColumnarReader::open(buffer).unwrap()
-}
-
-fn make_byte_columnar_multiple_columns(columns: &[(&str, &[&[&[u8]]])]) -> ColumnarReader {
-    let mut dataframe_writer = ColumnarWriter::default();
-    for (column_name, column_values) in columns {
-        for (row_id, vals) in column_values.iter().enumerate() {
-            for val in vals.iter() {
-                dataframe_writer.record_bytes(row_id as u32, column_name, *val);
-            }
-        }
-    }
-    let num_rows = columns
-        .iter()
-        .map(|(_, val_rows)| val_rows.len() as RowId)
-        .max()
-        .unwrap_or(0u32);
-    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer.serialize(num_rows, &mut buffer).unwrap();
-    ColumnarReader::open(buffer).unwrap()
-}
-
-fn make_text_columnar_multiple_columns(columns: &[(&str, &[&[&str]])]) -> ColumnarReader {
-    let mut dataframe_writer = ColumnarWriter::default();
-    for (column_name, column_values) in columns {
-        for (row_id, vals) in column_values.iter().enumerate() {
-            for val in vals.iter() {
-                dataframe_writer.record_str(row_id as u32, column_name, *val);
-            }
-        }
-    }
-    let num_rows = columns
-        .iter()
-        .map(|(_, val_rows)| val_rows.len() as RowId)
-        .max()
-        .unwrap_or(0u32);
-    let mut buffer: Vec<u8> = Vec::new();
-    dataframe_writer.serialize(num_rows, &mut buffer).unwrap();
-    ColumnarReader::open(buffer).unwrap()
-}
-
-#[test]
-fn test_merge_columnar_numbers() {
-    let columnar1 =
-        make_numerical_columnar_multiple_columns(&[("numbers", &[&[NumericalValue::from(-1f64)]])]);
-    let columnar2 = make_numerical_columnar_multiple_columns(&[(
-        "numbers",
-        &[&[], &[NumericalValue::from(-3f64)]],
-    )]);
-    let mut buffer = Vec::new();
-    let columnars = &[&columnar1, &columnar2];
-    let stack_merge_order = StackMergeOrder::from_columnars(columnars);
-    crate::columnar::merge_columnar(
-        columnars,
-        MergeRowOrder::Stack(stack_merge_order),
-        &mut buffer,
-    )
-    .unwrap();
-    let columnar_reader = ColumnarReader::open(buffer).unwrap();
-    assert_eq!(columnar_reader.num_rows(), 3);
-    assert_eq!(columnar_reader.num_columns(), 1);
-    let cols = columnar_reader.read_columns("numbers").unwrap();
-    let dynamic_column = cols[0].open().unwrap();
-    let DynamicColumn::F64(vals) = dynamic_column else { panic!() };
-    assert_eq!(vals.get_cardinality(), Cardinality::Optional);
-    assert_eq!(vals.first(0u32), Some(-1f64));
-    assert_eq!(vals.first(1u32), None);
-    assert_eq!(vals.first(2u32), Some(-3f64));
-}
-
-#[test]
-fn test_merge_columnar_texts() {
-    let columnar1 = make_text_columnar_multiple_columns(&[("texts", &[&["a"]])]);
-    let columnar2 = make_text_columnar_multiple_columns(&[("texts", &[&[], &["b"]])]);
-    let mut buffer = Vec::new();
-    let columnars = &[&columnar1, &columnar2];
-    let stack_merge_order = StackMergeOrder::from_columnars(columnars);
-    crate::columnar::merge_columnar(
-        columnars,
-        MergeRowOrder::Stack(stack_merge_order),
-        &mut buffer,
-    )
-    .unwrap();
-    let columnar_reader = ColumnarReader::open(buffer).unwrap();
-    assert_eq!(columnar_reader.num_rows(), 3);
-    assert_eq!(columnar_reader.num_columns(), 1);
-    let cols = columnar_reader.read_columns("texts").unwrap();
-    let dynamic_column = cols[0].open().unwrap();
-    let DynamicColumn::Str(vals) = dynamic_column else { panic!() };
-    let get_str_for_ord = |ord| {
-        let mut out = String::new();
-        vals.ord_to_str(ord, &mut out).unwrap();
-        out
-    };
-
-    assert_eq!(vals.dictionary.num_terms(), 2);
-    assert_eq!(get_str_for_ord(0), "a");
-    assert_eq!(get_str_for_ord(1), "b");
-
-    let get_str_for_row = |row_id| {
-        let term_ords: Vec<u64> = vals.term_ords(row_id).collect();
-        assert!(term_ords.len() <= 1);
-        let mut out = String::new();
-        if term_ords.len() == 1 {
-            vals.ord_to_str(term_ords[0], &mut out).unwrap();
-        }
-        out
-    };
-
-    assert_eq!(get_str_for_row(0), "a");
-    assert_eq!(get_str_for_row(1), "");
-    assert_eq!(get_str_for_row(2), "b");
-}
-
-#[test]
-fn test_merge_columnar_byte() {
-    let columnar1 = make_byte_columnar_multiple_columns(&[("bytes", &[&[b"bbbb"], &[b"baaa"]])]);
-    let columnar2 = make_byte_columnar_multiple_columns(&[("bytes", &[&[], &[b"a"]])]);
-    let mut buffer = Vec::new();
-    let columnars = &[&columnar1, &columnar2];
-    let stack_merge_order = StackMergeOrder::from_columnars(columnars);
-    crate::columnar::merge_columnar(
-        columnars,
-        MergeRowOrder::Stack(stack_merge_order),
-        &mut buffer,
-    )
-    .unwrap();
-    let columnar_reader = ColumnarReader::open(buffer).unwrap();
-    assert_eq!(columnar_reader.num_rows(), 4);
-    assert_eq!(columnar_reader.num_columns(), 1);
-    let cols = columnar_reader.read_columns("bytes").unwrap();
-    let dynamic_column = cols[0].open().unwrap();
-    let DynamicColumn::Bytes(vals) = dynamic_column else { panic!() };
-    let get_bytes_for_ord = |ord| {
-        let mut out = Vec::new();
-        vals.ord_to_bytes(ord, &mut out).unwrap();
-        out
-    };
-
-    assert_eq!(vals.dictionary.num_terms(), 3);
-    assert_eq!(get_bytes_for_ord(0), b"a");
-    assert_eq!(get_bytes_for_ord(1), b"baaa");
-    assert_eq!(get_bytes_for_ord(2), b"bbbb");
-
-    let get_bytes_for_row = |row_id| {
-        let term_ords: Vec<u64> = vals.term_ords(row_id).collect();
-        assert!(term_ords.len() <= 1);
-        let mut out = Vec::new();
-        if term_ords.len() == 1 {
-            vals.ord_to_bytes(term_ords[0], &mut out).unwrap();
-        }
-        out
-    };
-
-    assert_eq!(get_bytes_for_row(0), b"bbbb");
-    assert_eq!(get_bytes_for_row(1), b"baaa");
-    assert_eq!(get_bytes_for_row(2), b"");
-    assert_eq!(get_bytes_for_row(3), b"a");
-}
--- a/columnar/src/columnar/merge_index.rs
+++ b/columnar/src/columnar/merge_index.rs
@@ -1 +0,0 @@
-
--- a/columnar/src/columnar/mod.rs
+++ b/columnar/src/columnar/mod.rs
@@ -1,11 +1,10 @@
 mod column_type;
 mod format_version;
 mod merge;
-mod merge_index;
 mod reader;
 mod writer;

 pub use column_type::{ColumnType, HasAssociatedColumnType};
-pub use merge::{merge_columnar, MergeRowOrder, StackMergeOrder};
+pub use merge::{merge_columnar, MergeDocOrder};
 pub use reader::ColumnarReader;
 pub use writer::ColumnarWriter;
--- a/columnar/src/columnar/reader/mod.rs
+++ b/columnar/src/columnar/reader/mod.rs
@@ -6,7 +6,6 @@ use sstable::{Dictionary, RangeSSTable};

 use crate::columnar::{format_version, ColumnType};
 use crate::dynamic_column::DynamicColumnHandle;
-use crate::RowId;

 fn io_invalid_data(msg: String) -> io::Error {
    io::Error::new(io::ErrorKind::InvalidData, msg)
@@ -14,11 +13,9 @@ fn io_invalid_data(msg: String) -> io::Error {

 /// The ColumnarReader makes it possible to access a set of columns
 /// associated to field names.
-#[derive(Clone)]
 pub struct ColumnarReader {
    column_dictionary: Dictionary<RangeSSTable>,
    column_data: FileSlice,
-    num_rows: RowId,
 }

 impl ColumnarReader {
@@ -30,27 +27,23 @@ impl ColumnarReader {

    fn open_inner(file_slice: FileSlice) -> io::Result<ColumnarReader> {
        let (file_slice_without_sstable_len, footer_slice) = file_slice
-            .split_from_end(mem::size_of::<u64>() + 4 + format_version::VERSION_FOOTER_NUM_BYTES);
+            .split_from_end(mem::size_of::<u64>() + format_version::VERSION_FOOTER_NUM_BYTES);
        let footer_bytes = footer_slice.read_bytes()?;
-        let sstable_len = u64::deserialize(&mut &footer_bytes[0..8])?;
-        let num_rows = u32::deserialize(&mut &footer_bytes[8..12])?;
+        let (mut sstable_len_bytes, version_footer_bytes) =
+            footer_bytes.rsplit(format_version::VERSION_FOOTER_NUM_BYTES);
        let version_footer_bytes: [u8; format_version::VERSION_FOOTER_NUM_BYTES] =
-            footer_bytes[12..].try_into().unwrap();
+            version_footer_bytes.as_slice().try_into().unwrap();
        let _version = format_version::parse_footer(version_footer_bytes)?;
+        let sstable_len = u64::deserialize(&mut sstable_len_bytes)?;
        let (column_data, sstable) =
            file_slice_without_sstable_len.split_from_end(sstable_len as usize);
        let column_dictionary = Dictionary::open(sstable)?;
        Ok(ColumnarReader {
            column_dictionary,
            column_data,
-            num_rows,
        })
    }

-    pub fn num_rows(&self) -> RowId {
-        self.num_rows
-    }
-
    // TODO Add unit tests
    pub fn list_columns(&self) -> io::Result<Vec<(String, DynamicColumnHandle)>> {
        let mut stream = self.column_dictionary.stream()?;
--- a/columnar/src/columnar/writer/column_writers.rs
+++ b/columnar/src/columnar/writer/column_writers.rs
@@ -114,7 +114,7 @@ impl NumericalColumnWriter {
 /// State used to store what types are still acceptable
 /// after having seen a set of numerical values.
 #[derive(Clone, Copy)]
-pub(crate) enum CompatibleNumericalTypes {
+enum CompatibleNumericalTypes {
    Dynamic {
        all_values_within_i64_range: bool,
        all_values_within_u64_range: bool,
@@ -132,7 +132,7 @@ impl Default for CompatibleNumericalTypes {
 }

 impl CompatibleNumericalTypes {
-    pub fn is_type_accepted(&self, numerical_type: NumericalType) -> bool {
+    fn is_type_accepted(&self, numerical_type: NumericalType) -> bool {
        match self {
            CompatibleNumericalTypes::Dynamic {
                all_values_within_i64_range,
@@ -148,7 +148,7 @@ impl CompatibleNumericalTypes {
        }
    }

-    pub fn accept_value(&mut self, numerical_value: NumericalValue) {
+    fn accept_value(&mut self, numerical_value: NumericalValue) {
        match self {
            CompatibleNumericalTypes::Dynamic {
                all_values_within_i64_range,
@@ -189,10 +189,12 @@ impl CompatibleNumericalTypes {
 }

 impl NumericalColumnWriter {
-    pub fn column_type_and_cardinality(&self, num_docs: RowId) -> (NumericalType, Cardinality) {
-        let numerical_type = self.compatible_numerical_types.to_numerical_type();
-        let cardinality = self.column_writer.get_cardinality(num_docs);
-        (numerical_type, cardinality)
+    pub fn numerical_type(&self) -> NumericalType {
+        self.compatible_numerical_types.to_numerical_type()
+    }
+
+    pub fn cardinality(&self, num_docs: RowId) -> Cardinality {
+        self.column_writer.get_cardinality(num_docs)
    }

    pub fn record_numerical_value(
--- a/columnar/src/columnar/writer/mod.rs
+++ b/columnar/src/columnar/writer/mod.rs
@@ -7,20 +7,19 @@ use std::io;
 use std::net::Ipv6Addr;

 use column_operation::ColumnOperation;
-pub(crate) use column_writers::CompatibleNumericalTypes;
 use common::CountingWriter;
-pub(crate) use serializer::ColumnarSerializer;
+use serializer::ColumnarSerializer;
 use stacker::{Addr, ArenaHashMap, MemoryArena};

 use crate::column_index::SerializableColumnIndex;
 use crate::column_values::{
    ColumnValues, MonotonicallyMappableToU128, MonotonicallyMappableToU64, VecColumn,
 };
-use crate::columnar::column_type::{ColumnType, ColumnTypeCategory};
+use crate::columnar::column_type::ColumnType;
 use crate::columnar::writer::column_writers::{
    ColumnWriter, NumericalColumnWriter, StrOrBytesColumnWriter,
 };
-use crate::columnar::writer::value_index::{IndexBuilder, PreallocatedIndexBuilders, OptionalIndexBuilder};
+use crate::columnar::writer::value_index::{IndexBuilder, PreallocatedIndexBuilders};
 use crate::dictionary::{DictionaryBuilder, TermIdMapping, UnorderedId};
 use crate::value::{Coerce, NumericalType, NumericalValue};
 use crate::{Cardinality, RowId};
@@ -280,35 +279,40 @@ impl ColumnarWriter {
    }
    pub fn serialize(&mut self, num_docs: RowId, wrt: &mut dyn io::Write) -> io::Result<()> {
        let mut serializer = ColumnarSerializer::new(wrt);
-        let mut columns: Vec<(&[u8], ColumnTypeCategory, Addr)> = self
+        let mut columns: Vec<(&[u8], ColumnType, Addr)> = self
            .numerical_field_hash_map
            .iter()
-            .map(|(column_name, addr, _)| (column_name, ColumnTypeCategory::Numerical, addr))
+            .map(|(column_name, addr, _)| {
+                let numerical_column_writer: NumericalColumnWriter =
+                    self.numerical_field_hash_map.read(addr);
+                let column_type = numerical_column_writer.numerical_type().into();
+                (column_name, column_type, addr)
+            })
            .collect();
        columns.extend(
            self.bytes_field_hash_map
                .iter()
-                .map(|(term, addr, _)| (term, ColumnTypeCategory::Bytes, addr)),
+                .map(|(term, addr, _)| (term, ColumnType::Bytes, addr)),
        );
        columns.extend(
            self.str_field_hash_map
                .iter()
-                .map(|(column_name, addr, _)| (column_name, ColumnTypeCategory::Str, addr)),
+                .map(|(column_name, addr, _)| (column_name, ColumnType::Str, addr)),
        );
        columns.extend(
            self.bool_field_hash_map
                .iter()
-                .map(|(column_name, addr, _)| (column_name, ColumnTypeCategory::Bool, addr)),
+                .map(|(column_name, addr, _)| (column_name, ColumnType::Bool, addr)),
        );
        columns.extend(
            self.ip_addr_field_hash_map
                .iter()
-                .map(|(column_name, addr, _)| (column_name, ColumnTypeCategory::IpAddr, addr)),
+                .map(|(column_name, addr, _)| (column_name, ColumnType::IpAddr, addr)),
        );
        columns.extend(
            self.datetime_field_hash_map
                .iter()
-                .map(|(column_name, addr, _)| (column_name, ColumnTypeCategory::DateTime, addr)),
+                .map(|(column_name, addr, _)| (column_name, ColumnType::DateTime, addr)),
        );
        columns.sort_unstable_by_key(|(column_name, col_type, _)| (*column_name, *col_type));

@@ -316,8 +320,12 @@ impl ColumnarWriter {
        let mut symbol_byte_buffer: Vec<u8> = Vec::new();
        for (column_name, column_type, addr) in columns {
            match column_type {
-                ColumnTypeCategory::Bool => {
-                    let column_writer: ColumnWriter = self.bool_field_hash_map.read(addr);
+                ColumnType::Bool | ColumnType::DateTime => {
+                    let column_writer: ColumnWriter = if column_type == ColumnType::Bool {
+                        self.bool_field_hash_map.read(addr)
+                    } else {
+                        self.datetime_field_hash_map.read(addr)
+                    };
                    let cardinality = column_writer.get_cardinality(num_docs);
                    let mut column_serializer =
                        serializer.serialize_column(column_name, ColumnType::Bool);
@@ -329,7 +337,7 @@ impl ColumnarWriter {
                        &mut column_serializer,
                    )?;
                }
-                ColumnTypeCategory::IpAddr => {
+                ColumnType::IpAddr => {
                    let column_writer: ColumnWriter = self.ip_addr_field_hash_map.read(addr);
                    let cardinality = column_writer.get_cardinality(num_docs);
                    let mut column_serializer =
@@ -342,33 +350,36 @@ impl ColumnarWriter {
                        &mut column_serializer,
                    )?;
                }
-                ColumnTypeCategory::Bytes | ColumnTypeCategory::Str => {
-                    let (column_type, str_column_writer): (ColumnType, StrOrBytesColumnWriter) =
-                        if column_type == ColumnTypeCategory::Bytes {
-                            (ColumnType::Bytes, self.bytes_field_hash_map.read(addr))
+                ColumnType::Bytes | ColumnType::Str => {
+                    let str_or_bytes_column_writer: StrOrBytesColumnWriter =
+                        if column_type == ColumnType::Bytes {
+                            self.bytes_field_hash_map.read(addr)
                        } else {
-                            (ColumnType::Str, self.str_field_hash_map.read(addr))
+                            self.str_field_hash_map.read(addr)
                        };
                    let dictionary_builder =
-                        &dictionaries[str_column_writer.dictionary_id as usize];
-                    let cardinality = str_column_writer.column_writer.get_cardinality(num_docs);
+                        &dictionaries[str_or_bytes_column_writer.dictionary_id as usize];
+                    let cardinality = str_or_bytes_column_writer
+                        .column_writer
+                        .get_cardinality(num_docs);
                    let mut column_serializer =
                        serializer.serialize_column(column_name, column_type);
                    serialize_bytes_or_str_column(
                        cardinality,
                        num_docs,
-                        str_column_writer.sort_values_within_row,
+                        str_or_bytes_column_writer.sort_values_within_row,
                        dictionary_builder,
-                        str_column_writer.operation_iterator(arena, &mut symbol_byte_buffer),
+                        str_or_bytes_column_writer
+                            .operation_iterator(arena, &mut symbol_byte_buffer),
                        buffers,
                        &mut column_serializer,
                    )?;
                }
-                ColumnTypeCategory::Numerical => {
+                ColumnType::I64 | ColumnType::F64 | ColumnType::U64 => {
                    let numerical_column_writer: NumericalColumnWriter =
                        self.numerical_field_hash_map.read(addr);
-                    let (numerical_type, cardinality) =
-                        numerical_column_writer.column_type_and_cardinality(num_docs);
+                    let numerical_type = column_type.numerical_type().unwrap();
+                    let cardinality = numerical_column_writer.cardinality(num_docs);
                    let mut column_serializer =
                        serializer.serialize_column(column_name, ColumnType::from(numerical_type));
                    serialize_numerical_column(
@@ -380,29 +391,13 @@ impl ColumnarWriter {
                        &mut column_serializer,
                    )?;
                }
-                ColumnTypeCategory::DateTime => {
-                    let column_writer: ColumnWriter = self.datetime_field_hash_map.read(addr);
-                    let cardinality = column_writer.get_cardinality(num_docs);
-                    let mut column_serializer =
-                        serializer.serialize_column(column_name, ColumnType::DateTime);
-                    serialize_numerical_column(
-                        cardinality,
-                        num_docs,
-                        NumericalType::I64,
-                        column_writer.operation_iterator(arena, &mut symbol_byte_buffer),
-                        buffers,
-                        &mut column_serializer,
-                    )?;
-                }
            };
        }
-        serializer.finalize(num_docs)?;
+        serializer.finalize()?;
        Ok(())
    }
 }

-// Serialize [Dictionary, Column, dictionary num bytes U32::LE]
-// Column: [Column Index, Column Values, column index num bytes U32::LE]
 fn serialize_bytes_or_str_column(
    cardinality: Cardinality,
    num_docs: RowId,
@@ -550,7 +545,7 @@ fn send_to_serialize_column_mappable_to_u128<
 >(
    op_iterator: impl Iterator<Item = ColumnOperation<T>>,
    cardinality: Cardinality,
-    num_rows: RowId,
+    num_docs: RowId,
    value_index_builders: &mut PreallocatedIndexBuilders,
    values: &mut Vec<T>,
    mut wrt: impl io::Write,
@@ -572,48 +567,48 @@ where
        Cardinality::Optional => {
            let optional_index_builder = value_index_builders.borrow_optional_index_builder();
            consume_operation_iterator(op_iterator, optional_index_builder, values);
-            let non_null_rows: &[u32] = optional_index_builder.finish(num_rows);
-            SerializableColumnIndex::Optional {
-                num_rows,
-                non_null_row_ids: Box::new(|| Box::new(non_null_rows.iter().copied())),
-            }
+            let optional_index = optional_index_builder.finish(num_docs);
+            SerializableColumnIndex::Optional(Box::new(optional_index))
        }
        Cardinality::Multivalued => {
            let multivalued_index_builder = value_index_builders.borrow_multivalued_index_builder();
            consume_operation_iterator(op_iterator, multivalued_index_builder, values);
-            let multivalued_index = multivalued_index_builder.finish(num_rows);
-            SerializableColumnIndex::Multivalued(Box::new(|| Box::new(multivalued_index.iter().copied())))
+            let multivalued_index = multivalued_index_builder.finish(num_docs);
+            SerializableColumnIndex::Multivalued(Box::new(multivalued_index))
        }
    };
    crate::column::serialize_column_mappable_to_u128(
        serializable_column_index,
-        &|| values.iter().copied(),
+        || values.iter().cloned(),
        values.len() as u32,
        &mut wrt,
    )?;
    Ok(())
 }

-fn sort_values_within_row_in_place(multivalued_index: &[RowId], values: &mut Vec<u64>) {
+fn sort_values_within_row_in_place(
+    multivalued_index: &impl ColumnValues<RowId>,
+    values: &mut Vec<u64>,
+) {
    let mut start_index: usize = 0;
-    for end_index in multivalued_index.iter().copied() {
+    for end_index in multivalued_index.iter() {
        let end_index = end_index as usize;
        values[start_index..end_index].sort_unstable();
        start_index = end_index;
    }
 }

-fn send_to_serialize_column_mappable_to_u64<'a>(
+fn send_to_serialize_column_mappable_to_u64(
    op_iterator: impl Iterator<Item = ColumnOperation<u64>>,
    cardinality: Cardinality,
-    num_rows: RowId,
+    num_docs: RowId,
    sort_values_within_row: bool,
-    value_index_builders: &'a mut PreallocatedIndexBuilders,
+    value_index_builders: &mut PreallocatedIndexBuilders,
    values: &mut Vec<u64>,
    mut wrt: impl io::Write,
 ) -> io::Result<()>
 where
-    for<'b> VecColumn<'b, u64>: ColumnValues<u64>,
+    for<'a> VecColumn<'a, u64>: ColumnValues<u64>,
 {
    values.clear();
    let serializable_column_index = match cardinality {
@@ -626,27 +621,24 @@ where
            SerializableColumnIndex::Full
        }
        Cardinality::Optional => {
-            let optional_index_builder: &'a mut OptionalIndexBuilder = value_index_builders.borrow_optional_index_builder();
+            let optional_index_builder = value_index_builders.borrow_optional_index_builder();
            consume_operation_iterator(op_iterator, optional_index_builder, values);
-            let optional_index: &'a [u32] = optional_index_builder.finish(num_rows);
-            SerializableColumnIndex::Optional {
-                non_null_row_ids: Box::new(move || Box::new(optional_index.iter().copied())),
-                num_rows,
-            }
+            let optional_index = optional_index_builder.finish(num_docs);
+            SerializableColumnIndex::Optional(Box::new(optional_index))
        }
        Cardinality::Multivalued => {
            let multivalued_index_builder = value_index_builders.borrow_multivalued_index_builder();
            consume_operation_iterator(op_iterator, multivalued_index_builder, values);
-            let multivalued_index = multivalued_index_builder.finish(num_rows);
+            let multivalued_index = multivalued_index_builder.finish(num_docs);
            if sort_values_within_row {
-                sort_values_within_row_in_place(multivalued_index, values);
+                sort_values_within_row_in_place(&multivalued_index, values);
            }
-            SerializableColumnIndex::Multivalued(Box::new(|| Box::new(multivalued_index.iter().copied())))
+            SerializableColumnIndex::Multivalued(Box::new(multivalued_index))
        }
    };
    crate::column::serialize_column_mappable_to_u64(
        serializable_column_index,
-        &|| values.iter().copied(),
+        &VecColumn::from(&values[..]),
        &mut wrt,
    )?;
    Ok(())
--- a/columnar/src/columnar/writer/serializer.rs
+++ b/columnar/src/columnar/writer/serializer.rs
@@ -1,12 +1,11 @@
 use std::io;
 use std::io::Write;

-use common::{BinarySerializable, CountingWriter};
+use common::CountingWriter;
 use sstable::value::RangeValueWriter;
 use sstable::RangeSSTable;

 use crate::columnar::ColumnType;
-use crate::RowId;

 pub struct ColumnarSerializer<W: io::Write> {
    wrt: CountingWriter<W>,
@@ -47,12 +46,11 @@ impl<W: io::Write> ColumnarSerializer<W> {
        }
    }

-    pub(crate) fn finalize(mut self, num_rows: RowId) -> io::Result<()> {
+    pub(crate) fn finalize(mut self) -> io::Result<()> {
        let sstable_bytes: Vec<u8> = self.sstable_range.finish()?;
        let sstable_num_bytes: u64 = sstable_bytes.len() as u64;
        self.wrt.write_all(&sstable_bytes)?;
        self.wrt.write_all(&sstable_num_bytes.to_le_bytes()[..])?;
-        num_rows.serialize(&mut self.wrt)?;
        self.wrt
            .write_all(&super::super::format_version::footer())?;
        self.wrt.flush()?;
--- a/columnar/src/columnar/writer/value_index.rs
+++ b/columnar/src/columnar/writer/value_index.rs
@@ -1,3 +1,5 @@
+use crate::column_index::SerializableOptionalIndex;
+use crate::column_values::{ColumnValues, VecColumn};
 use crate::RowId;

 /// The `IndexBuilder` interprets a sequence of
@@ -27,15 +29,34 @@ pub struct OptionalIndexBuilder {
    docs: Vec<RowId>,
 }

+struct SingleValueArrayIndex<'a> {
+    // RowIds with a value, in a strictly increasing order
+    row_ids: &'a [RowId],
+    num_rows: RowId,
+}
+
+impl<'a> SerializableOptionalIndex<'a> for SingleValueArrayIndex<'a> {
+    fn num_rows(&self) -> RowId {
+        self.num_rows
+    }
+
+    fn non_null_rows(&self) -> Box<dyn Iterator<Item = RowId> + 'a> {
+        Box::new(self.row_ids.iter().copied())
+    }
+}
+
 impl OptionalIndexBuilder {
-    pub fn finish<'a>(&'a mut self, num_rows: RowId) -> &'a [RowId] {
+    pub fn finish<'a>(&'a mut self, num_rows: RowId) -> impl SerializableOptionalIndex + 'a {
        debug_assert!(self
            .docs
            .last()
            .copied()
            .map(|last_doc| last_doc < num_rows)
            .unwrap_or(true));
-        &self.docs[..]
+        SingleValueArrayIndex {
+            row_ids: &self.docs[..],
+            num_rows,
+        }
    }

    fn reset(&mut self) {
@@ -63,10 +84,14 @@ pub struct MultivaluedIndexBuilder {
 }

 impl MultivaluedIndexBuilder {
-    pub fn finish(&mut self, num_docs: RowId) -> &[u32] {
+    pub fn finish(&mut self, num_docs: RowId) -> impl ColumnValues<u32> + '_ {
        self.start_offsets
            .resize(num_docs as usize + 1, self.total_num_vals_seen);
-        &self.start_offsets[..]
+        VecColumn {
+            values: &&self.start_offsets[..],
+            min_value: 0,
+            max_value: self.start_offsets.last().copied().unwrap_or(0),
+        }
    }

    fn reset(&mut self) {
@@ -122,14 +147,20 @@ mod tests {
        opt_value_index_builder.record_row(0u32);
        opt_value_index_builder.record_value();
        assert_eq!(
-            &opt_value_index_builder.finish(1u32),
+            &opt_value_index_builder
+                .finish(1u32)
+                .non_null_rows()
+                .collect::<Vec<u32>>(),
            &[0]
        );
        opt_value_index_builder.reset();
        opt_value_index_builder.record_row(1u32);
        opt_value_index_builder.record_value();
        assert_eq!(
-            &opt_value_index_builder.finish(2u32),
+            &opt_value_index_builder
+                .finish(2u32)
+                .non_null_rows()
+                .collect::<Vec<u32>>(),
            &[1]
        );
    }
@@ -146,7 +177,6 @@ mod tests {
            multivalued_value_index_builder
                .finish(4u32)
                .iter()
-                .copied()
                .collect::<Vec<u32>>(),
            vec![0, 0, 2, 3, 3]
        );
@@ -158,7 +188,6 @@ mod tests {
            multivalued_value_index_builder
                .finish(4u32)
                .iter()
-                .copied()
                .collect::<Vec<u32>>(),
            vec![0, 0, 0, 2, 2]
        );
--- a/columnar/src/dynamic_column.rs
+++ b/columnar/src/dynamic_column.rs
@@ -8,7 +8,7 @@ use common::{HasLen, OwnedBytes};
 use crate::column::{BytesColumn, Column, StrColumn};
 use crate::column_values::{monotonic_map_column, StrictlyMonotonicFn};
 use crate::columnar::ColumnType;
-use crate::{Cardinality, DateTime, NumericalType};
+use crate::{DateTime, NumericalType};

 #[derive(Clone)]
 pub enum DynamicColumn {
@@ -23,18 +23,6 @@ pub enum DynamicColumn {
 }

 impl DynamicColumn {
-    pub fn get_cardinality(&self) -> Cardinality {
-        match self {
-            DynamicColumn::Bool(c) => c.get_cardinality(),
-            DynamicColumn::I64(c) => c.get_cardinality(),
-            DynamicColumn::U64(c) => c.get_cardinality(),
-            DynamicColumn::F64(c) => c.get_cardinality(),
-            DynamicColumn::IpAddr(c) => c.get_cardinality(),
-            DynamicColumn::DateTime(c) => c.get_cardinality(),
-            DynamicColumn::Bytes(c) => c.ords().get_cardinality(),
-            DynamicColumn::Str(c) => c.ords().get_cardinality(),
-        }
-    }
    pub fn column_type(&self) -> ColumnType {
        match self {
            DynamicColumn::Bool(_) => ColumnType::Bool,
@@ -48,14 +36,6 @@ impl DynamicColumn {
        }
    }

-    pub fn coerce_numerical(self, target_numerical_type: NumericalType) -> Option<Self> {
-        match target_numerical_type {
-            NumericalType::I64 => self.coerce_to_i64(),
-            NumericalType::U64 => self.coerce_to_u64(),
-            NumericalType::F64 => self.coerce_to_f64(),
-        }
-    }
-
    pub fn is_numerical(&self) -> bool {
        self.column_type().numerical_type().is_some()
    }
@@ -70,7 +50,7 @@ impl DynamicColumn {
        self.column_type().numerical_type() == Some(NumericalType::U64)
    }

-    fn coerce_to_f64(self) -> Option<DynamicColumn> {
+    pub fn coerce_to_f64(self) -> Option<DynamicColumn> {
        match self {
            DynamicColumn::I64(column) => Some(DynamicColumn::F64(Column {
                idx: column.idx,
@@ -84,7 +64,7 @@ impl DynamicColumn {
            _ => None,
        }
    }
-    fn coerce_to_i64(self) -> Option<DynamicColumn> {
+    pub fn coerce_to_i64(self) -> Option<DynamicColumn> {
        match self {
            DynamicColumn::U64(column) => {
                if column.max_value() > i64::MAX as u64 {
@@ -99,7 +79,7 @@ impl DynamicColumn {
            _ => None,
        }
    }
-    fn coerce_to_u64(self) -> Option<DynamicColumn> {
+    pub fn coerce_to_u64(self) -> Option<DynamicColumn> {
        match self {
            DynamicColumn::I64(column) => {
                if column.min_value() < 0 {
--- a/columnar/src/iterable.rs
+++ b/columnar/src/iterable.rs
@@ -1,61 +0,0 @@
-use std::marker::PhantomData;
-use std::ops::Range;
-
-pub trait Iterable<T = u64> {
-    fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_>;
-}
-
-struct Mapped<U, Original, Transform> {
-    original_iterable: Original,
-    transform: Transform,
-    input_type: PhantomData<U>,
-}
-
-impl<U, V, Original, Transform> Iterable<V> for Mapped<U, Original, Transform>
-where
-    Original: Iterable<U>,
-    Transform: Fn(U) -> V,
-{
-    fn boxed_iter(&self) -> Box<dyn Iterator<Item = V> + '_> {
-        Box::new(self.original_iterable.boxed_iter().map(&self.transform))
-    }
-}
-
-impl<U> Iterable<U> for &dyn Iterable<U> {
-    fn boxed_iter(&self) -> Box<dyn Iterator<Item = U> + '_> {
-        (*self).boxed_iter()
-    }
-}
-
-impl<F, T> Iterable<T> for F
-where F: Fn() -> Box<dyn Iterator<Item = T>>
-{
-    fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_> {
-        self()
-    }
-}
-
-pub fn map_iterable<U, V, F, I>(
-    original_iterable: impl Fn() -> I,
-    transform: F,
-) -> impl Fn() -> std::iter::Map<I, F>
-where
-    F: Fn(U) -> V + Clone,
-    I: Iterator<Item = U>,
-{
-    move || original_iterable().map(transform.clone())
-}
-
-impl<'a, T: Copy> Iterable<T> for &'a [T] {
-    fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_> {
-        Box::new(self.iter().copied())
-    }
-}
-
-impl<T: Copy> Iterable<T> for Range<T>
-where Range<T>: Iterator<Item = T>
-{
-    fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_> {
-        Box::new(self.clone())
-    }
-}
--- a/columnar/src/lib.rs
+++ b/columnar/src/lib.rs
@@ -11,11 +11,10 @@ use std::io;

 mod column;
 mod column_index;
-pub mod column_values;
+mod column_values;
 mod columnar;
 mod dictionary;
 mod dynamic_column;
-mod iterable;
 pub(crate) mod utils;
 mod value;

@@ -24,9 +23,8 @@ pub use column_index::ColumnIndex;
 pub use column_values::{ColumnValues, MonotonicallyMappableToU128, MonotonicallyMappableToU64};
 pub use columnar::{
    merge_columnar, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
-    MergeRowOrder, StackMergeOrder,
+    MergeDocOrder,
 };
-pub(crate) use iterable::{map_iterable, Iterable};
 use sstable::VoidSSTable;
 pub use value::{NumericalType, NumericalValue};

@@ -68,12 +66,6 @@ pub enum Cardinality {
 }

 impl Cardinality {
-    pub fn is_optional(&self) -> bool {
-        matches!(self, Cardinality::Optional)
-    }
-    pub fn is_multivalue(&self) -> bool {
-        matches!(self, Cardinality::Multivalued)
-    }
    pub(crate) fn to_code(self) -> u8 {
        self as u8
    }
--- a/common/src/serialize.rs
+++ b/common/src/serialize.rs
@@ -5,37 +5,12 @@ use byteorder::{ReadBytesExt, WriteBytesExt};

 use crate::{Endianness, VInt};

-#[derive(Default)]
-struct Counter(u64);
-
-impl io::Write for Counter {
-    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
-        self.0 += buf.len() as u64;
-        Ok(buf.len())
-    }
-
-    fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
-        self.0 += buf.len() as u64;
-        Ok(())
-    }
-
-    fn flush(&mut self) -> io::Result<()> {
-        Ok(())
-    }
-}
-
 /// Trait for a simple binary serialization.
 pub trait BinarySerializable: fmt::Debug + Sized {
    /// Serialize
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()>;
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()>;
    /// Deserialize
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self>;
-
-    fn num_bytes(&self) -> u64 {
-        let mut counter = Counter::default();
-        self.serialize(&mut counter).unwrap();
-        counter.0
-    }
 }

 pub trait DeserializeFrom<T: BinarySerializable> {
@@ -59,7 +34,7 @@ pub trait FixedSize: BinarySerializable {
 }

 impl BinarySerializable for () {
-    fn serialize<W: Write + ?Sized>(&self, _: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, _: &mut W) -> io::Result<()> {
        Ok(())
    }
    fn deserialize<R: Read>(_: &mut R) -> io::Result<Self> {
@@ -72,7 +47,7 @@ impl FixedSize for () {
 }

 impl<T: BinarySerializable> BinarySerializable for Vec<T> {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        VInt(self.len() as u64).serialize(writer)?;
        for it in self {
            it.serialize(writer)?;
@@ -91,7 +66,7 @@ impl<T: BinarySerializable> BinarySerializable for Vec<T> {
 }

 impl<Left: BinarySerializable, Right: BinarySerializable> BinarySerializable for (Left, Right) {
-    fn serialize<W: Write + ?Sized>(&self, write: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, write: &mut W) -> io::Result<()> {
        self.0.serialize(write)?;
        self.1.serialize(write)
    }
@@ -106,7 +81,7 @@ impl<Left: BinarySerializable + FixedSize, Right: BinarySerializable + FixedSize
 }

 impl BinarySerializable for u32 {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u32::<Endianness>(*self)
    }

@@ -120,7 +95,7 @@ impl FixedSize for u32 {
 }

 impl BinarySerializable for u16 {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u16::<Endianness>(*self)
    }

@@ -134,7 +109,7 @@ impl FixedSize for u16 {
 }

 impl BinarySerializable for u64 {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u64::<Endianness>(*self)
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
@@ -147,7 +122,7 @@ impl FixedSize for u64 {
 }

 impl BinarySerializable for u128 {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u128::<Endianness>(*self)
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
@@ -160,7 +135,7 @@ impl FixedSize for u128 {
 }

 impl BinarySerializable for f32 {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_f32::<Endianness>(*self)
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
@@ -173,7 +148,7 @@ impl FixedSize for f32 {
 }

 impl BinarySerializable for i64 {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_i64::<Endianness>(*self)
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
@@ -186,7 +161,7 @@ impl FixedSize for i64 {
 }

 impl BinarySerializable for f64 {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_f64::<Endianness>(*self)
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
@@ -199,7 +174,7 @@ impl FixedSize for f64 {
 }

 impl BinarySerializable for u8 {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u8(*self)
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<u8> {
@@ -212,7 +187,7 @@ impl FixedSize for u8 {
 }

 impl BinarySerializable for bool {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u8(u8::from(*self))
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<bool> {
@@ -233,7 +208,7 @@ impl FixedSize for bool {
 }

 impl BinarySerializable for String {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        let data: &[u8] = self.as_bytes();
        VInt(data.len() as u64).serialize(writer)?;
        writer.write_all(data)
--- a/common/src/vint.rs
+++ b/common/src/vint.rs
@@ -44,7 +44,7 @@ pub fn deserialize_vint_u128(data: &[u8]) -> io::Result<(u128, &[u8])> {
 pub struct VIntU128(pub u128);

 impl BinarySerializable for VIntU128 {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        let mut buffer = vec![];
        serialize_vint_u128(self.0, &mut buffer);
        writer.write_all(&buffer)
@@ -211,7 +211,7 @@ impl VInt {
 }

 impl BinarySerializable for VInt {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        let mut buffer = [0u8; 10];
        let num_bytes = self.serialize_into(&mut buffer);
        writer.write_all(&buffer[0..num_bytes])
--- a/fastfield_codecs/Cargo.toml
+++ b/fastfield_codecs/Cargo.toml
@@ -0,0 +1,34 @@
+[package]
+name = "fastfield_codecs"
+version = "0.3.0"
+authors = ["Pascal Seitz <pascal@quickwit.io>"]
+license = "MIT"
+edition = "2021"
+description = "Fast field codecs used by tantivy"
+documentation = "https://docs.rs/fastfield_codecs/"
+homepage = "https://github.com/quickwit-oss/tantivy"
+repository = "https://github.com/quickwit-oss/tantivy"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+common = { version = "0.5", path = "../common/", package = "tantivy-common" }
+tantivy-bitpacker = { version= "0.3", path = "../bitpacker/" }
+columnar = { version= "0.1", path="../columnar", package="tantivy-columnar" }
+prettytable-rs = {version="0.10.0", optional= true}
+rand = {version="0.8.3", optional= true}
+fastdivide = "0.4"
+log = "0.4"
+itertools = { version = "0.10.3" }
+measure_time = { version="0.8.2", optional=true}
+
+[dev-dependencies]
+more-asserts = "0.3.0"
+proptest = "1.0.0"
+rand = "0.8.3"
+
+[features]
+bin = ["prettytable-rs", "rand", "measure_time"]
+default = ["bin"]
+unstable = []
+
--- a/fastfield_codecs/README.md
+++ b/fastfield_codecs/README.md
@@ -0,0 +1,68 @@
+
+
+# Fast Field Codecs
+
+This crate contains various fast field codecs, used to compress/decompress fast field data in tantivy.
+
+## Contributing
+
+Contributing is pretty straightforward. Since bitpacking is the simplest compressor, you can use it as a reference.
+
+A codec needs to implement 2 traits:
+
+- A reader implementing `FastFieldCodecReader` to read the codec.
+- A serializer implementing `FastFieldCodecSerializer` for compression estimation and codec name + id.
+
+### Tests
+
+Once the traits are implemented, test and benchmark integration is pretty easy (see `test_with_codec_data_sets` and `bench.rs`).
+
+Make sure to add the codec to the main.rs, which tests the compression ratio and estimation against different data sets. You can run it with:
+```
+cargo run --features bin
+```
+
+### TODO
+- Add real world data sets in comparison
+- Add codec to cover sparse data sets
+
+
+### Codec Comparison
+```
++----------------------------------+-------------------+------------------------+
+|                                  | Compression Ratio | Compression Estimation |
++----------------------------------+-------------------+------------------------+
+| Autoincrement                    |                   |                        |
++----------------------------------+-------------------+------------------------+
+| LinearInterpol                   | 0.000039572664    | 0.000004396963         |
++----------------------------------+-------------------+------------------------+
+| MultiLinearInterpol              | 0.1477348         | 0.17275847             |
++----------------------------------+-------------------+------------------------+
+| Bitpacked                        | 0.28126493        | 0.28125                |
++----------------------------------+-------------------+------------------------+
+| Monotonically increasing concave |                   |                        |
++----------------------------------+-------------------+------------------------+
+| LinearInterpol                   | 0.25003937        | 0.26562938             |
++----------------------------------+-------------------+------------------------+
+| MultiLinearInterpol              | 0.190665          | 0.1883836              |
++----------------------------------+-------------------+------------------------+
+| Bitpacked                        | 0.31251436        | 0.3125                 |
++----------------------------------+-------------------+------------------------+
+| Monotonically increasing convex  |                   |                        |
++----------------------------------+-------------------+------------------------+
+| LinearInterpol                   | 0.25003937        | 0.28125438             |
++----------------------------------+-------------------+------------------------+
+| MultiLinearInterpol              | 0.18676           | 0.2040086              |
++----------------------------------+-------------------+------------------------+
+| Bitpacked                        | 0.31251436        | 0.3125                 |
++----------------------------------+-------------------+------------------------+
+| Almost monotonically increasing  |                   |                        |
++----------------------------------+-------------------+------------------------+
+| LinearInterpol                   | 0.14066513        | 0.1562544              |
++----------------------------------+-------------------+------------------------+
+| MultiLinearInterpol              | 0.16335973        | 0.17275847             |
++----------------------------------+-------------------+------------------------+
+| Bitpacked                        | 0.28126493        | 0.28125                |
++----------------------------------+-------------------+------------------------+
+
+```
--- a/fastfield_codecs/benches/bench.rs
+++ b/fastfield_codecs/benches/bench.rs
@@ -0,0 +1,311 @@
+#![feature(test)]
+
+extern crate test;
+
+#[cfg(test)]
+mod tests {
+    use std::ops::RangeInclusive;
+    use std::sync::Arc;
+
+    use common::OwnedBytes;
+    use fastfield_codecs::*;
+    use rand::prelude::*;
+    use test::Bencher;
+
+    use super::*;
+
+    // Warning: this generates the same permutation at each call
+    fn generate_permutation() -> Vec<u64> {
+        let mut permutation: Vec<u64> = (0u64..100_000u64).collect();
+        permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
+        permutation
+    }
+
+    fn generate_random() -> Vec<u64> {
+        let mut permutation: Vec<u64> = (0u64..100_000u64)
+            .map(|el| el + random::<u16>() as u64)
+            .collect();
+        permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
+        permutation
+    }
+
+    // Warning: this generates the same permutation at each call
+    fn generate_permutation_gcd() -> Vec<u64> {
+        let mut permutation: Vec<u64> = (1u64..100_000u64).map(|el| el * 1000).collect();
+        permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
+        permutation
+    }
+
+    pub fn serialize_and_load<T: MonotonicallyMappableToU64 + Ord + Default>(
+        column: &[T],
+    ) -> Arc<dyn Column<T>> {
+        let mut buffer = Vec::new();
+        serialize(VecColumn::from(&column), &mut buffer, &ALL_CODEC_TYPES).unwrap();
+        open(OwnedBytes::new(buffer)).unwrap()
+    }
+
+    #[bench]
+    fn bench_intfastfield_jumpy_veclookup(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        let n = permutation.len();
+        b.iter(|| {
+            let mut a = 0u64;
+            for _ in 0..n {
+                a = permutation[a as usize];
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_jumpy_fflookup(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        let n = permutation.len();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&permutation);
+        b.iter(|| {
+            let mut a = 0u64;
+            for _ in 0..n {
+                a = column.get_val(a as u32);
+            }
+            a
+        });
+    }
+
+    const FIFTY_PERCENT_RANGE: RangeInclusive<u64> = 1..=50;
+    const SINGLE_ITEM: u64 = 90;
+    const SINGLE_ITEM_RANGE: RangeInclusive<u64> = 90..=90;
+    const ONE_PERCENT_ITEM_RANGE: RangeInclusive<u64> = 49..=49;
+    fn get_data_50percent_item() -> Vec<u128> {
+        let mut rng = StdRng::from_seed([1u8; 32]);
+
+        let mut data = vec![];
+        for _ in 0..300_000 {
+            let val = rng.gen_range(1..=100);
+            data.push(val);
+        }
+        data.push(SINGLE_ITEM);
+
+        data.shuffle(&mut rng);
+        let data = data.iter().map(|el| *el as u128).collect::<Vec<_>>();
+        data
+    }
+    fn get_u128_column_random() -> Arc<dyn Column<u128>> {
+        let permutation = generate_random();
+        let permutation = permutation.iter().map(|el| *el as u128).collect::<Vec<_>>();
+        get_u128_column_from_data(&permutation)
+    }
+
+    fn get_u128_column_from_data(data: &[u128]) -> Arc<dyn Column<u128>> {
+        let mut out = vec![];
+        let iter_gen = || data.iter().cloned();
+        serialize_u128(iter_gen, data.len() as u32, &mut out).unwrap();
+        let out = OwnedBytes::new(out);
+        open_u128::<u128>(out).unwrap()
+    }
+
+    // U64 RANGE START
+    #[bench]
+    fn bench_intfastfield_getrange_u64_50percent_hit(b: &mut Bencher) {
+        let data = get_data_50percent_item();
+        let data = data.iter().map(|el| *el as u64).collect::<Vec<_>>();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&data);
+
+        b.iter(|| {
+            let mut positions = Vec::new();
+            column.get_docids_for_value_range(
+                FIFTY_PERCENT_RANGE,
+                0..data.len() as u32,
+                &mut positions,
+            );
+            positions
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_getrange_u64_1percent_hit(b: &mut Bencher) {
+        let data = get_data_50percent_item();
+        let data = data.iter().map(|el| *el as u64).collect::<Vec<_>>();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&data);
+
+        b.iter(|| {
+            let mut positions = Vec::new();
+            column.get_docids_for_value_range(
+                ONE_PERCENT_ITEM_RANGE,
+                0..data.len() as u32,
+                &mut positions,
+            );
+            positions
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_getrange_u64_single_hit(b: &mut Bencher) {
+        let data = get_data_50percent_item();
+        let data = data.iter().map(|el| *el as u64).collect::<Vec<_>>();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&data);
+
+        b.iter(|| {
+            let mut positions = Vec::new();
+            column.get_docids_for_value_range(
+                SINGLE_ITEM_RANGE,
+                0..data.len() as u32,
+                &mut positions,
+            );
+            positions
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_getrange_u64_hit_all(b: &mut Bencher) {
+        let data = get_data_50percent_item();
+        let data = data.iter().map(|el| *el as u64).collect::<Vec<_>>();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&data);
+
+        b.iter(|| {
+            let mut positions = Vec::new();
+            column.get_docids_for_value_range(0..=u64::MAX, 0..data.len() as u32, &mut positions);
+            positions
+        });
+    }
+    // U64 RANGE END
+
+    // U128 RANGE START
+    #[bench]
+    fn bench_intfastfield_getrange_u128_50percent_hit(b: &mut Bencher) {
+        let data = get_data_50percent_item();
+        let column = get_u128_column_from_data(&data);
+
+        b.iter(|| {
+            let mut positions = Vec::new();
+            column.get_docids_for_value_range(
+                *FIFTY_PERCENT_RANGE.start() as u128..=*FIFTY_PERCENT_RANGE.end() as u128,
+                0..data.len() as u32,
+                &mut positions,
+            );
+            positions
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_getrange_u128_single_hit(b: &mut Bencher) {
+        let data = get_data_50percent_item();
+        let column = get_u128_column_from_data(&data);
+
+        b.iter(|| {
+            let mut positions = Vec::new();
+            column.get_docids_for_value_range(
+                *SINGLE_ITEM_RANGE.start() as u128..=*SINGLE_ITEM_RANGE.end() as u128,
+                0..data.len() as u32,
+                &mut positions,
+            );
+            positions
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_getrange_u128_hit_all(b: &mut Bencher) {
+        let data = get_data_50percent_item();
+        let column = get_u128_column_from_data(&data);
+
+        b.iter(|| {
+            let mut positions = Vec::new();
+            column.get_docids_for_value_range(0..=u128::MAX, 0..data.len() as u32, &mut positions);
+            positions
+        });
+    }
+    // U128 RANGE END
+
+    #[bench]
+    fn bench_intfastfield_scan_all_fflookup_u128(b: &mut Bencher) {
+        let column = get_u128_column_random();
+
+        b.iter(|| {
+            let mut a = 0u128;
+            for i in 0u64..column.num_vals() as u64 {
+                a += column.get_val(i as u32);
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_jumpy_stride5_u128(b: &mut Bencher) {
+        let column = get_u128_column_random();
+
+        b.iter(|| {
+            let n = column.num_vals();
+            let mut a = 0u128;
+            for i in (0..n / 5).map(|val| val * 5) {
+                a += column.get_val(i);
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_stride7_vec(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        let n = permutation.len();
+        b.iter(|| {
+            let mut a = 0u64;
+            for i in (0..n / 7).map(|val| val * 7) {
+                a += permutation[i as usize];
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_stride7_fflookup(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        let n = permutation.len();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&permutation);
+        b.iter(|| {
+            let mut a = 0;
+            for i in (0..n / 7).map(|val| val * 7) {
+                a += column.get_val(i as u32);
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_scan_all_fflookup(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        let n = permutation.len();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&permutation);
+        b.iter(|| {
+            let mut a = 0u64;
+            for i in 0u32..n as u32 {
+                a += column.get_val(i);
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_scan_all_fflookup_gcd(b: &mut Bencher) {
+        let permutation = generate_permutation_gcd();
+        let n = permutation.len();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&permutation);
+        b.iter(|| {
+            let mut a = 0u64;
+            for i in 0..n {
+                a += column.get_val(i as u32);
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_scan_all_vec(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        b.iter(|| {
+            let mut a = 0u64;
+            for i in 0..permutation.len() {
+                a += permutation[i as usize] as u64;
+            }
+            a
+        });
+    }
+}
--- a/fastfield_codecs/src/lib.rs
+++ b/fastfield_codecs/src/lib.rs
@@ -0,0 +1,10 @@
+#![warn(missing_docs)]
+#![cfg_attr(all(feature = "unstable", test), feature(test))]
+
+//! # `fastfield_codecs`
+//!
+//! - Columnar storage of data for tantivy [`Column`].
+//! - Encode data in different codecs.
+//! - Monotonically map values to u64/u128
+
+pub use columnar::ColumnValues as Column;
--- a/src/aggregation/agg_req_with_accessor.rs
+++ b/src/aggregation/agg_req_with_accessor.rs
@@ -4,7 +4,7 @@ use std::rc::Rc;
 use std::sync::atomic::AtomicU32;
 use std::sync::Arc;

-use fastfield_codecs::Column;
+use columnar::{Column, StrColumn};

 use super::agg_req::{Aggregation, Aggregations, BucketAggregationType, MetricAggregation};
 use super::bucket::{HistogramAggregation, RangeAggregation, TermsAggregation};
@@ -14,8 +14,7 @@ use super::metric::{
 };
 use super::segment_agg_result::BucketCount;
 use super::VecWithNames;
-use crate::fastfield::{type_and_cardinality, MultiValuedFastFieldReader};
-use crate::schema::Type;
+use crate::schema::{FieldType, Type};
 use crate::{InvertedIndexReader, SegmentReader, TantivyError};

 #[derive(Clone, Default)]
@@ -37,38 +36,12 @@ impl AggregationsWithAccessor {
    }
 }

-#[derive(Clone)]
-pub(crate) enum FastFieldAccessor {
-    Multi(MultiValuedFastFieldReader<u64>),
-    Single(Arc<dyn Column<u64>>),
-}
-impl FastFieldAccessor {
-    pub fn as_single(&self) -> Option<&dyn Column<u64>> {
-        match self {
-            FastFieldAccessor::Multi(_) => None,
-            FastFieldAccessor::Single(reader) => Some(&**reader),
-        }
-    }
-    pub fn into_single(self) -> Option<Arc<dyn Column<u64>>> {
-        match self {
-            FastFieldAccessor::Multi(_) => None,
-            FastFieldAccessor::Single(reader) => Some(reader),
-        }
-    }
-    pub fn as_multi(&self) -> Option<&MultiValuedFastFieldReader<u64>> {
-        match self {
-            FastFieldAccessor::Multi(reader) => Some(reader),
-            FastFieldAccessor::Single(_) => None,
-        }
-    }
-}
-
 #[derive(Clone)]
 pub struct BucketAggregationWithAccessor {
    /// In general there can be buckets without fast field access, e.g. buckets that are created
    /// based on search terms. So eventually this needs to be Option or moved.
-    pub(crate) accessor: FastFieldAccessor,
-    pub(crate) inverted_index: Option<Arc<InvertedIndexReader>>,
+    pub(crate) accessor: Column<u64>,
+    pub(crate) str_dict_column: Option<StrColumn>,
    pub(crate) field_type: Type,
    pub(crate) bucket_agg: BucketAggregationType,
    pub(crate) sub_aggregation: AggregationsWithAccessor,
@@ -83,20 +56,19 @@ impl BucketAggregationWithAccessor {
        bucket_count: Rc<AtomicU32>,
        max_bucket_count: u32,
    ) -> crate::Result<BucketAggregationWithAccessor> {
-        let mut inverted_index = None;
+        let mut str_dict_column = None;
        let (accessor, field_type) = match &bucket {
            BucketAggregationType::Range(RangeAggregation {
                field: field_name, ..
-            }) => get_ff_reader_and_validate(reader, field_name, Cardinality::SingleValue)?,
+            }) => get_ff_reader_and_validate(reader, field_name)?,
            BucketAggregationType::Histogram(HistogramAggregation {
                field: field_name, ..
-            }) => get_ff_reader_and_validate(reader, field_name, Cardinality::SingleValue)?,
+            }) => get_ff_reader_and_validate(reader, field_name)?,
            BucketAggregationType::Terms(TermsAggregation {
                field: field_name, ..
            }) => {
-                let field = reader.schema().get_field(field_name)?;
-                inverted_index = Some(reader.inverted_index(field)?);
-                get_ff_reader_and_validate(reader, field_name, Cardinality::MultiValues)?
+                str_dict_column = reader.fast_fields().str(&field_name)?;
+                get_ff_reader_and_validate(reader, field_name)?
            }
        };
        let sub_aggregation = sub_aggregation.clone();
@@ -110,7 +82,7 @@ impl BucketAggregationWithAccessor {
                max_bucket_count,
            )?,
            bucket_agg: bucket.clone(),
-            inverted_index,
+            str_dict_column,
            bucket_count: BucketCount {
                bucket_count,
                max_bucket_count,
@@ -124,7 +96,7 @@ impl BucketAggregationWithAccessor {
 pub struct MetricAggregationWithAccessor {
    pub metric: MetricAggregation,
    pub field_type: Type,
-    pub accessor: Arc<dyn Column>,
+    pub accessor: Column<u64>,
 }

 impl MetricAggregationWithAccessor {
@@ -139,13 +111,10 @@ impl MetricAggregationWithAccessor {
            | MetricAggregation::Min(MinAggregation { field: field_name })
            | MetricAggregation::Stats(StatsAggregation { field: field_name })
            | MetricAggregation::Sum(SumAggregation { field: field_name }) => {
-                let (accessor, field_type) =
-                    get_ff_reader_and_validate(reader, field_name, Cardinality::SingleValue)?;
+                let (accessor, field_type) = get_ff_reader_and_validate(reader, field_name)?;

                Ok(MetricAggregationWithAccessor {
-                    accessor: accessor
-                        .into_single()
-                        .expect("unexpected fast field cardinality"),
+                    accessor,
                    field_type,
                    metric: metric.clone(),
                })
@@ -190,32 +159,19 @@ pub(crate) fn get_aggs_with_accessor_and_validate(
 fn get_ff_reader_and_validate(
    reader: &SegmentReader,
    field_name: &str,
-    cardinality: Cardinality,
-) -> crate::Result<(FastFieldAccessor, Type)> {
+) -> crate::Result<(columnar::Column<u64>, Type)> {
    let field = reader.schema().get_field(field_name)?;
-    let field_type = reader.schema().get_field_entry(field).field_type();
-
-    if let Some((_ff_type, field_cardinality)) = type_and_cardinality(field_type) {
-        if cardinality != field_cardinality {
-            return Err(TantivyError::InvalidArgument(format!(
-                "Invalid field cardinality on field {} expected {:?}, but got {:?}",
-                field_name, cardinality, field_cardinality
-            )));
-        }
-    } else {
-        return Err(TantivyError::InvalidArgument(format!(
-            "Only fast fields of type f64, u64, i64 are supported, but got {:?} ",
-            field_type.value_type()
-        )));
-    };
+    // TODO we should get type metadata from columnar
+    let field_type = reader
+        .schema()
+        .get_field_entry(field)
+        .field_type()
+        .value_type();
+    // TODO Do validation

    let ff_fields = reader.fast_fields();
-    match cardinality {
-        Cardinality::SingleValue => ff_fields
-            .u64_lenient(field_name)
-            .map(|field| (FastFieldAccessor::Single(field), field_type.value_type())),
-        Cardinality::MultiValues => ff_fields
-            .u64s_lenient(field_name)
-            .map(|field| (FastFieldAccessor::Multi(field), field_type.value_type())),
-    }
+    let ff_field = ff_fields.u64_lenient(field_name)?.ok_or_else(|| {
+        TantivyError::InvalidArgument(format!("No fast field found for field: {}", field_name))
+    })?;
+    Ok((ff_field, field_type))
 }
--- a/src/aggregation/bucket/histogram/date_histogram.rs
+++ b/src/aggregation/bucket/histogram/date_histogram.rs
@@ -0,0 +1,123 @@
+use serde::{Deserialize, Serialize};
+
+/// DateHistogramAggregation is similar to `HistogramAggregation`, but it can only be used with date type.
+///
+/// Currently only **fixed time** intervals are supported. Calendar-aware time intervals are not
+/// supported.
+///
+/// Like the histogram, values are rounded down into the closest bucket.
+///
+/// For this calculation all fastfield values are converted to f64.
+///
+/// # Limitations/Compatibility
+/// Only fixed time intervals are supported.
+///
+/// # JSON Format
+/// ```json
+/// {
+///     "prices": {
+///         "date_histogram": {
+///             "field": "price",
+///             "fixed_interval": "30d"
+///         }
+///     }
+/// }
+/// ```
+///
+/// Response
+/// See [`BucketEntry`](crate::aggregation::agg_result::BucketEntry)
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
+pub struct DateHistogramAggregationReq {
+    /// The field to aggregate on.
+    pub field: String,
+    /// The interval to chunk your data range. Each bucket spans a value range of [0..fixed_interval).
+    /// Accepted values
+    ///
+    /// Fixed intervals are configured with the `fixed_interval` parameter.
+    /// In contrast to calendar-aware intervals, fixed intervals are a fixed number of SI units and never deviate, regardless of where they fall on the calendar.
+    /// One second is always composed of 1000ms. This allows fixed intervals to be specified in any multiple of the supported units.
+    /// However, it means fixed intervals cannot express other units such as months, since the duration of a month is not a fixed quantity.
+    /// Attempting to specify a calendar interval like month or quarter will return an Error.
+    ///
+    /// The accepted units for fixed intervals are:
+    /// * `ms`: milliseconds
+    /// * `s`: seconds. Defined as 1000 milliseconds each.
+    /// * `m`: minutes. Defined as 60 seconds each (60_000 milliseconds).
+    /// * `h`: hours. Defined as 60 minutes each (3_600_000 milliseconds).
+    /// * `d`: days. Defined as 24 hours (86_400_000 milliseconds).
+    ///
+    /// Fractional time values are not supported, but you can address this by shifting to another time unit
+    /// (e.g., `1.5h` could instead be specified as `90m`).
+    pub fixed_interval: String,
+    /// Intervals implicitly define an absolute grid of buckets `[interval * k, interval * (k + 1))`.
+    ///
+    pub offset: Option<String>,
+    /// Whether to return the buckets as a hash map
+    #[serde(default)]
+    pub keyed: bool,
+}
+
+impl DateHistogramAggregationReq {
+    fn validate(&self) -> crate::Result<()> {
+        Ok(())
+    }
+}
+
+#[derive(Debug, PartialEq, Eq)]
+/// Errors when parsing the fixed interval for `DateHistogramAggregationReq`.
+pub enum DateHistogramParseError {
+    /// Unit not recognized in passed String
+    UnitNotRecognized(String),
+    /// Number not found in passed String
+    NumberMissing(String),
+    /// Unit not found in passed String
+    UnitMissing(String),
+}
+
+/// Parses a fixed interval such as `30d` into milliseconds. The number must be ASCII digits.
+/// A value overflowing `u64` is reported as `NumberMissing` (there is no dedicated variant).
+fn parse_into_milliseconds(input: &str) -> Result<u64, DateHistogramParseError> {
+    // ASCII digits are single bytes, so this count is a valid byte offset for `split_at`.
+    let split_boundary = input.bytes().take_while(u8::is_ascii_digit).count();
+    let (number, unit) = input.split_at(split_boundary);
+    if number.is_empty() {
+        return Err(DateHistogramParseError::NumberMissing(input.to_string()));
+    }
+    if unit.is_empty() {
+        return Err(DateHistogramParseError::UnitMissing(input.to_string()));
+    }
+    let unparsable = || DateHistogramParseError::NumberMissing(input.to_string());
+    let number: u64 = number.parse().map_err(|_| unparsable())?;
+    let multiplier_from_unit: u64 = match unit {
+        "ms" => 1,
+        "s" => 1000,
+        "m" => 60 * 1000,
+        "h" => 60 * 60 * 1000,
+        "d" => 24 * 60 * 60 * 1000,
+        _ => return Err(DateHistogramParseError::UnitNotRecognized(unit.to_string())),
+    };
+    number.checked_mul(multiplier_from_unit).ok_or_else(unparsable)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parser_test() {
+        assert_eq!(parse_into_milliseconds("1m").unwrap(), 60_000);
+        assert_eq!(parse_into_milliseconds("2m").unwrap(), 120_000);
+        assert_eq!(
+            parse_into_milliseconds("2y").unwrap_err(),
+            DateHistogramParseError::UnitNotRecognized("y".to_string())
+        );
+        assert_eq!(
+            parse_into_milliseconds("2000").unwrap_err(),
+            DateHistogramParseError::UnitMissing("2000".to_string())
+        );
+        assert_eq!(
+            parse_into_milliseconds("ms").unwrap_err(),
+            DateHistogramParseError::NumberMissing("ms".to_string())
+        );
+    }
+}
--- a/src/aggregation/bucket/histogram/histogram.rs
+++ b/src/aggregation/bucket/histogram/histogram.rs
@@ -1,7 +1,7 @@
 use std::cmp::Ordering;
 use std::fmt::Display;

-use fastfield_codecs::Column;
+use columnar::Column;
 use itertools::Itertools;
 use serde::{Deserialize, Serialize};

@@ -13,7 +13,9 @@ use crate::aggregation::agg_result::BucketEntry;
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResults, IntermediateBucketResult, IntermediateHistogramBucketEntry,
 };
-use crate::aggregation::segment_agg_result::SegmentAggregationResultsCollector;
+use crate::aggregation::segment_agg_result::{
+    GenericSegmentAggregationResultsCollector, SegmentAggregationCollector,
+};
 use crate::aggregation::{f64_from_fastfield_u64, format_date};
 use crate::schema::{Schema, Type};
 use crate::{DocId, TantivyError};
@@ -62,7 +64,6 @@ use crate::{DocId, TantivyError};
 ///
 /// Response
 /// See [`BucketEntry`](crate::aggregation::agg_result::BucketEntry)
-
 #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
 pub struct HistogramAggregation {
    /// The field to aggregate on.
@@ -184,7 +185,7 @@ pub(crate) struct SegmentHistogramBucketEntry {
 impl SegmentHistogramBucketEntry {
    pub(crate) fn into_intermediate_bucket_entry(
        self,
-        sub_aggregation: SegmentAggregationResultsCollector,
+        sub_aggregation: GenericSegmentAggregationResultsCollector,
        agg_with_accessor: &AggregationsWithAccessor,
    ) -> crate::Result<IntermediateHistogramBucketEntry> {
        Ok(IntermediateHistogramBucketEntry {
@@ -198,11 +199,11 @@ impl SegmentHistogramBucketEntry {

 /// The collector puts values from the fast field into the correct buckets and does a conversion to
 /// the correct datatype.
-#[derive(Clone, Debug, PartialEq)]
+#[derive(Clone, Debug)]
 pub struct SegmentHistogramCollector {
    /// The buckets containing the aggregation data.
    buckets: Vec<SegmentHistogramBucketEntry>,
-    sub_aggregations: Option<Vec<SegmentAggregationResultsCollector>>,
+    sub_aggregations: Option<Vec<GenericSegmentAggregationResultsCollector>>,
    field_type: Type,
    interval: f64,
    offset: f64,
@@ -283,7 +284,7 @@ impl SegmentHistogramCollector {
        req: &HistogramAggregation,
        sub_aggregation: &AggregationsWithAccessor,
        field_type: Type,
-        accessor: &dyn Column<u64>,
+        accessor: &Column<u64>,
    ) -> crate::Result<Self> {
        req.validate()?;
        let min = f64_from_fastfield_u64(accessor.min_value(), &field_type);
@@ -300,7 +301,7 @@ impl SegmentHistogramCollector {
            None
        } else {
            let sub_aggregation =
-                SegmentAggregationResultsCollector::from_req_and_validate(sub_aggregation)?;
+                GenericSegmentAggregationResultsCollector::from_req_and_validate(sub_aggregation)?;
            Some(buckets.iter().map(|_| sub_aggregation.clone()).collect())
        };

@@ -335,7 +336,7 @@ impl SegmentHistogramCollector {
    #[inline]
    pub(crate) fn collect_block(
        &mut self,
-        doc: &[DocId],
+        docs: &[DocId],
        bucket_with_accessor: &BucketAggregationWithAccessor,
        force_flush: bool,
    ) -> crate::Result<()> {
@@ -346,64 +347,20 @@ impl SegmentHistogramCollector {
        let get_bucket_num =
            |val| (get_bucket_num_f64(val, interval, offset) as i64 - first_bucket_num) as usize;

-        let accessor = bucket_with_accessor
-            .accessor
-            .as_single()
-            .expect("unexpected fast field cardinatility");
-        let mut iter = doc.chunks_exact(4);
-        for docs in iter.by_ref() {
-            let val0 = self.f64_from_fastfield_u64(accessor.get_val(docs[0]));
-            let val1 = self.f64_from_fastfield_u64(accessor.get_val(docs[1]));
-            let val2 = self.f64_from_fastfield_u64(accessor.get_val(docs[2]));
-            let val3 = self.f64_from_fastfield_u64(accessor.get_val(docs[3]));
+        let accessor = &bucket_with_accessor.accessor;
+        for doc in docs {
+            for val in accessor.values(*doc) {
+                let val = self.f64_from_fastfield_u64(val);

-            let bucket_pos0 = get_bucket_num(val0);
-            let bucket_pos1 = get_bucket_num(val1);
-            let bucket_pos2 = get_bucket_num(val2);
-            let bucket_pos3 = get_bucket_num(val3);
-
-            self.increment_bucket_if_in_bounds(
-                val0,
-                &bounds,
-                bucket_pos0,
-                docs[0],
-                &bucket_with_accessor.sub_aggregation,
-            )?;
-            self.increment_bucket_if_in_bounds(
-                val1,
-                &bounds,
-                bucket_pos1,
-                docs[1],
-                &bucket_with_accessor.sub_aggregation,
-            )?;
-            self.increment_bucket_if_in_bounds(
-                val2,
-                &bounds,
-                bucket_pos2,
-                docs[2],
-                &bucket_with_accessor.sub_aggregation,
-            )?;
-            self.increment_bucket_if_in_bounds(
-                val3,
-                &bounds,
-                bucket_pos3,
-                docs[3],
-                &bucket_with_accessor.sub_aggregation,
-            )?;
-        }
-        for &doc in iter.remainder() {
-            let val = f64_from_fastfield_u64(accessor.get_val(doc), &self.field_type);
-            if !bounds.contains(val) {
-                continue;
+                let bucket_pos = get_bucket_num(val);
+                self.increment_bucket_if_in_bounds(
+                    val,
+                    &bounds,
+                    bucket_pos,
+                    *doc,
+                    &bucket_with_accessor.sub_aggregation,
+                )?;
            }
-            let bucket_pos = (get_bucket_num_f64(val, self.interval, self.offset) as i64
-                - self.first_bucket_num) as usize;
-
-            debug_assert_eq!(
-                self.buckets[bucket_pos].key,
-                get_bucket_val(val, self.interval, self.offset)
-            );
-            self.increment_bucket(bucket_pos, doc, &bucket_with_accessor.sub_aggregation)?;
        }
        if force_flush {
            if let Some(sub_aggregations) = self.sub_aggregations.as_mut() {
--- a/src/aggregation/bucket/histogram/mod.rs
+++ b/src/aggregation/bucket/histogram/mod.rs
@@ -1,2 +1,4 @@
+mod date_histogram;
 mod histogram;
+pub use date_histogram::*;
 pub use histogram::*;
--- a/src/aggregation/bucket/range.rs
+++ b/src/aggregation/bucket/range.rs
@@ -1,7 +1,7 @@
 use std::fmt::Debug;
 use std::ops::Range;

-use fastfield_codecs::MonotonicallyMappableToU64;
+use columnar::MonotonicallyMappableToU64;
 use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};

@@ -11,7 +11,9 @@ use crate::aggregation::agg_req_with_accessor::{
 use crate::aggregation::intermediate_agg_result::{
    IntermediateBucketResult, IntermediateRangeBucketEntry, IntermediateRangeBucketResult,
 };
-use crate::aggregation::segment_agg_result::{BucketCount, SegmentAggregationResultsCollector};
+use crate::aggregation::segment_agg_result::{
+    BucketCount, GenericSegmentAggregationResultsCollector, SegmentAggregationCollector,
+};
 use crate::aggregation::{
    f64_from_fastfield_u64, f64_to_fastfield_u64, format_date, Key, SerializedKey,
 };
@@ -114,7 +116,7 @@ impl From<Range<u64>> for InternalRangeAggregationRange {
    }
 }

-#[derive(Clone, Debug, PartialEq)]
+#[derive(Clone, Debug)]
 pub(crate) struct SegmentRangeAndBucketEntry {
    range: Range<u64>,
    bucket: SegmentRangeBucketEntry,
@@ -122,18 +124,18 @@ pub(crate) struct SegmentRangeAndBucketEntry {

 /// The collector puts values from the fast field into the correct buckets and does a conversion to
 /// the correct datatype.
-#[derive(Clone, Debug, PartialEq)]
+#[derive(Clone, Debug)]
 pub struct SegmentRangeCollector {
    /// The buckets containing the aggregation data.
    buckets: Vec<SegmentRangeAndBucketEntry>,
    field_type: Type,
 }

-#[derive(Clone, PartialEq)]
+#[derive(Clone)]
 pub(crate) struct SegmentRangeBucketEntry {
    pub key: Key,
    pub doc_count: u64,
-    pub sub_aggregation: Option<SegmentAggregationResultsCollector>,
+    pub sub_aggregation: Option<GenericSegmentAggregationResultsCollector>,
    /// The from range of the bucket. Equals `f64::MIN` when `None`.
    pub from: Option<f64>,
    /// The to range of the bucket. Equals `f64::MAX` when `None`. Open interval, `to` is not
@@ -227,9 +229,11 @@ impl SegmentRangeCollector {
                let sub_aggregation = if sub_aggregation.is_empty() {
                    None
                } else {
-                    Some(SegmentAggregationResultsCollector::from_req_and_validate(
-                        sub_aggregation,
-                    )?)
+                    Some(
+                        GenericSegmentAggregationResultsCollector::from_req_and_validate(
+                            sub_aggregation,
+                        )?,
+                    )
                };

                Ok(SegmentRangeAndBucketEntry {
@@ -257,35 +261,18 @@ impl SegmentRangeCollector {
    #[inline]
    pub(crate) fn collect_block(
        &mut self,
-        doc: &[DocId],
+        docs: &[DocId],
        bucket_with_accessor: &BucketAggregationWithAccessor,
        force_flush: bool,
    ) -> crate::Result<()> {
-        let mut iter = doc.chunks_exact(4);
-        let accessor = bucket_with_accessor
-            .accessor
-            .as_single()
-            .expect("unexpected fast field cardinality");
-        for docs in iter.by_ref() {
-            let val1 = accessor.get_val(docs[0]);
-            let val2 = accessor.get_val(docs[1]);
-            let val3 = accessor.get_val(docs[2]);
-            let val4 = accessor.get_val(docs[3]);
-            let bucket_pos1 = self.get_bucket_pos(val1);
-            let bucket_pos2 = self.get_bucket_pos(val2);
-            let bucket_pos3 = self.get_bucket_pos(val3);
-            let bucket_pos4 = self.get_bucket_pos(val4);
+        let accessor = &bucket_with_accessor.accessor;
+        for doc in docs {
+            for val in accessor.values(*doc) {
+                let bucket_pos = self.get_bucket_pos(val);
+                self.increment_bucket(bucket_pos, *doc, &bucket_with_accessor.sub_aggregation)?;
+            }
+        }

-            self.increment_bucket(bucket_pos1, docs[0], &bucket_with_accessor.sub_aggregation)?;
-            self.increment_bucket(bucket_pos2, docs[1], &bucket_with_accessor.sub_aggregation)?;
-            self.increment_bucket(bucket_pos3, docs[2], &bucket_with_accessor.sub_aggregation)?;
-            self.increment_bucket(bucket_pos4, docs[3], &bucket_with_accessor.sub_aggregation)?;
-        }
-        for &doc in iter.remainder() {
-            let val = accessor.get_val(doc);
-            let bucket_pos = self.get_bucket_pos(val);
-            self.increment_bucket(bucket_pos, doc, &bucket_with_accessor.sub_aggregation)?;
-        }
        if force_flush {
            for bucket in &mut self.buckets {
                if let Some(sub_aggregation) = &mut bucket.bucket.sub_aggregation {
@@ -434,7 +421,7 @@ pub(crate) fn range_to_key(range: &Range<u64>, field_type: &Type) -> crate::Resu
 #[cfg(test)]
 mod tests {

-    use fastfield_codecs::MonotonicallyMappableToU64;
+    use columnar::MonotonicallyMappableToU64;
    use serde_json::Value;

    use super::*;
--- a/src/aggregation/bucket/term_agg.rs
+++ b/src/aggregation/bucket/term_agg.rs
@@ -1,5 +1,6 @@
 use std::fmt::Debug;

+use columnar::Column;
 use itertools::Itertools;
 use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};
@@ -11,9 +12,11 @@ use crate::aggregation::agg_req_with_accessor::{
 use crate::aggregation::intermediate_agg_result::{
    IntermediateBucketResult, IntermediateTermBucketEntry, IntermediateTermBucketResult,
 };
-use crate::aggregation::segment_agg_result::{BucketCount, SegmentAggregationResultsCollector};
+use crate::aggregation::segment_agg_result::{
+    build_segment_agg_collector, GenericSegmentAggregationResultsCollector,
+    SegmentAggregationCollector,
+};
 use crate::error::DataCorruption;
-use crate::fastfield::MultiValuedFastFieldReader;
 use crate::schema::Type;
 use crate::{DocId, TantivyError};

@@ -196,17 +199,16 @@ impl TermsAggregationInternal {
    }
 }

-#[derive(Clone, Debug, PartialEq)]
+#[derive(Clone, Debug, Default)]
 /// Container to store term_ids and their buckets.
 struct TermBuckets {
    pub(crate) entries: FxHashMap<u32, TermBucketEntry>,
-    blueprint: Option<SegmentAggregationResultsCollector>,
 }

-#[derive(Clone, PartialEq, Default)]
+#[derive(Clone, Default)]
 struct TermBucketEntry {
    doc_count: u64,
-    sub_aggregations: Option<SegmentAggregationResultsCollector>,
+    sub_aggregations: Option<Box<dyn SegmentAggregationCollector>>,
 }

 impl Debug for TermBucketEntry {
@@ -218,7 +220,7 @@ impl Debug for TermBucketEntry {
 }

 impl TermBucketEntry {
-    fn from_blueprint(blueprint: &Option<SegmentAggregationResultsCollector>) -> Self {
+    fn from_blueprint(blueprint: &Option<Box<dyn SegmentAggregationCollector>>) -> Self {
        Self {
            doc_count: 0,
            sub_aggregations: blueprint.clone(),
@@ -247,46 +249,11 @@ impl TermBuckets {
        sub_aggregation: &AggregationsWithAccessor,
        _max_term_id: usize,
    ) -> crate::Result<Self> {
-        let has_sub_aggregations = sub_aggregation.is_empty();
-
-        let blueprint = if has_sub_aggregations {
-            let sub_aggregation =
-                SegmentAggregationResultsCollector::from_req_and_validate(sub_aggregation)?;
-            Some(sub_aggregation)
-        } else {
-            None
-        };
-
        Ok(TermBuckets {
-            blueprint,
            entries: Default::default(),
        })
    }

-    fn increment_bucket(
-        &mut self,
-        term_ids: &[u64],
-        doc: DocId,
-        sub_aggregation: &AggregationsWithAccessor,
-        bucket_count: &BucketCount,
-        blueprint: &Option<SegmentAggregationResultsCollector>,
-    ) -> crate::Result<()> {
-        for &term_id in term_ids {
-            let entry = self.entries.entry(term_id as u32).or_insert_with(|| {
-                bucket_count.add_count(1);
-
-                TermBucketEntry::from_blueprint(blueprint)
-            });
-            entry.doc_count += 1;
-            if let Some(sub_aggregations) = entry.sub_aggregations.as_mut() {
-                sub_aggregations.collect(doc, sub_aggregation)?;
-            }
-        }
-        bucket_count.validate_bucket_count()?;
-
-        Ok(())
-    }
-
    fn force_flush(&mut self, agg_with_accessor: &AggregationsWithAccessor) -> crate::Result<()> {
        for entry in &mut self.entries.values_mut() {
            if let Some(sub_aggregations) = entry.sub_aggregations.as_mut() {
@@ -299,13 +266,12 @@ impl TermBuckets {

 /// The collector puts values from the fast field into the correct buckets and does a conversion to
 /// the correct datatype.
-#[derive(Clone, Debug, PartialEq)]
+#[derive(Clone, Debug)]
 pub struct SegmentTermCollector {
    /// The buckets containing the aggregation data.
    term_buckets: TermBuckets,
    req: TermsAggregationInternal,
-    field_type: Type,
-    blueprint: Option<SegmentAggregationResultsCollector>,
+    blueprint: Option<Box<dyn SegmentAggregationCollector>>,
 }

 pub(crate) fn get_agg_name_and_property(name: &str) -> (&str, &str) {
@@ -317,12 +283,8 @@ impl SegmentTermCollector {
    pub(crate) fn from_req_and_validate(
        req: &TermsAggregation,
        sub_aggregations: &AggregationsWithAccessor,
-        field_type: Type,
-        accessor: &MultiValuedFastFieldReader<u64>,
    ) -> crate::Result<Self> {
-        let max_term_id = accessor.max_value();
-        let term_buckets =
-            TermBuckets::from_req_and_validate(sub_aggregations, max_term_id as usize)?;
+        let term_buckets = TermBuckets::default();

        if let Some(custom_order) = req.order.as_ref() {
            // Validate sub aggregtion exists
@@ -340,8 +302,7 @@ impl SegmentTermCollector {

        let has_sub_aggregations = !sub_aggregations.is_empty();
        let blueprint = if has_sub_aggregations {
-            let sub_aggregation =
-                SegmentAggregationResultsCollector::from_req_and_validate(sub_aggregations)?;
+            let sub_aggregation = build_segment_agg_collector(sub_aggregations)?;
            Some(sub_aggregation)
        } else {
            None
@@ -350,7 +311,6 @@ impl SegmentTermCollector {
        Ok(SegmentTermCollector {
            req: TermsAggregationInternal::from_req(req),
            term_buckets,
-            field_type,
            blueprint,
        })
    }
@@ -368,7 +328,14 @@ impl SegmentTermCollector {

        match self.req.order.target {
            OrderTarget::Key => {
-                // defer order and cut_off after loading the texts from the dictionary
+                // We rely on the fact that term ordinals match the order of the strings.
+                // TODO: We could have a special collector that keeps only the top n results at any
+                // time.
+                if self.req.order.order == Order::Desc {
+                    entries.sort_unstable_by_key(|bucket| std::cmp::Reverse(bucket.0));
+                } else {
+                    entries.sort_unstable_by_key(|bucket| bucket.0);
+                }
            }
            OrderTarget::SubAggregation(_name) => {
                // don't sort and cut off since it's hard to make assumptions on the quality of the
@@ -384,34 +351,40 @@ impl SegmentTermCollector {
            }
        }

-        let (term_doc_count_before_cutoff, mut sum_other_doc_count) =
-            if order_by_key || order_by_sub_aggregation {
-                (0, 0)
-            } else {
-                cut_off_buckets(&mut entries, self.req.segment_size as usize)
-            };
+        let (term_doc_count_before_cutoff, mut sum_other_doc_count) = if order_by_sub_aggregation {
+            (0, 0)
+        } else {
+            cut_off_buckets(&mut entries, self.req.segment_size as usize)
+        };

        let inverted_index = agg_with_accessor
-            .inverted_index
+            .str_dict_column
            .as_ref()
            .expect("internal error: inverted index not loaded for term aggregation");
-        let term_dict = inverted_index.terms();
+        let term_dict = inverted_index;

        let mut dict: FxHashMap<String, IntermediateTermBucketEntry> = Default::default();
-        let mut buffer = vec![];
+        let mut buffer = String::new();
        for (term_id, entry) in entries {
-            term_dict
-                .ord_to_term(term_id as u64, &mut buffer)
-                .expect("could not find term");
+            if !term_dict.ord_to_str(term_id as u64, &mut buffer)? {
+                return Err(TantivyError::InternalError(format!(
+                    "Couldn't find term_id {} in dict",
+                    term_id
+                )));
+            }
            dict.insert(
-                String::from_utf8(buffer.to_vec())
-                    .map_err(|utf8_err| DataCorruption::comment_only(utf8_err.to_string()))?,
+                buffer.to_string(),
                entry.into_intermediate_bucket_entry(&agg_with_accessor.sub_aggregation)?,
            );
        }
        if self.req.min_doc_count == 0 {
-            let mut stream = term_dict.stream()?;
+            // TODO: Handle reverse streaming for descending sorting by keys
+            let mut stream = term_dict.dictionary().stream()?;
            while let Some((key, _ord)) = stream.next() {
+                if dict.len() >= self.req.segment_size as usize {
+                    break;
+                }
+
                let key = std::str::from_utf8(key)
                    .map_err(|utf8_err| DataCorruption::comment_only(utf8_err.to_string()))?;
                if !dict.contains_key(key) {
@@ -446,65 +419,26 @@ impl SegmentTermCollector {
    #[inline]
    pub(crate) fn collect_block(
        &mut self,
-        doc: &[DocId],
+        docs: &[DocId],
        bucket_with_accessor: &BucketAggregationWithAccessor,
        force_flush: bool,
    ) -> crate::Result<()> {
-        let accessor = bucket_with_accessor
-            .accessor
-            .as_multi()
-            .expect("unexpected fast field cardinatility");
-        let mut iter = doc.chunks_exact(4);
-        let mut vals1 = vec![];
-        let mut vals2 = vec![];
-        let mut vals3 = vec![];
-        let mut vals4 = vec![];
-        for docs in iter.by_ref() {
-            accessor.get_vals(docs[0], &mut vals1);
-            accessor.get_vals(docs[1], &mut vals2);
-            accessor.get_vals(docs[2], &mut vals3);
-            accessor.get_vals(docs[3], &mut vals4);
+        let accessor = &bucket_with_accessor.accessor;

-            self.term_buckets.increment_bucket(
-                &vals1,
-                docs[0],
-                &bucket_with_accessor.sub_aggregation,
-                &bucket_with_accessor.bucket_count,
-                &self.blueprint,
-            )?;
-            self.term_buckets.increment_bucket(
-                &vals2,
-                docs[1],
-                &bucket_with_accessor.sub_aggregation,
-                &bucket_with_accessor.bucket_count,
-                &self.blueprint,
-            )?;
-            self.term_buckets.increment_bucket(
-                &vals3,
-                docs[2],
-                &bucket_with_accessor.sub_aggregation,
-                &bucket_with_accessor.bucket_count,
-                &self.blueprint,
-            )?;
-            self.term_buckets.increment_bucket(
-                &vals4,
-                docs[3],
-                &bucket_with_accessor.sub_aggregation,
-                &bucket_with_accessor.bucket_count,
-                &self.blueprint,
-            )?;
+        for doc in docs {
+            for term_id in accessor.values(*doc) {
+                let entry = self
+                    .term_buckets
+                    .entries
+                    .entry(term_id as u32)
+                    .or_insert_with(|| TermBucketEntry::from_blueprint(&self.blueprint));
+                entry.doc_count += 1;
+                if let Some(sub_aggregations) = entry.sub_aggregations.as_mut() {
+                    sub_aggregations.collect(*doc, &bucket_with_accessor.sub_aggregation)?;
+                }
+            }
        }
-        for &doc in iter.remainder() {
-            accessor.get_vals(doc, &mut vals1);

-            self.term_buckets.increment_bucket(
-                &vals1,
-                doc,
-                &bucket_with_accessor.sub_aggregation,
-                &bucket_with_accessor.bucket_count,
-                &self.blueprint,
-            )?;
-        }
        if force_flush {
            self.term_buckets
                .force_flush(&bucket_with_accessor.sub_aggregation)?;
@@ -1207,36 +1141,37 @@ mod tests {
        Ok(())
    }

-    #[test]
-    fn terms_aggregation_term_bucket_limit() -> crate::Result<()> {
-        let terms: Vec<String> = (0..100_000).map(|el| el.to_string()).collect();
-        let terms_per_segment = vec![terms.iter().map(|el| el.as_str()).collect()];
+    // TODO: re-enable with memory limit
+    //#[test]
+    // fn terms_aggregation_term_bucket_limit() -> crate::Result<()> {
+    // let terms: Vec<String> = (0..100_000).map(|el| el.to_string()).collect();
+    // let terms_per_segment = vec![terms.iter().map(|el| el.as_str()).collect()];

-        let index = get_test_index_from_terms(true, &terms_per_segment)?;
+    // let index = get_test_index_from_terms(true, &terms_per_segment)?;

-        let agg_req: Aggregations = vec![(
-            "my_texts".to_string(),
-            Aggregation::Bucket(BucketAggregation {
-                bucket_agg: BucketAggregationType::Terms(TermsAggregation {
-                    field: "string_id".to_string(),
-                    min_doc_count: Some(0),
-                    ..Default::default()
-                }),
-                sub_aggregation: Default::default(),
-            }),
-        )]
-        .into_iter()
-        .collect();
+    // let agg_req: Aggregations = vec![(
+    //"my_texts".to_string(),
+    // Aggregation::Bucket(BucketAggregation {
+    // bucket_agg: BucketAggregationType::Terms(TermsAggregation {
+    // field: "string_id".to_string(),
+    // min_doc_count: Some(0),
+    //..Default::default()
+    //}),
+    // sub_aggregation: Default::default(),
+    //}),
+    //)]
+    //.into_iter()
+    //.collect();

-        let res = exec_request_with_query(agg_req, &index, None);
+    // let res = exec_request_with_query(agg_req, &index, None);

-        assert!(res.is_err());
+    // assert!(res.is_err());

-        Ok(())
-    }
+    // Ok(())
+    //}

    #[test]
-    fn terms_aggregation_multi_token_per_doc() -> crate::Result<()> {
+    fn terms_aggregation_different_tokenizer_on_ff_test() -> crate::Result<()> {
        let terms = vec!["Hello Hello", "Hallo Hallo"];

        let index = get_test_index_from_terms(true, &[terms])?;
@@ -1256,12 +1191,13 @@ mod tests {
        .collect();

        let res = exec_request_with_query(agg_req, &index, None).unwrap();
+        println!("{}", serde_json::to_string_pretty(&res).unwrap());

-        assert_eq!(res["my_texts"]["buckets"][0]["key"], "hello");
-        assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 2);
+        assert_eq!(res["my_texts"]["buckets"][0]["key"], "Hallo Hallo");
+        assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 1);

-        assert_eq!(res["my_texts"]["buckets"][1]["key"], "hallo");
-        assert_eq!(res["my_texts"]["buckets"][1]["doc_count"], 2);
+        assert_eq!(res["my_texts"]["buckets"][1]["key"], "Hello Hello");
+        assert_eq!(res["my_texts"]["buckets"][1]["doc_count"], 1);

        Ok(())
    }
@@ -1352,68 +1288,3 @@ mod tests {
        Ok(())
    }
 }
-
-#[cfg(all(test, feature = "unstable"))]
-mod bench {
-
-    use itertools::Itertools;
-    use rand::seq::SliceRandom;
-    use rand::thread_rng;
-
-    use super::*;
-
-    fn get_collector_with_buckets(num_docs: u64) -> TermBuckets {
-        TermBuckets::from_req_and_validate(&Default::default(), num_docs as usize).unwrap()
-    }
-
-    fn get_rand_terms(total_terms: u64, num_terms_returned: u64) -> Vec<u64> {
-        let mut rng = thread_rng();
-
-        let all_terms = (0..total_terms - 1).collect_vec();
-
-        let mut vals = vec![];
-        for _ in 0..num_terms_returned {
-            let val = all_terms.as_slice().choose(&mut rng).unwrap();
-            vals.push(*val);
-        }
-
-        vals
-    }
-
-    fn bench_term_buckets(b: &mut test::Bencher, num_terms: u64, total_terms: u64) {
-        let mut collector = get_collector_with_buckets(total_terms);
-        let vals = get_rand_terms(total_terms, num_terms);
-        let aggregations_with_accessor: AggregationsWithAccessor = Default::default();
-        let bucket_count: BucketCount = BucketCount {
-            bucket_count: Default::default(),
-            max_bucket_count: 1_000_001u32,
-        };
-        b.iter(|| {
-            for &val in &vals {
-                collector
-                    .increment_bucket(&[val], 0, &aggregations_with_accessor, &bucket_count, &None)
-                    .unwrap();
-            }
-        })
-    }
-
-    #[bench]
-    fn bench_term_buckets_500_of_1_000_000(b: &mut test::Bencher) {
-        bench_term_buckets(b, 500u64, 1_000_000u64)
-    }
-
-    #[bench]
-    fn bench_term_buckets_1_000_000_of_50_000(b: &mut test::Bencher) {
-        bench_term_buckets(b, 1_000_000u64, 50_000u64)
-    }
-
-    #[bench]
-    fn bench_term_buckets_1_000_000_of_50(b: &mut test::Bencher) {
-        bench_term_buckets(b, 1_000_000u64, 50u64)
-    }
-
-    #[bench]
-    fn bench_term_buckets_1_000_000_of_1_000_000(b: &mut test::Bencher) {
-        bench_term_buckets(b, 1_000_000u64, 1_000_000u64)
-    }
-}
--- a/src/aggregation/collector.rs
+++ b/src/aggregation/collector.rs
@@ -4,7 +4,10 @@ use super::agg_req::Aggregations;
 use super::agg_req_with_accessor::AggregationsWithAccessor;
 use super::agg_result::AggregationResults;
 use super::intermediate_agg_result::IntermediateAggregationResults;
-use super::segment_agg_result::SegmentAggregationResultsCollector;
+use super::segment_agg_result::{
+    build_segment_agg_collector, GenericSegmentAggregationResultsCollector,
+    SegmentAggregationCollector,
+};
 use crate::aggregation::agg_req_with_accessor::get_aggs_with_accessor_and_validate;
 use crate::collector::{Collector, SegmentCollector};
 use crate::schema::Schema;
@@ -137,7 +140,7 @@ fn merge_fruits(
 /// `AggregationSegmentCollector` does the aggregation collection on a segment.
 pub struct AggregationSegmentCollector {
    aggs_with_accessor: AggregationsWithAccessor,
-    result: SegmentAggregationResultsCollector,
+    result: Box<dyn SegmentAggregationCollector>,
    error: Option<TantivyError>,
 }

@@ -151,8 +154,7 @@ impl AggregationSegmentCollector {
    ) -> crate::Result<Self> {
        let aggs_with_accessor =
            get_aggs_with_accessor_and_validate(agg, reader, Rc::default(), max_bucket_count)?;
-        let result =
-            SegmentAggregationResultsCollector::from_req_and_validate(&aggs_with_accessor)?;
+        let result = build_segment_agg_collector(&aggs_with_accessor)?;
        Ok(AggregationSegmentCollector {
            aggs_with_accessor,
            result,
--- a/src/aggregation/intermediate_agg_result.rs
+++ b/src/aggregation/intermediate_agg_result.rs
@@ -222,24 +222,23 @@ pub enum IntermediateMetricResult {

 impl From<SegmentMetricResultCollector> for IntermediateMetricResult {
    fn from(tree: SegmentMetricResultCollector) -> Self {
+        use super::metric::SegmentStatsType;
        match tree {
            SegmentMetricResultCollector::Stats(collector) => match collector.collecting_for {
-                super::metric::SegmentStatsType::Average => IntermediateMetricResult::Average(
+                SegmentStatsType::Average => IntermediateMetricResult::Average(
                    IntermediateAverage::from_collector(collector),
                ),
-                super::metric::SegmentStatsType::Count => {
+                SegmentStatsType::Count => {
                    IntermediateMetricResult::Count(IntermediateCount::from_collector(collector))
                }
-                super::metric::SegmentStatsType::Max => {
+                SegmentStatsType::Max => {
                    IntermediateMetricResult::Max(IntermediateMax::from_collector(collector))
                }
-                super::metric::SegmentStatsType::Min => {
+                SegmentStatsType::Min => {
                    IntermediateMetricResult::Min(IntermediateMin::from_collector(collector))
                }
-                super::metric::SegmentStatsType::Stats => {
-                    IntermediateMetricResult::Stats(collector.stats)
-                }
-                super::metric::SegmentStatsType::Sum => {
+                SegmentStatsType::Stats => IntermediateMetricResult::Stats(collector.stats),
+                SegmentStatsType::Sum => {
                    IntermediateMetricResult::Sum(IntermediateSum::from_collector(collector))
                }
            },
--- a/src/aggregation/metric/stats.rs
+++ b/src/aggregation/metric/stats.rs
@@ -1,10 +1,17 @@
-use fastfield_codecs::Column;
+use columnar::Column;
 use serde::{Deserialize, Serialize};

-use crate::aggregation::f64_from_fastfield_u64;
+use crate::aggregation::agg_req_with_accessor::AggregationsWithAccessor;
+use crate::aggregation::intermediate_agg_result::{
+    IntermediateAggregationResults, IntermediateMetricResult,
+};
+use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
+use crate::aggregation::{f64_from_fastfield_u64, VecWithNames};
 use crate::schema::Type;
 use crate::{DocId, TantivyError};

+use super::*;
+
 /// A multi-value metric aggregation that computes a collection of statistics on numeric values that
 /// are extracted from the aggregated documents.
 /// See [`Stats`] for returned statistics.
@@ -160,27 +167,74 @@ impl SegmentStatsCollector {
            stats: IntermediateStats::default(),
        }
    }
-    pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &dyn Column<u64>) {
-        let mut iter = doc.chunks_exact(4);
-        for docs in iter.by_ref() {
-            let val1 = field.get_val(docs[0]);
-            let val2 = field.get_val(docs[1]);
-            let val3 = field.get_val(docs[2]);
-            let val4 = field.get_val(docs[3]);
-            let val1 = f64_from_fastfield_u64(val1, &self.field_type);
-            let val2 = f64_from_fastfield_u64(val2, &self.field_type);
-            let val3 = f64_from_fastfield_u64(val3, &self.field_type);
-            let val4 = f64_from_fastfield_u64(val4, &self.field_type);
+    pub(crate) fn collect_block(&mut self, docs: &[DocId], field: &Column<u64>) {
+        // TODO: add a special case for the Required and Optional column types
+        for doc in docs {
+            for val in field.values(*doc) {
+                let val1 = f64_from_fastfield_u64(val, &self.field_type);
+                self.stats.collect(val1);
+            }
+        }
+    }
+}
+
+impl SegmentAggregationCollector for SegmentStatsCollector {
+    fn into_intermediate_aggregations_result(
+        self: Box<Self>,
+        agg_with_accessor: &AggregationsWithAccessor,
+    ) -> crate::Result<IntermediateAggregationResults> {
+        let name = agg_with_accessor.metrics.keys[0].to_string();
+
+        let intermediate_metric_result = match self.collecting_for {
+            SegmentStatsType::Average => {
+                IntermediateMetricResult::Average(IntermediateAverage::from_collector(*self))
+            }
+            SegmentStatsType::Count => {
+                IntermediateMetricResult::Count(IntermediateCount::from_collector(*self))
+            }
+            SegmentStatsType::Max => {
+                IntermediateMetricResult::Max(IntermediateMax::from_collector(*self))
+            }
+            SegmentStatsType::Min => {
+                IntermediateMetricResult::Min(IntermediateMin::from_collector(*self))
+            }
+            SegmentStatsType::Stats => IntermediateMetricResult::Stats(self.stats),
+            SegmentStatsType::Sum => {
+                IntermediateMetricResult::Sum(IntermediateSum::from_collector(*self))
+            }
+        };
+
+        let metrics = Some(VecWithNames::from_entries(vec![(
+            name,
+            intermediate_metric_result,
+        )]));
+
+        Ok(IntermediateAggregationResults {
+            metrics,
+            buckets: None,
+        })
+    }
+
+    fn collect(
+        &mut self,
+        doc: crate::DocId,
+        agg_with_accessor: &AggregationsWithAccessor,
+    ) -> crate::Result<()> {
+        let accessor = &agg_with_accessor.metrics.values[0].accessor;
+        for val in accessor.values(doc) {
+            let val1 = f64_from_fastfield_u64(val, &self.field_type);
            self.stats.collect(val1);
-            self.stats.collect(val2);
-            self.stats.collect(val3);
-            self.stats.collect(val4);
-        }
-        for &doc in iter.remainder() {
-            let val = field.get_val(doc);
-            let val = f64_from_fastfield_u64(val, &self.field_type);
-            self.stats.collect(val);
        }
+
+        Ok(())
+    }
+
+    fn flush_staged_docs(
+        &mut self,
+        _agg_with_accessor: &AggregationsWithAccessor,
+        _force_flush: bool,
+    ) -> crate::Result<()> {
+        Ok(())
    }
 }

--- a/src/aggregation/mod.rs
+++ b/src/aggregation/mod.rs
@@ -172,8 +172,8 @@ pub use collector::{
    AggregationCollector, AggregationSegmentCollector, DistributedAggregationCollector,
    MAX_BUCKET_COUNT,
 };
+use columnar::MonotonicallyMappableToU64;
 pub(crate) use date::format_date;
-use fastfield_codecs::MonotonicallyMappableToU64;
 use itertools::Itertools;
 use serde::{Deserialize, Serialize};

@@ -182,7 +182,7 @@ use crate::schema::Type;
 /// Represents an associative array `(key => values)` in a very efficient manner.
 #[derive(Clone, PartialEq, Serialize, Deserialize)]
 pub(crate) struct VecWithNames<T: Clone> {
-    values: Vec<T>,
+    pub(crate) values: Vec<T>,
    keys: Vec<String>,
 }

@@ -248,9 +248,6 @@ impl<T: Clone> VecWithNames<T> {
    fn values_mut(&mut self) -> impl Iterator<Item = &mut T> + '_ {
        self.values.iter_mut()
    }
-    fn entries(&self) -> impl Iterator<Item = (&str, &T)> + '_ {
-        self.keys().zip(self.values.iter())
-    }
    fn is_empty(&self) -> bool {
        self.keys.is_empty()
    }
@@ -336,8 +333,9 @@ mod tests {
    use crate::aggregation::intermediate_agg_result::IntermediateAggregationResults;
    use crate::aggregation::segment_agg_result::DOC_BLOCK_SIZE;
    use crate::aggregation::DistributedAggregationCollector;
+    use crate::indexer::NoMergePolicy;
    use crate::query::{AllQuery, TermQuery};
-    use crate::schema::{Cardinality, IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING};
+    use crate::schema::{IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING};
    use crate::{DateTime, Index, Term};

    fn get_avg_req(field_name: &str) -> Aggregation {
@@ -432,8 +430,7 @@ mod tests {
        let text_field = schema_builder.add_text_field("text", text_fieldtype.clone());
        let text_field_id = schema_builder.add_text_field("text_id", text_fieldtype);
        let string_field_id = schema_builder.add_text_field("string_id", STRING | FAST);
-        let score_fieldtype =
-            crate::schema::NumericOptions::default().set_fast();
+        let score_fieldtype = crate::schema::NumericOptions::default().set_fast();
        let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
        let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
        let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
@@ -445,6 +442,7 @@ mod tests {
        {
            // let mut index_writer = index.writer_for_tests()?;
            let mut index_writer = index.writer_with_num_threads(1, 30_000_000)?;
+            index_writer.set_merge_policy(Box::new(NoMergePolicy));
            for values in segment_and_values {
                for (i, term) in values {
                    let i = *i;
@@ -656,13 +654,11 @@ mod tests {
        let text_field = schema_builder.add_text_field("text", text_fieldtype);
        let date_field = schema_builder.add_date_field("date", FAST);
        schema_builder.add_text_field("dummy_text", STRING);
-        let score_fieldtype =
-            crate::schema::NumericOptions::default().set_fast();
+        let score_fieldtype = crate::schema::NumericOptions::default().set_fast();
        let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
        let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());

-        let multivalue =
-            crate::schema::NumericOptions::default().set_fast();
+        let multivalue = crate::schema::NumericOptions::default().set_fast();
        let scores_field_i64 = schema_builder.add_i64_field("scores_i64", multivalue);

        let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
@@ -1168,6 +1164,9 @@ mod tests {
    #[cfg(all(test, feature = "unstable"))]
    mod bench {

+        use crate::aggregation::bucket::CustomOrder;
+        use crate::aggregation::bucket::Order;
+        use crate::aggregation::bucket::OrderTarget;
        use rand::prelude::SliceRandom;
        use rand::{thread_rng, Rng};
        use test::{self, Bencher};
@@ -1177,7 +1176,7 @@ mod tests {
        use crate::aggregation::metric::StatsAggregation;
        use crate::query::AllQuery;

-        fn get_test_index_bench(merge_segments: bool) -> crate::Result<Index> {
+        fn get_test_index_bench(_merge_segments: bool) -> crate::Result<Index> {
            let mut schema_builder = Schema::builder();
            let text_fieldtype = crate::schema::TextOptions::default()
                .set_indexing_options(
@@ -1189,20 +1188,19 @@ mod tests {
                schema_builder.add_text_field("text_many_terms", STRING | FAST);
            let text_field_few_terms =
                schema_builder.add_text_field("text_few_terms", STRING | FAST);
-            let score_fieldtype =
-                crate::schema::NumericOptions::default().set_fast();
+            let score_fieldtype = crate::schema::NumericOptions::default().set_fast();
            let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
            let score_field_f64 =
                schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
            let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
            let index = Index::create_from_tempdir(schema_builder.build())?;
            let few_terms_data = vec!["INFO", "ERROR", "WARN", "DEBUG"];
-            let many_terms_data = (0..15_000)
+            let many_terms_data = (0..150_000)
                .map(|num| format!("author{}", num))
                .collect::<Vec<_>>();
            {
                let mut rng = thread_rng();
-                let mut index_writer = index.writer_for_tests()?;
+                let mut index_writer = index.writer_with_num_threads(1, 100_000_000)?;
                // writing the segment
                for _ in 0..1_000_000 {
                    let val: f64 = rng.gen_range(0.0..1_000_000.0);
@@ -1217,14 +1215,6 @@ mod tests {
                }
                index_writer.commit()?;
            }
-            if merge_segments {
-                let segment_ids = index
-                    .searchable_segment_ids()
-                    .expect("Searchable segments failed.");
-                let mut index_writer = index.writer_for_tests()?;
-                index_writer.merge(&segment_ids).wait()?;
-                index_writer.wait_merging_threads()?;
-            }

            Ok(index)
        }
@@ -1376,7 +1366,42 @@ mod tests {
        }

        #[bench]
-        fn bench_aggregation_terms_many(b: &mut Bencher) {
+        fn bench_aggregation_terms_many_with_sub_agg(b: &mut Bencher) {
+            let index = get_test_index_bench(false).unwrap();
+            let reader = index.reader().unwrap();
+
+            b.iter(|| {
+                let sub_agg_req: Aggregations = vec![(
+                    "average_f64".to_string(),
+                    Aggregation::Metric(MetricAggregation::Average(
+                        AverageAggregation::from_field_name("score_f64".to_string()),
+                    )),
+                )]
+                .into_iter()
+                .collect();
+
+                let agg_req: Aggregations = vec![(
+                    "my_texts".to_string(),
+                    Aggregation::Bucket(BucketAggregation {
+                        bucket_agg: BucketAggregationType::Terms(TermsAggregation {
+                            field: "text_many_terms".to_string(),
+                            ..Default::default()
+                        }),
+                        sub_aggregation: sub_agg_req,
+                    }),
+                )]
+                .into_iter()
+                .collect();
+
+                let collector = AggregationCollector::from_aggs(agg_req, None, index.schema());
+
+                let searcher = reader.searcher();
+                searcher.search(&AllQuery, &collector).unwrap()
+            });
+        }
+
+        #[bench]
+        fn bench_aggregation_terms_many2(b: &mut Bencher) {
            let index = get_test_index_bench(false).unwrap();
            let reader = index.reader().unwrap();

@@ -1401,6 +1426,36 @@ mod tests {
            });
        }

+        #[bench]
+        fn bench_aggregation_terms_many_order_by_term(b: &mut Bencher) {
+            let index = get_test_index_bench(false).unwrap();
+            let reader = index.reader().unwrap();
+
+            b.iter(|| {
+                let agg_req: Aggregations = vec![(
+                    "my_texts".to_string(),
+                    Aggregation::Bucket(BucketAggregation {
+                        bucket_agg: BucketAggregationType::Terms(TermsAggregation {
+                            field: "text_many_terms".to_string(),
+                            order: Some(CustomOrder {
+                                order: Order::Desc,
+                                target: OrderTarget::Key,
+                            }),
+                            ..Default::default()
+                        }),
+                        sub_aggregation: Default::default(),
+                    }),
+                )]
+                .into_iter()
+                .collect();
+
+                let collector = AggregationCollector::from_aggs(agg_req, None, index.schema());
+
+                let searcher = reader.searcher();
+                searcher.search(&AllQuery, &collector).unwrap()
+            });
+        }
+
        #[bench]
        fn bench_aggregation_range_only(b: &mut Bencher) {
            let index = get_test_index_bench(false).unwrap();
--- a/src/aggregation/segment_agg_result.rs
+++ b/src/aggregation/segment_agg_result.rs
@@ -25,15 +25,90 @@ use crate::{DocId, TantivyError};
 pub(crate) const DOC_BLOCK_SIZE: usize = 64;
 pub(crate) type DocBlock = [DocId; DOC_BLOCK_SIZE];

-#[derive(Clone, PartialEq)]
-pub(crate) struct SegmentAggregationResultsCollector {
+pub(crate) trait SegmentAggregationCollector: CollectorClone + Debug {
+    fn into_intermediate_aggregations_result(
+        self: Box<Self>,
+        agg_with_accessor: &AggregationsWithAccessor,
+    ) -> crate::Result<IntermediateAggregationResults>;
+
+    fn collect(
+        &mut self,
+        doc: crate::DocId,
+        agg_with_accessor: &AggregationsWithAccessor,
+    ) -> crate::Result<()>;
+
+    fn flush_staged_docs(
+        &mut self,
+        agg_with_accessor: &AggregationsWithAccessor,
+        force_flush: bool,
+    ) -> crate::Result<()>;
+}
+
+pub(crate) trait CollectorClone {
+    fn clone_box(&self) -> Box<dyn SegmentAggregationCollector>;
+}
+
+impl<T> CollectorClone for T
+where
+    T: 'static + SegmentAggregationCollector + Clone,
+{
+    fn clone_box(&self) -> Box<dyn SegmentAggregationCollector> {
+        Box::new(self.clone())
+    }
+}
+
+impl Clone for Box<dyn SegmentAggregationCollector> {
+    fn clone(&self) -> Box<dyn SegmentAggregationCollector> {
+        self.clone_box()
+    }
+}
+
+pub(crate) fn build_segment_agg_collector(
+    req: &AggregationsWithAccessor,
+) -> crate::Result<Box<dyn SegmentAggregationCollector>> {
+    // Special case: no buckets and exactly one metric -> return a bare `SegmentStatsCollector`.
+    if req.buckets.is_empty() && req.metrics.len() == 1 {
+        let req = &req.metrics.values[0];
+        let stats_collector = match &req.metric {
+            MetricAggregation::Average(AverageAggregation { .. }) => {
+                SegmentStatsCollector::from_req(req.field_type, SegmentStatsType::Average)
+            }
+            MetricAggregation::Count(CountAggregation { .. }) => {
+                SegmentStatsCollector::from_req(req.field_type, SegmentStatsType::Count)
+            }
+            MetricAggregation::Max(MaxAggregation { .. }) => {
+                SegmentStatsCollector::from_req(req.field_type, SegmentStatsType::Max)
+            }
+            MetricAggregation::Min(MinAggregation { .. }) => {
+                SegmentStatsCollector::from_req(req.field_type, SegmentStatsType::Min)
+            }
+            MetricAggregation::Stats(StatsAggregation { .. }) => {
+                SegmentStatsCollector::from_req(req.field_type, SegmentStatsType::Stats)
+            }
+            MetricAggregation::Sum(SumAggregation { .. }) => {
+                SegmentStatsCollector::from_req(req.field_type, SegmentStatsType::Sum)
+            }
+        };
+
+        return Ok(Box::new(stats_collector));
+    }
+
+    let agg = GenericSegmentAggregationResultsCollector::from_req_and_validate(req)?;
+    Ok(Box::new(agg))
+}
+
+#[derive(Clone)]
+/// Generic version of the segment collector, able to handle arbitrarily complex
+/// sub-aggregations. Ideally this one is never picked and specialized collectors are provided
+/// instead (see `build_segment_agg_collector`), which remove some of its overhead.
+pub(crate) struct GenericSegmentAggregationResultsCollector {
    pub(crate) metrics: Option<VecWithNames<SegmentMetricResultCollector>>,
    pub(crate) buckets: Option<VecWithNames<SegmentBucketResultCollector>>,
    staged_docs: DocBlock,
    num_staged_docs: usize,
 }

-impl Default for SegmentAggregationResultsCollector {
+impl Default for GenericSegmentAggregationResultsCollector {
    fn default() -> Self {
        Self {
            metrics: Default::default(),
@@ -44,7 +119,7 @@ impl Default for SegmentAggregationResultsCollector {
    }
 }

-impl Debug for SegmentAggregationResultsCollector {
+impl Debug for GenericSegmentAggregationResultsCollector {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SegmentAggregationResultsCollector")
            .field("metrics", &self.metrics)
@@ -55,9 +130,9 @@ impl Debug for SegmentAggregationResultsCollector {
    }
 }

-impl SegmentAggregationResultsCollector {
-    pub fn into_intermediate_aggregations_result(
-        self,
+impl SegmentAggregationCollector for GenericSegmentAggregationResultsCollector {
+    fn into_intermediate_aggregations_result(
+        self: Box<Self>,
        agg_with_accessor: &AggregationsWithAccessor,
    ) -> crate::Result<IntermediateAggregationResults> {
        let buckets = if let Some(buckets) = self.buckets {
@@ -75,47 +150,7 @@ impl SegmentAggregationResultsCollector {
        Ok(IntermediateAggregationResults { metrics, buckets })
    }

-    pub(crate) fn from_req_and_validate(req: &AggregationsWithAccessor) -> crate::Result<Self> {
-        let buckets = req
-            .buckets
-            .entries()
-            .map(|(key, req)| {
-                Ok((
-                    key.to_string(),
-                    SegmentBucketResultCollector::from_req_and_validate(req)?,
-                ))
-            })
-            .collect::<crate::Result<Vec<(String, _)>>>()?;
-        let metrics = req
-            .metrics
-            .entries()
-            .map(|(key, req)| {
-                Ok((
-                    key.to_string(),
-                    SegmentMetricResultCollector::from_req_and_validate(req)?,
-                ))
-            })
-            .collect::<crate::Result<Vec<(String, _)>>>()?;
-        let metrics = if metrics.is_empty() {
-            None
-        } else {
-            Some(VecWithNames::from_entries(metrics))
-        };
-        let buckets = if buckets.is_empty() {
-            None
-        } else {
-            Some(VecWithNames::from_entries(buckets))
-        };
-        Ok(SegmentAggregationResultsCollector {
-            metrics,
-            buckets,
-            staged_docs: [0; DOC_BLOCK_SIZE],
-            num_staged_docs: 0,
-        })
-    }
-
-    #[inline]
-    pub(crate) fn collect(
+    fn collect(
        &mut self,
        doc: crate::DocId,
        agg_with_accessor: &AggregationsWithAccessor,
@@ -128,7 +163,7 @@ impl SegmentAggregationResultsCollector {
        Ok(())
    }

-    pub(crate) fn flush_staged_docs(
+    fn flush_staged_docs(
        &mut self,
        agg_with_accessor: &AggregationsWithAccessor,
        force_flush: bool,
@@ -162,6 +197,66 @@ impl SegmentAggregationResultsCollector {
    }
 }

+impl GenericSegmentAggregationResultsCollector {
+    pub fn into_intermediate_aggregations_result(
+        self,
+        agg_with_accessor: &AggregationsWithAccessor,
+    ) -> crate::Result<IntermediateAggregationResults> {
+        let buckets = if let Some(buckets) = self.buckets {
+            let entries = buckets
+                .into_iter()
+                .zip(agg_with_accessor.buckets.values())
+                .map(|((key, bucket), acc)| Ok((key, bucket.into_intermediate_bucket_result(acc)?)))
+                .collect::<crate::Result<Vec<(String, _)>>>()?;
+            Some(VecWithNames::from_entries(entries))
+        } else {
+            None
+        };
+        let metrics = self.metrics.map(VecWithNames::from_other);
+
+        Ok(IntermediateAggregationResults { metrics, buckets })
+    }
+
+    pub(crate) fn from_req_and_validate(req: &AggregationsWithAccessor) -> crate::Result<Self> {
+        let buckets = req
+            .buckets
+            .iter()
+            .map(|(key, req)| {
+                Ok((
+                    key.to_string(),
+                    SegmentBucketResultCollector::from_req_and_validate(req)?,
+                ))
+            })
+            .collect::<crate::Result<Vec<(String, _)>>>()?;
+        let metrics = req
+            .metrics
+            .iter()
+            .map(|(key, req)| {
+                Ok((
+                    key.to_string(),
+                    SegmentMetricResultCollector::from_req_and_validate(req)?,
+                ))
+            })
+            .collect::<crate::Result<Vec<(String, _)>>>()?;
+        let metrics = if metrics.is_empty() {
+            None
+        } else {
+            Some(VecWithNames::from_entries(metrics))
+        };
+        let buckets = if buckets.is_empty() {
+            None
+        } else {
+            Some(VecWithNames::from_entries(buckets))
+        };
+        Ok(GenericSegmentAggregationResultsCollector {
+            metrics,
+            buckets,
+            staged_docs: [0; DOC_BLOCK_SIZE],
+            num_staged_docs: 0,
+        })
+    }
+}
+
 #[derive(Clone, Debug, PartialEq)]
 pub(crate) enum SegmentMetricResultCollector {
    Stats(SegmentStatsCollector),
@@ -205,7 +300,7 @@ impl SegmentMetricResultCollector {
    pub(crate) fn collect_block(&mut self, doc: &[DocId], metric: &MetricAggregationWithAccessor) {
        match self {
            SegmentMetricResultCollector::Stats(stats_collector) => {
-                stats_collector.collect_block(doc, &*metric.accessor);
+                stats_collector.collect_block(doc, &metric.accessor);
            }
        }
    }
@@ -215,7 +310,7 @@ impl SegmentMetricResultCollector {
 /// segments.
 /// The typical structure of Map<Key, Bucket> is not suitable during collection for performance
 /// reasons.
-#[derive(Clone, Debug, PartialEq)]
+#[derive(Clone, Debug)]
 pub(crate) enum SegmentBucketResultCollector {
    Range(SegmentRangeCollector),
    Histogram(Box<SegmentHistogramCollector>),
@@ -243,14 +338,7 @@ impl SegmentBucketResultCollector {
    pub fn from_req_and_validate(req: &BucketAggregationWithAccessor) -> crate::Result<Self> {
        match &req.bucket_agg {
            BucketAggregationType::Terms(terms_req) => Ok(Self::Terms(Box::new(
-                SegmentTermCollector::from_req_and_validate(
-                    terms_req,
-                    &req.sub_aggregation,
-                    req.field_type,
-                    req.accessor
-                        .as_multi()
-                        .expect("unexpected fast field cardinality"),
-                )?,
+                SegmentTermCollector::from_req_and_validate(terms_req, &req.sub_aggregation)?,
            ))),
            BucketAggregationType::Range(range_req) => {
                Ok(Self::Range(SegmentRangeCollector::from_req_and_validate(
@@ -265,9 +353,7 @@ impl SegmentBucketResultCollector {
                    histogram,
                    &req.sub_aggregation,
                    req.field_type,
-                    req.accessor
-                        .as_single()
-                        .expect("unexpected fast field cardinality"),
+                    &req.accessor,
                )?,
            ))),
        }
--- a/src/collector/facet_collector.rs
+++ b/src/collector/facet_collector.rs
@@ -829,7 +829,7 @@ mod bench {
        let reader = index.reader().unwrap();
        b.iter(|| {
            let searcher = reader.searcher();
-            let facet_collector = FacetCollector::for_field(facet_field);
+            let facet_collector = FacetCollector::for_field("facet");
            searcher.search(&AllQuery, &facet_collector).unwrap();
        });
    }
--- a/src/core/searcher.rs
+++ b/src/core/searcher.rs
@@ -249,7 +249,7 @@ impl SearcherInner {
        index: Index,
        segment_readers: Vec<SegmentReader>,
        generation: TrackedObject<SearcherGeneration>,
-        doc_store_cache_size: usize,
+        doc_store_cache_num_blocks: usize,
    ) -> io::Result<SearcherInner> {
        assert_eq!(
            &segment_readers
@@ -261,7 +261,7 @@ impl SearcherInner {
        );
        let store_readers: Vec<StoreReader> = segment_readers
            .iter()
-            .map(|segment_reader| segment_reader.get_store_reader(doc_store_cache_size))
+            .map(|segment_reader| segment_reader.get_store_reader(doc_store_cache_num_blocks))
            .collect::<io::Result<Vec<_>>>()?;

        Ok(SearcherInner {
--- a/src/core/segment_reader.rs
+++ b/src/core/segment_reader.rs
@@ -128,9 +128,12 @@ impl SegmentReader {
        &self.fieldnorm_readers
    }

-    /// Accessor to the segment's `StoreReader`.
-    pub fn get_store_reader(&self, cache_size: usize) -> io::Result<StoreReader> {
-        StoreReader::open(self.store_file.clone(), cache_size)
+    /// Accessor to the segment's [`StoreReader`](crate::store::StoreReader).
+    ///
+    /// `cache_num_blocks` sets the number of decompressed blocks to be cached in an LRU.
+    /// Block size is configurable; `cache_num_blocks` should be chosen with that in mind.
+    pub fn get_store_reader(&self, cache_num_blocks: usize) -> io::Result<StoreReader> {
+        StoreReader::open(self.store_file.clone(), cache_num_blocks)
    }

    /// Open a new segment for reading.
--- a/src/directory/composite_file.rs
+++ b/src/directory/composite_file.rs
@@ -22,7 +22,7 @@ impl FileAddr {
 }

 impl BinarySerializable for FileAddr {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        self.field.serialize(writer)?;
        VInt(self.idx as u64).serialize(writer)?;
        Ok(())
--- a/src/fastfield/mod.rs
+++ b/src/fastfield/mod.rs
@@ -189,7 +189,7 @@ mod tests {
        }
        let file = directory.open_read(path).unwrap();

-        assert_eq!(file.len(), 161);
+        assert_eq!(file.len(), 157);
        let fast_field_readers = FastFieldReaders::open(file).unwrap();
        let column = fast_field_readers.u64("field").unwrap();
        assert_eq!(column.get_val(0), 13u64);
@@ -236,7 +236,7 @@ mod tests {
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 189);
+        assert_eq!(file.len(), 185);
        let fast_field_readers = FastFieldReaders::open(file).unwrap();
        let col = fast_field_readers.u64("field").unwrap();
        assert_eq!(col.get_val(0), 4u64);
@@ -266,7 +266,7 @@ mod tests {
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 162);
+        assert_eq!(file.len(), 158);
        let fast_field_readers = FastFieldReaders::open(file).unwrap();
        let fast_field_reader = fast_field_readers.u64("field").unwrap();
        for doc in 0..10_000 {
@@ -295,7 +295,7 @@ mod tests {
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 4557);
+        assert_eq!(file.len(), 80166);
        {
            let fast_field_readers = FastFieldReaders::open(file).unwrap();
            let col = fast_field_readers.u64("field").unwrap();
@@ -325,7 +325,7 @@ mod tests {
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 333_usize);
+        assert_eq!(file.len(), 329_usize);

        {
            let fast_field_readers = FastFieldReaders::open(file).unwrap();
@@ -793,7 +793,7 @@ mod tests {
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 175);
+        assert_eq!(file.len(), 171);
        let fast_field_readers = FastFieldReaders::open(file).unwrap();
        let bool_col = fast_field_readers.bool("field_bool").unwrap();
        assert_eq!(bool_col.get_val(0), true);
@@ -825,7 +825,7 @@ mod tests {
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 187);
+        assert_eq!(file.len(), 183);
        let readers = FastFieldReaders::open(file).unwrap();
        let bool_col = readers.bool("field_bool").unwrap();
        for i in 0..25 {
@@ -850,7 +850,7 @@ mod tests {
            write.terminate().unwrap();
        }
        let file = directory.open_read(path).unwrap();
-        assert_eq!(file.len(), 177);
+        assert_eq!(file.len(), 173);
        let fastfield_readers = FastFieldReaders::open(file).unwrap();
        let col = fastfield_readers.bool("field_bool").unwrap();
        assert_eq!(col.get_val(0), false);
--- a/src/fastfield/readers.rs
+++ b/src/fastfield/readers.rs
@@ -26,10 +26,6 @@ impl FastFieldReaders {
        Ok(FastFieldReaders { columnar })
    }

-    pub(crate) fn columnar(&self) -> &ColumnarReader {
-        self.columnar.as_ref()
-    }
-
    pub(crate) fn space_usage(&self, schema: &Schema) -> io::Result<PerFieldSpaceUsage> {
        let mut per_field_usages: Vec<FieldUsage> = Default::default();
        for (field, field_entry) in schema.fields() {
--- a/src/indexer/merger.rs
+++ b/src/indexer/merger.rs
@@ -1,7 +1,7 @@
 use std::collections::HashMap;
 use std::sync::Arc;

-use columnar::{ColumnValues, ColumnarReader, MergeRowOrder, StackMergeOrder};
+use columnar::ColumnValues;
 use itertools::Itertools;
 use measure_time::debug_time;

@@ -248,17 +248,13 @@ impl IndexMerger {
        mut term_ord_mappings: HashMap<Field, TermOrdinalMapping>,
        doc_id_mapping: &SegmentDocIdMapping,
    ) -> crate::Result<()> {
-        debug_time!("write-fast-fields");
-        let columnars: Vec<&ColumnarReader> = self
-            .readers
-            .iter()
-            .map(|reader| reader.fast_fields().columnar())
-            .collect();
-        if !doc_id_mapping.is_trivial() {
-            todo!()
+        debug_time!("wrie-fast-fields");
+        for (_field, field_entry) in self.schema.fields() {
+            if field_entry.is_fast() {
+                todo!();
+            }
        }
-        let merge_row_order = MergeRowOrder::Stack(StackMergeOrder::from_columnars(&columnars[..]));
-        columnar::merge_columnar(&columnars[..], merge_row_order, fast_field_wrt)?;
+
        // for (field, field_entry) in self.schema.fields() {
        // let field_type = field_entry.field_type();
        // match field_type {
--- a/src/indexer/merger_sorted_index_test.rs
+++ b/src/indexer/merger_sorted_index_test.rs
@@ -476,12 +476,11 @@ mod bench_sorted_index_merge {

    use std::sync::Arc;

-    use fastfield_codecs::Column;
    use test::{self, Bencher};

    use crate::core::Index;
    use crate::indexer::merger::IndexMerger;
-    use crate::schema::{Cardinality, NumericOptions, Schema};
+    use crate::schema::{NumericOptions, Schema};
    use crate::{IndexSettings, IndexSortByField, IndexWriter, Order};
    fn create_index(sort_by_field: Option<IndexSortByField>) -> Index {
        let mut schema_builder = Schema::builder();
@@ -512,42 +511,42 @@ mod bench_sorted_index_merge {
        index
    }

-    #[bench]
-    fn create_sorted_index_walk_overkmerge_on_merge_fastfield(
-        b: &mut Bencher,
-    ) -> crate::Result<()> {
-        let sort_by_field = IndexSortByField {
-            field: "intval".to_string(),
-            order: Order::Desc,
-        };
-        let index = create_index(Some(sort_by_field.clone()));
-        let segments = index.searchable_segments().unwrap();
-        let merger: IndexMerger =
-            IndexMerger::open(index.schema(), index.settings().clone(), &segments[..])?;
-        let doc_id_mapping = merger.generate_doc_id_mapping(&sort_by_field).unwrap();
-        b.iter(|| {
-            let sorted_doc_ids = doc_id_mapping.iter_old_doc_addrs().map(|doc_addr| {
-                let reader = &merger.readers[doc_addr.segment_ord as usize];
-                let u64_reader: Arc<dyn Column<u64>> = reader
-                    .fast_fields()
-                    .typed_fast_field_reader("intval")
-                    .expect(
-                        "Failed to find a reader for single fast field. This is a tantivy bug and \
-                         it should never happen.",
-                    );
-                (doc_addr.doc_id, reader, u64_reader)
-            });
-            // add values in order of the new doc_ids
-            let mut val = 0;
-            for (doc_id, _reader, field_reader) in sorted_doc_ids {
-                val = field_reader.get_val(doc_id);
-            }
+    //#[bench]
+    // fn create_sorted_index_walk_overkmerge_on_merge_fastfield(
+    // b: &mut Bencher,
+    //) -> crate::Result<()> {
+    // let sort_by_field = IndexSortByField {
+    // field: "intval".to_string(),
+    // order: Order::Desc,
+    //};
+    // let index = create_index(Some(sort_by_field.clone()));
+    // let segments = index.searchable_segments().unwrap();
+    // let merger: IndexMerger =
+    // IndexMerger::open(index.schema(), index.settings().clone(), &segments[..])?;
+    // let doc_id_mapping = merger.generate_doc_id_mapping(&sort_by_field).unwrap();
+    // b.iter(|| {
+    // let sorted_doc_ids = doc_id_mapping.iter_old_doc_addrs().map(|doc_addr| {
+    // let reader = &merger.readers[doc_addr.segment_ord as usize];
+    // let u64_reader: Arc<dyn Column<u64>> = reader
+    //.fast_fields()
+    //.typed_fast_field_reader("intval")
+    //.expect(
+    //"Failed to find a reader for single fast field. This is a tantivy bug and \
+    // it should never happen.",
+    //);
+    //(doc_addr.doc_id, reader, u64_reader)
+    //});
+    //// add values in order of the new doc_ids
+    // let mut val = 0;
+    // for (doc_id, _reader, field_reader) in sorted_doc_ids {
+    // val = field_reader.get_val(doc_id);
+    //}

-            val
-        });
+    // val
+    //});

-        Ok(())
-    }
+    // Ok(())
+    //}
    #[bench]
    fn create_sorted_index_create_doc_id_mapping(b: &mut Bencher) -> crate::Result<()> {
        let sort_by_field = IndexSortByField {
--- a/src/indexer/mod.rs
+++ b/src/indexer/mod.rs
@@ -19,6 +19,8 @@ mod segment_register;
 pub mod segment_serializer;
 pub mod segment_updater;
 mod segment_writer;
+// mod sorted_doc_id_column;
+// mod sorted_doc_id_multivalue_column;
 mod stamper;

 use crossbeam_channel as channel;
--- a/columnar/src/columnar/merge/sorted_doc_id_column.rs
+++ b/columnar/src/columnar/merge/sorted_doc_id_column.rs
--- a/columnar/src/columnar/merge/sorted_doc_id_multivalue_column.rs
+++ b/columnar/src/columnar/merge/sorted_doc_id_multivalue_column.rs
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -279,7 +279,7 @@ mod indexer;
 pub mod error;
 pub mod tokenizer;

-// pub mod aggregation;
+pub mod aggregation;
 pub mod collector;
 pub mod directory;
 pub mod fastfield;
--- a/src/postings/term_info.rs
+++ b/src/postings/term_info.rs
@@ -39,7 +39,7 @@ impl FixedSize for TermInfo {
 }

 impl BinarySerializable for TermInfo {
-    fn serialize<W: io::Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
        self.doc_freq.serialize(writer)?;
        (self.postings_range.start as u64).serialize(writer)?;
        self.posting_num_bytes().serialize(writer)?;
--- a/src/query/range_query/range_query_ip_fastfield.rs
+++ b/src/query/range_query/range_query_ip_fastfield.rs
@@ -88,7 +88,7 @@ fn bound_to_value_range(
 }

 #[cfg(test)]
-mod tests {
+pub mod tests {
    use proptest::prelude::ProptestConfig;
    use proptest::strategy::Strategy;
    use proptest::{prop_oneof, proptest};
@@ -188,7 +188,7 @@ mod tests {
        assert_eq!(count, 2);
    }

-    fn create_index_from_docs(docs: &[Doc]) -> Index {
+    pub fn create_index_from_docs(docs: &[Doc]) -> Index {
        let mut schema_builder = Schema::builder();
        let ip_field = schema_builder.add_ip_addr_field("ip", STORED | FAST);
        let ips_field = schema_builder.add_ip_addr_field("ips", FAST | INDEXED);
--- a/src/query/range_query/range_query_u64_fastfield.rs
+++ b/src/query/range_query/range_query_u64_fastfield.rs
@@ -86,7 +86,7 @@ fn bound_to_value_range<T: MonotonicallyMappableToU64>(
 }

 #[cfg(test)]
-mod tests {
+pub mod tests {
    use std::ops::{Bound, RangeInclusive};

    use proptest::prelude::ProptestConfig;
@@ -191,7 +191,7 @@ mod tests {
        assert!(test_id_range_for_docs(ops).is_ok());
    }

-    fn create_index_from_docs(docs: &[Doc]) -> Index {
+    pub fn create_index_from_docs(docs: &[Doc]) -> Index {
        let mut schema_builder = Schema::builder();
        let id_u64_field = schema_builder.add_u64_field("id", INDEXED | STORED | FAST);
        let ids_u64_field =
--- a/src/reader/mod.rs
+++ b/src/reader/mod.rs
@@ -44,7 +44,7 @@ pub struct IndexReaderBuilder {
    index: Index,
    warmers: Vec<Weak<dyn Warmer>>,
    num_warming_threads: usize,
-    doc_store_cache_size: usize,
+    doc_store_cache_num_blocks: usize,
 }

 impl IndexReaderBuilder {
@@ -55,7 +55,7 @@ impl IndexReaderBuilder {
            index,
            warmers: Vec::new(),
            num_warming_threads: 1,
-            doc_store_cache_size: DOCSTORE_CACHE_CAPACITY,
+            doc_store_cache_num_blocks: DOCSTORE_CACHE_CAPACITY,
        }
    }

@@ -72,7 +72,7 @@ impl IndexReaderBuilder {
            searcher_generation_inventory.clone(),
        )?;
        let inner_reader = InnerIndexReader::new(
-            self.doc_store_cache_size,
+            self.doc_store_cache_num_blocks,
            self.index,
            warming_state,
            searcher_generation_inventory,
@@ -119,8 +119,11 @@ impl IndexReaderBuilder {
    ///
    /// The doc store readers cache by default DOCSTORE_CACHE_CAPACITY(100) decompressed blocks.
    #[must_use]
-    pub fn doc_store_cache_size(mut self, doc_store_cache_size: usize) -> IndexReaderBuilder {
-        self.doc_store_cache_size = doc_store_cache_size;
+    pub fn doc_store_cache_num_blocks(
+        mut self,
+        doc_store_cache_num_blocks: usize,
+    ) -> IndexReaderBuilder {
+        self.doc_store_cache_num_blocks = doc_store_cache_num_blocks;
        self
    }

@@ -151,7 +154,7 @@ impl TryInto<IndexReader> for IndexReaderBuilder {
 }

 struct InnerIndexReader {
-    doc_store_cache_size: usize,
+    doc_store_cache_num_blocks: usize,
    index: Index,
    warming_state: WarmingState,
    searcher: arc_swap::ArcSwap<SearcherInner>,
@@ -161,7 +164,7 @@ struct InnerIndexReader {

 impl InnerIndexReader {
    fn new(
-        doc_store_cache_size: usize,
+        doc_store_cache_num_blocks: usize,
        index: Index,
        warming_state: WarmingState,
        // The searcher_generation_inventory is not used as source, but as target to track the
@@ -172,13 +175,13 @@ impl InnerIndexReader {

        let searcher = Self::create_searcher(
            &index,
-            doc_store_cache_size,
+            doc_store_cache_num_blocks,
            &warming_state,
            &searcher_generation_counter,
            &searcher_generation_inventory,
        )?;
        Ok(InnerIndexReader {
-            doc_store_cache_size,
+            doc_store_cache_num_blocks,
            index,
            warming_state,
            searcher: ArcSwap::from(searcher),
@@ -214,7 +217,7 @@ impl InnerIndexReader {

    fn create_searcher(
        index: &Index,
-        doc_store_cache_size: usize,
+        doc_store_cache_num_blocks: usize,
        warming_state: &WarmingState,
        searcher_generation_counter: &Arc<AtomicU64>,
        searcher_generation_inventory: &Inventory<SearcherGeneration>,
@@ -232,7 +235,7 @@ impl InnerIndexReader {
            index.clone(),
            segment_readers,
            searcher_generation,
-            doc_store_cache_size,
+            doc_store_cache_num_blocks,
        )?);

        warming_state.warm_new_searcher_generation(&searcher.clone().into())?;
@@ -242,7 +245,7 @@ impl InnerIndexReader {
    fn reload(&self) -> crate::Result<()> {
        let searcher = Self::create_searcher(
            &self.index,
-            self.doc_store_cache_size,
+            self.doc_store_cache_num_blocks,
            &self.warming_state,
            &self.searcher_generation_counter,
            &self.searcher_generation_inventory,
--- a/src/schema/document.rs
+++ b/src/schema/document.rs
@@ -228,7 +228,7 @@ impl Document {
 }

 impl BinarySerializable for Document {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        let field_values = self.field_values();
        VInt(field_values.len() as u64).serialize(writer)?;
        for field_value in field_values {
--- a/src/schema/facet.rs
+++ b/src/schema/facet.rs
@@ -191,7 +191,7 @@ impl<'a, T: ?Sized + AsRef<str>> From<&'a T> for Facet {
 }

 impl BinarySerializable for Facet {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        <String as BinarySerializable>::serialize(&self.0, writer)
    }

--- a/src/schema/field.rs
+++ b/src/schema/field.rs
@@ -23,7 +23,7 @@ impl Field {
 }

 impl BinarySerializable for Field {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        self.0.serialize(writer)
    }

--- a/src/schema/field_value.rs
+++ b/src/schema/field_value.rs
@@ -36,7 +36,7 @@ impl From<FieldValue> for Value {
 }

 impl BinarySerializable for FieldValue {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        self.field.serialize(writer)?;
        self.value.serialize(writer)
    }
--- a/src/schema/value.rs
+++ b/src/schema/value.rs
@@ -344,7 +344,7 @@ mod binary_serialize {
    const TOK_STR_CODE: u8 = 0;

    impl BinarySerializable for Value {
-        fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+        fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
            match *self {
                Value::Str(ref text) => {
                    TEXT_CODE.serialize(writer)?;
--- a/src/store/footer.rs
+++ b/src/store/footer.rs
@@ -16,7 +16,7 @@ pub struct DocStoreFooter {
 /// - compressor id: 1 byte
 /// - reserved for future use: 15 bytes
 impl BinarySerializable for DocStoreFooter {
-    fn serialize<W: io::Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+    fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
        BinarySerializable::serialize(&DOC_STORE_VERSION, writer)?;
        BinarySerializable::serialize(&self.offset, writer)?;
        BinarySerializable::serialize(&self.decompressor.get_id(), writer)?;
--- a/src/store/mod.rs
+++ b/src/store/mod.rs
@@ -4,8 +4,8 @@
 //! order to be handled in the `Store`.
 //!
 //! Internally, documents (or rather their stored fields) are serialized to a buffer.
-//! When the buffer exceeds 16K, the buffer is compressed using `brotli`, `LZ4` or `snappy`
-//! and the resulting block is written to disk.
+//! When the buffer exceeds `block_size` (defaults to 16K), the buffer is compressed using `brotli`,
+//! `LZ4` or `snappy` and the resulting block is written to disk.
 //!
 //! One can then request for a specific `DocId`.
 //! A skip list helps navigating to the right block,
@@ -28,8 +28,6 @@
 //! - at the segment level, the
 //! [`SegmentReader`'s `doc` method](../struct.SegmentReader.html#method.doc)
 //! - at the index level, the [`Searcher::doc()`](crate::Searcher::doc) method
-//!
-//! !

 mod compressors;
 mod decompressors;
--- a/src/store/reader.rs
+++ b/src/store/reader.rs
@@ -114,7 +114,10 @@ impl Sum for CacheStats {

 impl StoreReader {
    /// Opens a store reader
-    pub fn open(store_file: FileSlice, cache_size: usize) -> io::Result<StoreReader> {
+    ///
+    /// `cache_num_blocks` sets the number of decompressed blocks to be cached in an LRU.
+    /// The size of blocks is configurable; this should be reflected in `cache_num_blocks`.
+    pub fn open(store_file: FileSlice, cache_num_blocks: usize) -> io::Result<StoreReader> {
        let (footer, data_and_offset) = DocStoreFooter::extract_footer(store_file)?;

        let (data_file, offset_index_file) = data_and_offset.split(footer.offset as usize);
@@ -125,8 +128,8 @@ impl StoreReader {
            decompressor: footer.decompressor,
            data: data_file,
            cache: BlockCache {
-                cache: NonZeroUsize::new(cache_size)
-                    .map(|cache_size| Mutex::new(LruCache::new(cache_size))),
+                cache: NonZeroUsize::new(cache_num_blocks)
+                    .map(|cache_num_blocks| Mutex::new(LruCache::new(cache_num_blocks))),
                cache_hits: Default::default(),
                cache_misses: Default::default(),
            },
--- a/src/termdict/fst_termdict/term_info_store.rs
+++ b/src/termdict/fst_termdict/term_info_store.rs
@@ -21,7 +21,7 @@ struct TermInfoBlockMeta {
 }

 impl BinarySerializable for TermInfoBlockMeta {
-    fn serialize<W: Write + ?Sized>(&self, write: &mut W) -> io::Result<()> {
+    fn serialize<W: Write>(&self, write: &mut W) -> io::Result<()> {
        self.offset.serialize(write)?;
        self.ref_term_info.serialize(write)?;
        write.write_all(&[
@@ -272,7 +272,7 @@ impl TermInfoStoreWriter {
        Ok(())
    }

-    pub fn serialize<W: io::Write + ?Sized>(&mut self, write: &mut W) -> io::Result<()> {
+    pub fn serialize<W: io::Write>(&mut self, write: &mut W) -> io::Result<()> {
        if !self.term_infos.is_empty() {
            self.flush_block()?;
        }
--- a/sstable/src/dictionary.rs
+++ b/sstable/src/dictionary.rs
@@ -30,7 +30,6 @@ use crate::{BlockAddr, DeltaReader, Reader, SSTable, SSTableIndex, TermOrdinal,
 /// block boundary.
 ///
 /// (See also README.md)
-#[derive(Debug, Clone)]
 pub struct Dictionary<TSSTable: SSTable = VoidSSTable> {
    pub sstable_slice: FileSlice,
    pub sstable_index: SSTableIndex,
--- a/sstable/src/lib.rs
+++ b/sstable/src/lib.rs
@@ -117,7 +117,6 @@ impl SSTable for MonotonicU64SSTable {
 /// `range_sstable[k1].end == range_sstable[k2].start`.
 ///
 /// The first range is not required to start at `0`.
-#[derive(Clone, Copy, Debug)]
 pub struct RangeSSTable;

 impl SSTable for RangeSSTable {
--- a/sstable/src/sstable_index.rs
+++ b/sstable/src/sstable_index.rs
@@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize};

 use crate::{common_prefix_len, SSTableDataCorruption, TermOrdinal};

-#[derive(Default, Debug, Clone, Serialize, Deserialize)]
+#[derive(Default, Debug, Serialize, Deserialize)]
 pub struct SSTableIndex {
    blocks: Vec<BlockMeta>,
 }
@@ -75,7 +75,7 @@ pub struct BlockAddr {
    pub first_ordinal: u64,
 }

-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Serialize, Deserialize)]
 pub(crate) struct BlockMeta {
    /// Any byte string that is lexicographically greater or equal to
    /// the last key in the block,
Author	SHA1	Message	Date
Pascal Seitz	6e664d071f	perf: use term ordinal order when sorting by keys term ordinals are sorted lexicographically. we can use that to sort instead of reading the terms from the dictionary.	2023-02-09 16:27:18 +08:00
Pascal Seitz	cdffce906c	introduce SegmentAggregationCollector trait add dynamic dispatch for aggregation collection, to allow specialized implementations	2023-01-30 14:30:40 +08:00
Pascal Seitz	f08602fb7a	enable aggregation on columnar enable aggregation on columnar add support for multivalue for all types	2023-01-26 18:25:03 +08:00
Paul Masurel	f15cbfe368	Integration of columnar	2023-01-26 14:15:39 +08:00
Paul Masurel	d850054228	Introduced a select cursor.	2023-01-26 14:14:08 +08:00
PSeitz	0f20787917	fix doc store cache docs (#1821 ) * fix doc store cache docs addresses an issue reported in #1820 * rename doc_store_cache_size	2023-01-23 07:06:49 +01:00
Paul Masurel	2874554ee4	Removed the sorting logic that forced column type to be sorted like (#1816 ) * Removed the sorting logic that forced column type to be sorted like ColumnTypes. * add comments Co-authored-by: PSeitz <PSeitz@users.noreply.github.com>	2023-01-20 12:43:28 +01:00
PSeitz	cbc70a9eae	Cargo.toml cleanup (#1817 )	2023-01-20 12:30:35 +01:00
PSeitz	226d0f88bc	add columnar to workspace (#1808 )	2023-01-20 11:47:10 +01:00