Compare commits

..

1 Commit

Author: Raphaël Marinier
SHA1: 0890503fc2
Date: 2024-03-12 17:50:23 +01:00
Message: Speed up searches by removing repeated memsets coming from vec.resize()

    Also, reserve exactly the size needed, which is surprisingly needed to
    get the full speedup of ~5% on a good fraction of the queries.
101 changed files with 524 additions and 1123 deletions

View File

@@ -11,12 +11,12 @@ repository = "https://github.com/quickwit-oss/tantivy"
 readme = "README.md"
 keywords = ["search", "information", "retrieval"]
 edition = "2021"
-rust-version = "1.63"
+rust-version = "1.62"
 exclude = ["benches/*.json", "benches/*.txt"]
 [dependencies]
 oneshot = "0.1.5"
-base64 = "0.22.0"
+base64 = "0.21.0"
 byteorder = "1.4.3"
 crc32fast = "1.3.2"
 once_cell = "1.10.0"
@@ -78,9 +78,6 @@ paste = "1.0.11"
 more-asserts = "0.3.1"
 rand_distr = "0.4.3"
 time = { version = "0.3.10", features = ["serde-well-known", "macros"] }
-postcard = { version = "1.0.4", features = [
-    "use-std",
-], default-features = false }
 [target.'cfg(not(windows))'.dev-dependencies]
 criterion = { version = "0.5", default-features = false }

View File

@@ -1,3 +1,4 @@
+use std::convert::TryInto;
 use std::io;
 use std::ops::{Range, RangeInclusive};
@@ -124,6 +125,8 @@ impl BitUnpacker {
     // Decodes the range of bitpacked `u32` values with idx
     // in [start_idx, start_idx + output.len()).
+    // It is guaranteed to completely fill `output` and not read from it, so passing a vector with
+    // un-initialized values is safe.
     //
     // #Panics
     //
@@ -236,7 +239,19 @@ impl BitUnpacker {
         data: &[u8],
         positions: &mut Vec<u32>,
     ) {
-        positions.resize(id_range.len(), 0u32);
+        // We use the code below instead of positions.resize(id_range.len(), 0u32) for performance
+        // reasons: on some queries, the CPU cost of memsetting the array and of using a bigger
+        // vector than necessary is noticeable (~5%).
+        // In particular, searches are a few percent faster when using reserve_exact() as below
+        // instead of reserve().
+        // The un-initialized values are safe as get_batch_u32s() completely fills `positions`
+        // and does not read from it.
+        positions.clear();
+        positions.reserve_exact(id_range.len());
+        #[allow(clippy::uninit_vec)]
+        unsafe {
+            positions.set_len(id_range.len());
+        }
         self.get_batch_u32s(id_range.start, data, positions);
         crate::filter_vec::filter_vec_in_place(value_range, id_range.start, positions)
     }
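
For context, the hunk above swaps a zero-filling Vec::resize for clear() + reserve_exact() + an unsafe set_len(), trading the memset for a contract that the buffer is fully overwritten before it is read. Below is a minimal standalone sketch of that pattern, not taken from the patch; the names fill_without_memset and fill_all are illustrative only, and the soundness caveat is the same one the patch comment states.

fn fill_without_memset(out: &mut Vec<u32>, len: usize, fill_all: impl Fn(&mut [u32])) {
    out.clear();
    // reserve_exact() asks for exactly `len` slots; reserve() may round the capacity up.
    out.reserve_exact(len);
    #[allow(clippy::uninit_vec)]
    unsafe {
        // Sound only because `fill_all` writes every one of the `len` slots below and
        // nothing reads the buffer before that happens.
        out.set_len(len);
    }
    fill_all(&mut out[..]);
}

// Example: fill_without_memset(&mut positions, 1024, |buf| {
//     for (i, slot) in buf.iter_mut().enumerate() { *slot = i as u32; }
// });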

View File

@@ -17,7 +17,6 @@ sstable = { version= "0.2", path = "../sstable", package = "tantivy-sstable" }
common = { version= "0.6", path = "../common", package = "tantivy-common" } common = { version= "0.6", path = "../common", package = "tantivy-common" }
tantivy-bitpacker = { version= "0.5", path = "../bitpacker/" } tantivy-bitpacker = { version= "0.5", path = "../bitpacker/" }
serde = "1.0.152" serde = "1.0.152"
downcast-rs = "1.2.0"
[dev-dependencies] [dev-dependencies]
proptest = "1" proptest = "1"

View File

@@ -1,155 +0,0 @@
#![feature(test)]
extern crate test;
use std::sync::Arc;
use rand::prelude::*;
use tantivy_columnar::column_values::{serialize_and_load_u64_based_column_values, CodecType};
use tantivy_columnar::*;
use test::{black_box, Bencher};
struct Columns {
pub optional: Column,
pub full: Column,
pub multi: Column,
}
fn get_test_columns() -> Columns {
let data = generate_permutation();
let mut dataframe_writer = ColumnarWriter::default();
for (idx, val) in data.iter().enumerate() {
dataframe_writer.record_numerical(idx as u32, "full_values", NumericalValue::U64(*val));
if idx % 2 == 0 {
dataframe_writer.record_numerical(
idx as u32,
"optional_values",
NumericalValue::U64(*val),
);
}
dataframe_writer.record_numerical(idx as u32, "multi_values", NumericalValue::U64(*val));
dataframe_writer.record_numerical(idx as u32, "multi_values", NumericalValue::U64(*val));
}
let mut buffer: Vec<u8> = Vec::new();
dataframe_writer
.serialize(data.len() as u32, None, &mut buffer)
.unwrap();
let columnar = ColumnarReader::open(buffer).unwrap();
let cols: Vec<DynamicColumnHandle> = columnar.read_columns("optional_values").unwrap();
assert_eq!(cols.len(), 1);
let optional = cols[0].open_u64_lenient().unwrap().unwrap();
assert_eq!(optional.index.get_cardinality(), Cardinality::Optional);
let cols: Vec<DynamicColumnHandle> = columnar.read_columns("full_values").unwrap();
assert_eq!(cols.len(), 1);
let column_full = cols[0].open_u64_lenient().unwrap().unwrap();
assert_eq!(column_full.index.get_cardinality(), Cardinality::Full);
let cols: Vec<DynamicColumnHandle> = columnar.read_columns("multi_values").unwrap();
assert_eq!(cols.len(), 1);
let multi = cols[0].open_u64_lenient().unwrap().unwrap();
assert_eq!(multi.index.get_cardinality(), Cardinality::Multivalued);
Columns {
optional,
full: column_full,
multi,
}
}
const NUM_VALUES: u64 = 100_000;
fn generate_permutation() -> Vec<u64> {
let mut permutation: Vec<u64> = (0u64..NUM_VALUES).collect();
permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
permutation
}
pub fn serialize_and_load(column: &[u64], codec_type: CodecType) -> Arc<dyn ColumnValues<u64>> {
serialize_and_load_u64_based_column_values(&column, &[codec_type])
}
fn run_bench_on_column_full_scan(b: &mut Bencher, column: Column) {
let num_iter = black_box(NUM_VALUES);
b.iter(|| {
let mut sum = 0u64;
for i in 0..num_iter as u32 {
let val = column.first(i);
sum += val.unwrap_or(0);
}
sum
});
}
fn run_bench_on_column_block_fetch(b: &mut Bencher, column: Column) {
let mut block: Vec<Option<u64>> = vec![None; 64];
let fetch_docids = (0..64).collect::<Vec<_>>();
b.iter(move || {
column.first_vals(&fetch_docids, &mut block);
block[0]
});
}
fn run_bench_on_column_block_single_calls(b: &mut Bencher, column: Column) {
let mut block: Vec<Option<u64>> = vec![None; 64];
let fetch_docids = (0..64).collect::<Vec<_>>();
b.iter(move || {
for i in 0..fetch_docids.len() {
block[i] = column.first(fetch_docids[i]);
}
block[0]
});
}
/// Column first method
#[bench]
fn bench_get_first_on_full_column_full_scan(b: &mut Bencher) {
let column = get_test_columns().full;
run_bench_on_column_full_scan(b, column);
}
#[bench]
fn bench_get_first_on_optional_column_full_scan(b: &mut Bencher) {
let column = get_test_columns().optional;
run_bench_on_column_full_scan(b, column);
}
#[bench]
fn bench_get_first_on_multi_column_full_scan(b: &mut Bencher) {
let column = get_test_columns().multi;
run_bench_on_column_full_scan(b, column);
}
/// Block fetch column accessor
#[bench]
fn bench_get_block_first_on_optional_column(b: &mut Bencher) {
let column = get_test_columns().optional;
run_bench_on_column_block_fetch(b, column);
}
#[bench]
fn bench_get_block_first_on_multi_column(b: &mut Bencher) {
let column = get_test_columns().multi;
run_bench_on_column_block_fetch(b, column);
}
#[bench]
fn bench_get_block_first_on_full_column(b: &mut Bencher) {
let column = get_test_columns().full;
run_bench_on_column_block_fetch(b, column);
}
#[bench]
fn bench_get_block_first_on_optional_column_single_calls(b: &mut Bencher) {
let column = get_test_columns().optional;
run_bench_on_column_block_single_calls(b, column);
}
#[bench]
fn bench_get_block_first_on_multi_column_single_calls(b: &mut Bencher) {
let column = get_test_columns().multi;
run_bench_on_column_block_single_calls(b, column);
}
#[bench]
fn bench_get_block_first_on_full_column_single_calls(b: &mut Bencher) {
let column = get_test_columns().full;
run_bench_on_column_block_single_calls(b, column);
}

View File

@@ -16,6 +16,14 @@ fn generate_permutation() -> Vec<u64> {
permutation permutation
} }
fn generate_random() -> Vec<u64> {
let mut permutation: Vec<u64> = (0u64..100_000u64)
.map(|el| el + random::<u16>() as u64)
.collect();
permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
permutation
}
// Warning: this generates the same permutation at each call // Warning: this generates the same permutation at each call
fn generate_permutation_gcd() -> Vec<u64> { fn generate_permutation_gcd() -> Vec<u64> {
let mut permutation: Vec<u64> = (1u64..100_000u64).map(|el| el * 1000).collect(); let mut permutation: Vec<u64> = (1u64..100_000u64).map(|el| el * 1000).collect();

View File

@@ -14,32 +14,20 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
     ColumnBlockAccessor<T>
 {
     #[inline]
-    pub fn fetch_block<'a>(&'a mut self, docs: &'a [u32], accessor: &Column<T>) {
-        if accessor.index.get_cardinality().is_full() {
-            self.val_cache.resize(docs.len(), T::default());
-            accessor.values.get_vals(docs, &mut self.val_cache);
-        } else {
-            self.docid_cache.clear();
-            self.row_id_cache.clear();
-            accessor.row_ids_for_docs(docs, &mut self.docid_cache, &mut self.row_id_cache);
-            self.val_cache.resize(self.row_id_cache.len(), T::default());
-            accessor
-                .values
-                .get_vals(&self.row_id_cache, &mut self.val_cache);
-        }
+    pub fn fetch_block(&mut self, docs: &[u32], accessor: &Column<T>) {
+        self.docid_cache.clear();
+        self.row_id_cache.clear();
+        accessor.row_ids_for_docs(docs, &mut self.docid_cache, &mut self.row_id_cache);
+        self.val_cache.resize(self.row_id_cache.len(), T::default());
+        accessor
+            .values
+            .get_vals(&self.row_id_cache, &mut self.val_cache);
     }

     #[inline]
     pub fn fetch_block_with_missing(&mut self, docs: &[u32], accessor: &Column<T>, missing: T) {
         self.fetch_block(docs, accessor);
-        // no missing values
-        if accessor.index.get_cardinality().is_full() {
-            return;
-        }
-        // We can compare docid_cache length with docs to find missing docs
-        // For multi value columns we can't rely on the length and always need to scan
-        if accessor.index.get_cardinality().is_multivalue() || docs.len() != self.docid_cache.len()
-        {
+        // We can compare docid_cache with docs to find missing docs
+        if docs.len() != self.docid_cache.len() || accessor.index.is_multivalue() {
             self.missing_docids_cache.clear();
             find_missing_docs(docs, &self.docid_cache, |doc| {
                 self.missing_docids_cache.push(doc);
@@ -56,25 +44,11 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
     }

     #[inline]
-    /// Returns an iterator over the docids and values
-    /// The passed in `docs` slice needs to be the same slice that was passed to `fetch_block` or
-    /// `fetch_block_with_missing`.
-    ///
-    /// The docs is used if the column is full (each docs has exactly one value), otherwise the
-    /// internal docid vec is used for the iterator, which e.g. may contain duplicate docs.
-    pub fn iter_docid_vals<'a>(
-        &'a self,
-        docs: &'a [u32],
-        accessor: &Column<T>,
-    ) -> impl Iterator<Item = (DocId, T)> + '_ {
-        if accessor.index.get_cardinality().is_full() {
-            docs.iter().cloned().zip(self.val_cache.iter().cloned())
-        } else {
-            self.docid_cache
-                .iter()
-                .cloned()
-                .zip(self.val_cache.iter().cloned())
-        }
+    pub fn iter_docid_vals(&self) -> impl Iterator<Item = (DocId, T)> + '_ {
+        self.docid_cache
+            .iter()
+            .cloned()
+            .zip(self.val_cache.iter().cloned())
     }
 }
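
A hedged caller-side sketch of the accessor API touched above (types and method names are read off the hunk, not verified against either branch): fetch a block of values for a batch of docs, then walk the (doc, value) pairs.

fn sum_block(accessor: &Column<u64>, block: &mut ColumnBlockAccessor<u64>, docs: &[u32]) -> u64 {
    block.fetch_block(docs, accessor);
    // Argument-free variant ('+' side of the diff): zips the internal docid cache.
    block.iter_docid_vals().map(|(_doc, val)| val).sum()
    // With the variant that special-cases full columns ('-' side), the call becomes
    // `block.iter_docid_vals(docs, accessor)` and `docs` must be the same slice that was
    // passed to `fetch_block`.
}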

View File

@@ -3,17 +3,17 @@ mod serialize;
use std::fmt::{self, Debug}; use std::fmt::{self, Debug};
use std::io::Write; use std::io::Write;
use std::ops::{Range, RangeInclusive}; use std::ops::{Deref, Range, RangeInclusive};
use std::sync::Arc; use std::sync::Arc;
use common::BinarySerializable; use common::BinarySerializable;
pub use dictionary_encoded::{BytesColumn, StrColumn}; pub use dictionary_encoded::{BytesColumn, StrColumn};
pub use serialize::{ pub use serialize::{
open_column_bytes, open_column_str, open_column_u128, open_column_u128_as_compact_u64, open_column_bytes, open_column_str, open_column_u128, open_column_u64,
open_column_u64, serialize_column_mappable_to_u128, serialize_column_mappable_to_u64, serialize_column_mappable_to_u128, serialize_column_mappable_to_u64,
}; };
use crate::column_index::{ColumnIndex, Set}; use crate::column_index::ColumnIndex;
use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal; use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal;
use crate::column_values::{monotonic_map_column, ColumnValues}; use crate::column_values::{monotonic_map_column, ColumnValues};
use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId}; use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId};
@@ -83,36 +83,10 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
self.values.max_value() self.values.max_value()
} }
#[inline]
pub fn first(&self, row_id: RowId) -> Option<T> { pub fn first(&self, row_id: RowId) -> Option<T> {
self.values_for_doc(row_id).next() self.values_for_doc(row_id).next()
} }
/// Load the first value for each docid in the provided slice.
#[inline]
pub fn first_vals(&self, docids: &[DocId], output: &mut [Option<T>]) {
match &self.index {
ColumnIndex::Empty { .. } => {}
ColumnIndex::Full => self.values.get_vals_opt(docids, output),
ColumnIndex::Optional(optional_index) => {
for (i, docid) in docids.iter().enumerate() {
output[i] = optional_index
.rank_if_exists(*docid)
.map(|rowid| self.values.get_val(rowid));
}
}
ColumnIndex::Multivalued(multivalued_index) => {
for (i, docid) in docids.iter().enumerate() {
let range = multivalued_index.range(*docid);
let is_empty = range.start == range.end;
if !is_empty {
output[i] = Some(self.values.get_val(range.start));
}
}
}
}
}
/// Translates a block of docis to row_ids. /// Translates a block of docis to row_ids.
/// ///
/// returns the row_ids and the matching docids on the same index /// returns the row_ids and the matching docids on the same index
@@ -131,8 +105,7 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
} }
pub fn values_for_doc(&self, doc_id: DocId) -> impl Iterator<Item = T> + '_ { pub fn values_for_doc(&self, doc_id: DocId) -> impl Iterator<Item = T> + '_ {
self.index self.value_row_ids(doc_id)
.value_row_ids(doc_id)
.map(|value_row_id: RowId| self.values.get_val(value_row_id)) .map(|value_row_id: RowId| self.values.get_val(value_row_id))
} }
@@ -174,6 +147,14 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
} }
} }
impl<T> Deref for Column<T> {
type Target = ColumnIndex;
fn deref(&self) -> &Self::Target {
&self.index
}
}
impl BinarySerializable for Cardinality { impl BinarySerializable for Cardinality {
fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> std::io::Result<()> { fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> std::io::Result<()> {
self.to_code().serialize(writer) self.to_code().serialize(writer)
@@ -195,7 +176,6 @@ struct FirstValueWithDefault<T: Copy> {
impl<T: PartialOrd + Debug + Send + Sync + Copy + 'static> ColumnValues<T> impl<T: PartialOrd + Debug + Send + Sync + Copy + 'static> ColumnValues<T>
for FirstValueWithDefault<T> for FirstValueWithDefault<T>
{ {
#[inline(always)]
fn get_val(&self, idx: u32) -> T { fn get_val(&self, idx: u32) -> T {
self.column.first(idx).unwrap_or(self.default_value) self.column.first(idx).unwrap_or(self.default_value)
} }
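
The first_vals helper removed above batch-loads the first value per document, dispatching on the column's cardinality. A hedged usage sketch, valid only on the side of the diff that keeps the method:

fn load_first_values(column: &Column<u64>, docids: &[DocId]) -> Vec<Option<u64>> {
    // One Option slot per doc; docs without a value stay None.
    let mut out: Vec<Option<u64>> = vec![None; docids.len()];
    column.first_vals(docids, &mut out);
    out
}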

View File

@@ -76,26 +76,6 @@ pub fn open_column_u128<T: MonotonicallyMappableToU128>(
}) })
} }
/// Open the column as u64.
///
/// See [`open_u128_as_compact_u64`] for more details.
pub fn open_column_u128_as_compact_u64(bytes: OwnedBytes) -> io::Result<Column<u64>> {
let (body, column_index_num_bytes_payload) = bytes.rsplit(4);
let column_index_num_bytes = u32::from_le_bytes(
column_index_num_bytes_payload
.as_slice()
.try_into()
.unwrap(),
);
let (column_index_data, column_values_data) = body.split(column_index_num_bytes as usize);
let column_index = crate::column_index::open_column_index(column_index_data)?;
let column_values = crate::column_values::open_u128_as_compact_u64(column_values_data)?;
Ok(Column {
index: column_index,
values: column_values,
})
}
pub fn open_column_bytes(data: OwnedBytes) -> io::Result<BytesColumn> { pub fn open_column_bytes(data: OwnedBytes) -> io::Result<BytesColumn> {
let (body, dictionary_len_bytes) = data.rsplit(4); let (body, dictionary_len_bytes) = data.rsplit(4);
let dictionary_len = u32::from_le_bytes(dictionary_len_bytes.as_slice().try_into().unwrap()); let dictionary_len = u32::from_le_bytes(dictionary_len_bytes.as_slice().try_into().unwrap());

View File

@@ -140,7 +140,7 @@ mod tests {
#[test] #[test]
fn test_merge_column_index_optional_shuffle() { fn test_merge_column_index_optional_shuffle() {
let optional_index: ColumnIndex = OptionalIndex::for_test(2, &[0]).into(); let optional_index: ColumnIndex = OptionalIndex::for_test(2, &[0]).into();
let column_indexes = [optional_index, ColumnIndex::Full]; let column_indexes = vec![optional_index, ColumnIndex::Full];
let row_addrs = vec![ let row_addrs = vec![
RowAddr { RowAddr {
segment_ord: 0u32, segment_ord: 0u32,

View File

@@ -42,6 +42,10 @@ impl From<MultiValueIndex> for ColumnIndex {
} }
impl ColumnIndex { impl ColumnIndex {
#[inline]
pub fn is_multivalue(&self) -> bool {
matches!(self, ColumnIndex::Multivalued(_))
}
/// Returns the cardinality of the column index. /// Returns the cardinality of the column index.
/// ///
/// By convention, if the column contains no docs, we consider that it is /// By convention, if the column contains no docs, we consider that it is

View File

@@ -1,3 +1,4 @@
use std::convert::TryInto;
use std::io::{self, Write}; use std::io::{self, Write};
use common::BinarySerializable; use common::BinarySerializable;

View File

@@ -1,4 +1,5 @@
use proptest::prelude::*; use proptest::prelude::{any, prop, *};
use proptest::strategy::Strategy;
use proptest::{prop_oneof, proptest}; use proptest::{prop_oneof, proptest};
use super::*; use super::*;

View File

@@ -10,7 +10,7 @@ pub(crate) struct MergedColumnValues<'a, T> {
pub(crate) merge_row_order: &'a MergeRowOrder, pub(crate) merge_row_order: &'a MergeRowOrder,
} }
impl<'a, T: Copy + PartialOrd + Debug + 'static> Iterable<T> for MergedColumnValues<'a, T> { impl<'a, T: Copy + PartialOrd + Debug> Iterable<T> for MergedColumnValues<'a, T> {
fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_> { fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_> {
match self.merge_row_order { match self.merge_row_order {
MergeRowOrder::Stack(_) => Box::new( MergeRowOrder::Stack(_) => Box::new(

View File

@@ -10,7 +10,6 @@ use std::fmt::Debug;
use std::ops::{Range, RangeInclusive}; use std::ops::{Range, RangeInclusive};
use std::sync::Arc; use std::sync::Arc;
use downcast_rs::DowncastSync;
pub use monotonic_mapping::{MonotonicallyMappableToU64, StrictlyMonotonicFn}; pub use monotonic_mapping::{MonotonicallyMappableToU64, StrictlyMonotonicFn};
pub use monotonic_mapping_u128::MonotonicallyMappableToU128; pub use monotonic_mapping_u128::MonotonicallyMappableToU128;
@@ -26,10 +25,7 @@ mod monotonic_column;
pub(crate) use merge::MergedColumnValues; pub(crate) use merge::MergedColumnValues;
pub use stats::ColumnStats; pub use stats::ColumnStats;
pub use u128_based::{ pub use u128_based::{open_u128_mapped, serialize_column_values_u128};
open_u128_as_compact_u64, open_u128_mapped, serialize_column_values_u128,
CompactSpaceU64Accessor,
};
pub use u64_based::{ pub use u64_based::{
load_u64_based_column_values, serialize_and_load_u64_based_column_values, load_u64_based_column_values, serialize_and_load_u64_based_column_values,
serialize_u64_based_column_values, CodecType, ALL_U64_CODEC_TYPES, serialize_u64_based_column_values, CodecType, ALL_U64_CODEC_TYPES,
@@ -45,7 +41,7 @@ use crate::RowId;
/// ///
/// Any methods with a default and specialized implementation need to be called in the /// Any methods with a default and specialized implementation need to be called in the
/// wrappers that implement the trait: Arc and MonotonicMappingColumn /// wrappers that implement the trait: Arc and MonotonicMappingColumn
pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync + DowncastSync { pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync {
/// Return the value associated with the given idx. /// Return the value associated with the given idx.
/// ///
/// This accessor should return as fast as possible. /// This accessor should return as fast as possible.
@@ -72,40 +68,11 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync + DowncastSync {
             out_x4[3] = self.get_val(idx_x4[3]);
         }
-        let out_and_idx_chunks = output
-            .chunks_exact_mut(4)
-            .into_remainder()
-            .iter_mut()
-            .zip(indexes.chunks_exact(4).remainder());
-        for (out, idx) in out_and_idx_chunks {
-            *out = self.get_val(*idx);
-        }
-    }
-
-    /// Allows to push down multiple fetch calls, to avoid dynamic dispatch overhead.
-    /// The slightly weird `Option<T>` in output allows pushdown to full columns.
-    ///
-    /// idx and output should have the same length
-    ///
-    /// # Panics
-    ///
-    /// May panic if `idx` is greater than the column length.
-    fn get_vals_opt(&self, indexes: &[u32], output: &mut [Option<T>]) {
-        assert!(indexes.len() == output.len());
-        let out_and_idx_chunks = output.chunks_exact_mut(4).zip(indexes.chunks_exact(4));
-        for (out_x4, idx_x4) in out_and_idx_chunks {
-            out_x4[0] = Some(self.get_val(idx_x4[0]));
-            out_x4[1] = Some(self.get_val(idx_x4[1]));
-            out_x4[2] = Some(self.get_val(idx_x4[2]));
-            out_x4[3] = Some(self.get_val(idx_x4[3]));
-        }
-        let out_and_idx_chunks = output
-            .chunks_exact_mut(4)
-            .into_remainder()
-            .iter_mut()
-            .zip(indexes.chunks_exact(4).remainder());
-        for (out, idx) in out_and_idx_chunks {
-            *out = Some(self.get_val(*idx));
+        let step_size = 4;
+        let cutoff = indexes.len() - indexes.len() % step_size;
+
+        for idx in cutoff..indexes.len() {
+            output[idx] = self.get_val(indexes[idx]);
         }
     }
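
Both sides of the hunk above fetch values in groups of four and then sweep the tail; they differ only in how the remainder is expressed (chunks_exact remainder vs. an explicit cutoff index). A minimal sketch of the same batching over a plain slice, not taken from the patch:

fn gather(values: &[u64], indexes: &[u32], output: &mut [u64]) {
    assert_eq!(indexes.len(), output.len());
    // Full groups of four, unrolled so the lookups can pipeline.
    for (out_x4, idx_x4) in output.chunks_exact_mut(4).zip(indexes.chunks_exact(4)) {
        out_x4[0] = values[idx_x4[0] as usize];
        out_x4[1] = values[idx_x4[1] as usize];
        out_x4[2] = values[idx_x4[2] as usize];
        out_x4[3] = values[idx_x4[3] as usize];
    }
    // Tail of fewer than four entries, equivalent to the `cutoff` loop in the hunk.
    let cutoff = indexes.len() - indexes.len() % 4;
    for i in cutoff..indexes.len() {
        output[i] = values[indexes[i] as usize];
    }
}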
@@ -172,7 +139,6 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync + DowncastSync {
Box::new((0..self.num_vals()).map(|idx| self.get_val(idx))) Box::new((0..self.num_vals()).map(|idx| self.get_val(idx)))
} }
} }
downcast_rs::impl_downcast!(sync ColumnValues<T> where T: PartialOrd);
/// Empty column of values. /// Empty column of values.
pub struct EmptyColumnValues; pub struct EmptyColumnValues;
@@ -195,17 +161,12 @@ impl<T: PartialOrd + Default> ColumnValues<T> for EmptyColumnValues {
} }
} }
impl<T: Copy + PartialOrd + Debug + 'static> ColumnValues<T> for Arc<dyn ColumnValues<T>> { impl<T: Copy + PartialOrd + Debug> ColumnValues<T> for Arc<dyn ColumnValues<T>> {
#[inline(always)] #[inline(always)]
fn get_val(&self, idx: u32) -> T { fn get_val(&self, idx: u32) -> T {
self.as_ref().get_val(idx) self.as_ref().get_val(idx)
} }
#[inline(always)]
fn get_vals_opt(&self, indexes: &[u32], output: &mut [Option<T>]) {
self.as_ref().get_vals_opt(indexes, output)
}
#[inline(always)] #[inline(always)]
fn min_value(&self) -> T { fn min_value(&self) -> T {
self.as_ref().min_value() self.as_ref().min_value()

View File

@@ -31,10 +31,10 @@ pub fn monotonic_map_column<C, T, Input, Output>(
monotonic_mapping: T, monotonic_mapping: T,
) -> impl ColumnValues<Output> ) -> impl ColumnValues<Output>
where where
C: ColumnValues<Input> + 'static, C: ColumnValues<Input>,
T: StrictlyMonotonicFn<Input, Output> + Send + Sync + 'static, T: StrictlyMonotonicFn<Input, Output> + Send + Sync,
Input: PartialOrd + Debug + Send + Sync + Clone + 'static, Input: PartialOrd + Debug + Send + Sync + Clone,
Output: PartialOrd + Debug + Send + Sync + Clone + 'static, Output: PartialOrd + Debug + Send + Sync + Clone,
{ {
MonotonicMappingColumn { MonotonicMappingColumn {
from_column, from_column,
@@ -45,10 +45,10 @@ where
impl<C, T, Input, Output> ColumnValues<Output> for MonotonicMappingColumn<C, T, Input> impl<C, T, Input, Output> ColumnValues<Output> for MonotonicMappingColumn<C, T, Input>
where where
C: ColumnValues<Input> + 'static, C: ColumnValues<Input>,
T: StrictlyMonotonicFn<Input, Output> + Send + Sync + 'static, T: StrictlyMonotonicFn<Input, Output> + Send + Sync,
Input: PartialOrd + Send + Debug + Sync + Clone + 'static, Input: PartialOrd + Send + Debug + Sync + Clone,
Output: PartialOrd + Send + Debug + Sync + Clone + 'static, Output: PartialOrd + Send + Debug + Sync + Clone,
{ {
#[inline(always)] #[inline(always)]
fn get_val(&self, idx: u32) -> Output { fn get_val(&self, idx: u32) -> Output {
@@ -107,7 +107,7 @@ mod tests {
#[test] #[test]
fn test_monotonic_mapping_iter() { fn test_monotonic_mapping_iter() {
let vals: Vec<u64> = (0..100u64).map(|el| el * 10).collect(); let vals: Vec<u64> = (0..100u64).map(|el| el * 10).collect();
let col = VecColumn::from(vals); let col = VecColumn::from(&vals);
let mapped = monotonic_map_column( let mapped = monotonic_map_column(
col, col,
StrictlyMonotonicMappingInverter::from(StrictlyMonotonicMappingToInternal::<i64>::new()), StrictlyMonotonicMappingInverter::from(StrictlyMonotonicMappingToInternal::<i64>::new()),

View File

@@ -22,7 +22,7 @@ mod build_compact_space;
use build_compact_space::get_compact_space; use build_compact_space::get_compact_space;
use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128}; use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128};
use tantivy_bitpacker::{BitPacker, BitUnpacker}; use tantivy_bitpacker::{self, BitPacker, BitUnpacker};
use crate::column_values::ColumnValues; use crate::column_values::ColumnValues;
use crate::RowId; use crate::RowId;
@@ -148,7 +148,7 @@ impl CompactSpace {
.binary_search_by_key(&compact, |range_mapping| range_mapping.compact_start) .binary_search_by_key(&compact, |range_mapping| range_mapping.compact_start)
// Correctness: Overflow. The first range starts at compact space 0, the error from // Correctness: Overflow. The first range starts at compact space 0, the error from
// binary search can never be 0 // binary search can never be 0
.unwrap_or_else(|e| e - 1); .map_or_else(|e| e - 1, |v| v);
let range_mapping = &self.ranges_mapping[pos]; let range_mapping = &self.ranges_mapping[pos];
let diff = compact - range_mapping.compact_start; let diff = compact - range_mapping.compact_start;
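
Both spellings above (`unwrap_or_else(|e| e - 1)` and `map_or_else(|e| e - 1, |v| v)`) resolve a failed binary search to the range whose start lies just below the key: Err(e) is the insertion point, so e - 1 indexes the containing range, and e can never be 0 because the first range starts at compact value 0. A hedged sketch with simplified types, not the crate's:

fn containing_range(starts: &[u32], compact: u32) -> usize {
    starts
        .binary_search(&compact)
        // Exact hit: that range. Miss: the insertion point minus one, which is safe as
        // long as starts[0] == 0, so the Err value can never be 0.
        .unwrap_or_else(|insertion_point| insertion_point - 1)
}

// containing_range(&[0, 10, 50], 7) == 0, containing_range(&[0, 10, 50], 10) == 1.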
@@ -292,63 +292,6 @@ impl BinarySerializable for IPCodecParams {
} }
} }
/// Exposes the compact space compressed values as u64.
///
/// This allows faster access to the values, as u64 is faster to work with than u128.
/// It also allows to handle u128 values like u64, via the `open_u64_lenient` as a uniform
/// access interface.
///
/// When converting from the internal u64 to u128 `compact_to_u128` can be used.
pub struct CompactSpaceU64Accessor(CompactSpaceDecompressor);
impl CompactSpaceU64Accessor {
pub(crate) fn open(data: OwnedBytes) -> io::Result<CompactSpaceU64Accessor> {
let decompressor = CompactSpaceU64Accessor(CompactSpaceDecompressor::open(data)?);
Ok(decompressor)
}
/// Convert a compact space value to u128
pub fn compact_to_u128(&self, compact: u32) -> u128 {
self.0.compact_to_u128(compact)
}
}
impl ColumnValues<u64> for CompactSpaceU64Accessor {
#[inline]
fn get_val(&self, doc: u32) -> u64 {
let compact = self.0.get_compact(doc);
compact as u64
}
fn min_value(&self) -> u64 {
self.0.u128_to_compact(self.0.min_value()).unwrap() as u64
}
fn max_value(&self) -> u64 {
self.0.u128_to_compact(self.0.max_value()).unwrap() as u64
}
fn num_vals(&self) -> u32 {
self.0.params.num_vals
}
#[inline]
fn iter(&self) -> Box<dyn Iterator<Item = u64> + '_> {
Box::new(self.0.iter_compact().map(|el| el as u64))
}
#[inline]
fn get_row_ids_for_value_range(
&self,
value_range: RangeInclusive<u64>,
position_range: Range<u32>,
positions: &mut Vec<u32>,
) {
let value_range = self.0.compact_to_u128(*value_range.start() as u32)
..=self.0.compact_to_u128(*value_range.end() as u32);
self.0
.get_row_ids_for_value_range(value_range, position_range, positions)
}
}
impl ColumnValues<u128> for CompactSpaceDecompressor { impl ColumnValues<u128> for CompactSpaceDecompressor {
#[inline] #[inline]
fn get_val(&self, doc: u32) -> u128 { fn get_val(&self, doc: u32) -> u128 {
@@ -459,14 +402,9 @@ impl CompactSpaceDecompressor {
.map(|compact| self.compact_to_u128(compact)) .map(|compact| self.compact_to_u128(compact))
} }
#[inline]
pub fn get_compact(&self, idx: u32) -> u32 {
self.params.bit_unpacker.get(idx, &self.data) as u32
}
#[inline] #[inline]
pub fn get(&self, idx: u32) -> u128 { pub fn get(&self, idx: u32) -> u128 {
let compact = self.get_compact(idx); let compact = self.params.bit_unpacker.get(idx, &self.data) as u32;
self.compact_to_u128(compact) self.compact_to_u128(compact)
} }

View File

@@ -6,9 +6,7 @@ use std::sync::Arc;
mod compact_space; mod compact_space;
use common::{BinarySerializable, OwnedBytes, VInt}; use common::{BinarySerializable, OwnedBytes, VInt};
pub use compact_space::{ use compact_space::{CompactSpaceCompressor, CompactSpaceDecompressor};
CompactSpaceCompressor, CompactSpaceDecompressor, CompactSpaceU64Accessor,
};
use crate::column_values::monotonic_map_column; use crate::column_values::monotonic_map_column;
use crate::column_values::monotonic_mapping::{ use crate::column_values::monotonic_mapping::{
@@ -110,23 +108,6 @@ pub fn open_u128_mapped<T: MonotonicallyMappableToU128 + Debug>(
StrictlyMonotonicMappingToInternal::<T>::new().into(); StrictlyMonotonicMappingToInternal::<T>::new().into();
Ok(Arc::new(monotonic_map_column(reader, inverted))) Ok(Arc::new(monotonic_map_column(reader, inverted)))
} }
/// Returns the u64 representation of the u128 data.
/// The internal representation of the data as u64 is useful for faster processing.
///
/// In order to convert to u128 back cast to `CompactSpaceU64Accessor` and call
/// `compact_to_u128`.
///
/// # Notice
/// In case there are new codecs added, check for usages of `CompactSpaceDecompressorU64` and
/// also handle the new codecs.
pub fn open_u128_as_compact_u64(mut bytes: OwnedBytes) -> io::Result<Arc<dyn ColumnValues<u64>>> {
let header = U128Header::deserialize(&mut bytes)?;
assert_eq!(header.codec_type, U128FastFieldCodecType::CompactSpace);
let reader = CompactSpaceU64Accessor::open(bytes)?;
Ok(Arc::new(reader))
}
#[cfg(test)] #[cfg(test)]
pub mod tests { pub mod tests {
use super::*; use super::*;

View File

@@ -63,6 +63,7 @@ impl ColumnValues for BitpackedReader {
fn get_val(&self, doc: u32) -> u64 { fn get_val(&self, doc: u32) -> u64 {
self.stats.min_value + self.stats.gcd.get() * self.bit_unpacker.get(doc, &self.data) self.stats.min_value + self.stats.gcd.get() * self.bit_unpacker.get(doc, &self.data)
} }
#[inline] #[inline]
fn min_value(&self) -> u64 { fn min_value(&self) -> u64 {
self.stats.min_value self.stats.min_value

View File

@@ -63,10 +63,7 @@ impl BlockwiseLinearEstimator {
if self.block.is_empty() { if self.block.is_empty() {
return; return;
} }
let column = VecColumn::from(std::mem::take(&mut self.block)); let line = Line::train(&VecColumn::from(&self.block));
let line = Line::train(&column);
self.block = column.into();
let mut max_value = 0u64; let mut max_value = 0u64;
for (i, buffer_val) in self.block.iter().enumerate() { for (i, buffer_val) in self.block.iter().enumerate() {
let interpolated_val = line.eval(i as u32); let interpolated_val = line.eval(i as u32);
@@ -128,7 +125,7 @@ impl ColumnCodecEstimator for BlockwiseLinearEstimator {
*buffer_val = gcd_divider.divide(*buffer_val - stats.min_value); *buffer_val = gcd_divider.divide(*buffer_val - stats.min_value);
} }
let line = Line::train(&VecColumn::from(buffer.to_vec())); let line = Line::train(&VecColumn::from(&buffer));
assert!(!buffer.is_empty()); assert!(!buffer.is_empty());

View File

@@ -184,7 +184,7 @@ mod tests {
} }
fn test_eval_max_err(ys: &[u64]) -> Option<u64> { fn test_eval_max_err(ys: &[u64]) -> Option<u64> {
let line = Line::train(&VecColumn::from(ys.to_vec())); let line = Line::train(&VecColumn::from(&ys));
ys.iter() ys.iter()
.enumerate() .enumerate()
.map(|(x, y)| y.wrapping_sub(line.eval(x as u32))) .map(|(x, y)| y.wrapping_sub(line.eval(x as u32)))

View File

@@ -173,9 +173,7 @@ impl LinearCodecEstimator {
fn collect_before_line_estimation(&mut self, value: u64) { fn collect_before_line_estimation(&mut self, value: u64) {
self.block.push(value); self.block.push(value);
if self.block.len() == LINE_ESTIMATION_BLOCK_LEN { if self.block.len() == LINE_ESTIMATION_BLOCK_LEN {
let column = VecColumn::from(std::mem::take(&mut self.block)); let line = Line::train(&VecColumn::from(&self.block));
let line = Line::train(&column);
self.block = column.into();
let block = std::mem::take(&mut self.block); let block = std::mem::take(&mut self.block);
for val in block { for val in block {
self.collect_after_line_estimation(&line, val); self.collect_after_line_estimation(&line, val);

View File

@@ -1,4 +1,5 @@
use proptest::prelude::*; use proptest::prelude::*;
use proptest::strategy::Strategy;
use proptest::{prop_oneof, proptest}; use proptest::{prop_oneof, proptest};
#[test] #[test]

View File

@@ -4,14 +4,14 @@ use tantivy_bitpacker::minmax;
 use crate::ColumnValues;

-/// VecColumn provides `Column` over a `Vec<T>`.
-pub struct VecColumn<T = u64> {
-    pub(crate) values: Vec<T>,
+/// VecColumn provides `Column` over a slice.
+pub struct VecColumn<'a, T = u64> {
+    pub(crate) values: &'a [T],
     pub(crate) min_value: T,
     pub(crate) max_value: T,
 }

-impl<T: Copy + PartialOrd + Send + Sync + Debug + 'static> ColumnValues<T> for VecColumn<T> {
+impl<'a, T: Copy + PartialOrd + Send + Sync + Debug> ColumnValues<T> for VecColumn<'a, T> {
     fn get_val(&self, position: u32) -> T {
         self.values[position as usize]
     }
@@ -37,8 +37,11 @@ impl<T: Copy + PartialOrd + Send + Sync + Debug + 'static> ColumnValues<T> for V
     }
 }

-impl<T: Copy + PartialOrd + Default> From<Vec<T>> for VecColumn<T> {
-    fn from(values: Vec<T>) -> Self {
+impl<'a, T: Copy + PartialOrd + Default, V> From<&'a V> for VecColumn<'a, T>
+where V: AsRef<[T]> + ?Sized
+{
+    fn from(values: &'a V) -> Self {
+        let values = values.as_ref();
         let (min_value, max_value) = minmax(values.iter().copied()).unwrap_or_default();
         Self {
             values,
@@ -47,8 +50,3 @@ impl<T: Copy + PartialOrd + Default> From<Vec<T>> for VecColumn<T> {
         }
     }
 }
-
-impl From<VecColumn> for Vec<u64> {
-    fn from(column: VecColumn) -> Self {
-        column.values
-    }
-}
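
Under the borrowed definition on the '+' side, a VecColumn no longer owns its data; the From<&V> impl accepts anything AsRef<[T]>. A hedged usage sketch (trait imports elided, behaviour assumed from the impl shown above):

fn vec_column_demo() {
    let vals: Vec<u64> = (0..100u64).map(|v| v * 10).collect();
    let col = VecColumn::from(&vals); // borrows `vals`, no copy
    assert_eq!(col.min_value(), 0);
    assert_eq!(col.max_value(), 990);

    // Arrays work through the same AsRef<[T]> bound.
    let col_from_array = VecColumn::from(&[3u64, 1, 2]);
    assert_eq!(col_from_array.min_value(), 1);
}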

View File

@@ -1,3 +1,7 @@
use std::collections::BTreeMap;
use itertools::Itertools;
use super::*; use super::*;
use crate::{Cardinality, ColumnarWriter, HasAssociatedColumnType, RowId}; use crate::{Cardinality, ColumnarWriter, HasAssociatedColumnType, RowId};

View File

@@ -13,7 +13,9 @@ pub(crate) use serializer::ColumnarSerializer;
use stacker::{Addr, ArenaHashMap, MemoryArena}; use stacker::{Addr, ArenaHashMap, MemoryArena};
use crate::column_index::SerializableColumnIndex; use crate::column_index::SerializableColumnIndex;
use crate::column_values::{MonotonicallyMappableToU128, MonotonicallyMappableToU64}; use crate::column_values::{
ColumnValues, MonotonicallyMappableToU128, MonotonicallyMappableToU64, VecColumn,
};
use crate::columnar::column_type::ColumnType; use crate::columnar::column_type::ColumnType;
use crate::columnar::writer::column_writers::{ use crate::columnar::writer::column_writers::{
ColumnWriter, NumericalColumnWriter, StrOrBytesColumnWriter, ColumnWriter, NumericalColumnWriter, StrOrBytesColumnWriter,
@@ -643,7 +645,10 @@ fn send_to_serialize_column_mappable_to_u128<
value_index_builders: &mut PreallocatedIndexBuilders, value_index_builders: &mut PreallocatedIndexBuilders,
values: &mut Vec<T>, values: &mut Vec<T>,
mut wrt: impl io::Write, mut wrt: impl io::Write,
) -> io::Result<()> { ) -> io::Result<()>
where
for<'a> VecColumn<'a, T>: ColumnValues<T>,
{
values.clear(); values.clear();
// TODO: split index and values // TODO: split index and values
let serializable_column_index = match cardinality { let serializable_column_index = match cardinality {
@@ -696,7 +701,10 @@ fn send_to_serialize_column_mappable_to_u64(
value_index_builders: &mut PreallocatedIndexBuilders, value_index_builders: &mut PreallocatedIndexBuilders,
values: &mut Vec<u64>, values: &mut Vec<u64>,
mut wrt: impl io::Write, mut wrt: impl io::Write,
) -> io::Result<()> { ) -> io::Result<()>
where
for<'a> VecColumn<'a, u64>: ColumnValues<u64>,
{
values.clear(); values.clear();
let serializable_column_index = match cardinality { let serializable_column_index = match cardinality {
Cardinality::Full => { Cardinality::Full => {

View File

@@ -18,12 +18,7 @@ pub struct ColumnarSerializer<W: io::Write> {
 /// code.
 fn prepare_key(key: &[u8], column_type: ColumnType, buffer: &mut Vec<u8>) {
     buffer.clear();
-    // Convert 0 bytes to '0' string, as 0 bytes are reserved for the end of the path.
-    if key.contains(&0u8) {
-        buffer.extend(key.iter().map(|&b| if b == 0 { b'0' } else { b }));
-    } else {
-        buffer.extend_from_slice(key);
-    }
+    buffer.extend_from_slice(key);
     buffer.push(0u8);
     buffer.push(column_type.to_code());
 }
@@ -101,13 +96,14 @@ impl<'a, W: io::Write> io::Write for ColumnSerializer<'a, W> {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::columnar::column_type::ColumnType;

     #[test]
     fn test_prepare_key_bytes() {
         let mut buffer: Vec<u8> = b"somegarbage".to_vec();
         prepare_key(b"root\0child", ColumnType::Str, &mut buffer);
         assert_eq!(buffer.len(), 12);
-        assert_eq!(&buffer[..10], b"root0child");
+        assert_eq!(&buffer[..10], b"root\0child");
         assert_eq!(buffer[10], 0u8);
         assert_eq!(buffer[11], ColumnType::Str.to_code());
     }
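
A worked example of the two behaviours of prepare_key on a key containing a 0 byte, plus a hedged standalone sketch of the escaping variant ('-' side); the trailing 0 byte terminates the key path and the final byte is the column-type code.

// Escaping variant ('-' side), as a standalone sketch:
fn prepare_key_escaped(key: &[u8], type_code: u8, buffer: &mut Vec<u8>) {
    buffer.clear();
    // 0 bytes inside the key are rewritten to b'0' because 0 is reserved as the terminator.
    buffer.extend(key.iter().map(|&b| if b == 0 { b'0' } else { b }));
    buffer.push(0u8);       // end-of-path marker
    buffer.push(type_code); // column type code
}

// For key b"root\0child":
//   '-' side (escaped):  buffer == b"root0child\0<code>"
//   '+' side (verbatim): buffer == b"root\0child\0<code>"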

View File

@@ -8,7 +8,7 @@ use common::{ByteCount, DateTime, HasLen, OwnedBytes};
use crate::column::{BytesColumn, Column, StrColumn}; use crate::column::{BytesColumn, Column, StrColumn};
use crate::column_values::{monotonic_map_column, StrictlyMonotonicFn}; use crate::column_values::{monotonic_map_column, StrictlyMonotonicFn};
use crate::columnar::ColumnType; use crate::columnar::ColumnType;
use crate::{Cardinality, ColumnIndex, ColumnValues, NumericalType}; use crate::{Cardinality, ColumnIndex, NumericalType};
#[derive(Clone)] #[derive(Clone)]
pub enum DynamicColumn { pub enum DynamicColumn {
@@ -247,12 +247,7 @@ impl DynamicColumnHandle {
} }
/// Returns the `u64` fast field reader reader associated with `fields` of types /// Returns the `u64` fast field reader reader associated with `fields` of types
/// Str, u64, i64, f64, bool, ip, or datetime. /// Str, u64, i64, f64, bool, or datetime.
///
/// Notice that for IpAddr, the fastfield reader will return the u64 representation of the
/// IpAddr.
/// In order to convert to u128 back cast to `CompactSpaceU64Accessor` and call
/// `compact_to_u128`.
/// ///
/// If not, the fastfield reader will returns the u64-value associated with the original /// If not, the fastfield reader will returns the u64-value associated with the original
/// FastValue. /// FastValue.
@@ -263,10 +258,7 @@ impl DynamicColumnHandle {
let column: BytesColumn = crate::column::open_column_bytes(column_bytes)?; let column: BytesColumn = crate::column::open_column_bytes(column_bytes)?;
Ok(Some(column.term_ord_column)) Ok(Some(column.term_ord_column))
} }
ColumnType::IpAddr => { ColumnType::IpAddr => Ok(None),
let column = crate::column::open_column_u128_as_compact_u64(column_bytes)?;
Ok(Some(column))
}
ColumnType::Bool ColumnType::Bool
| ColumnType::I64 | ColumnType::I64
| ColumnType::U64 | ColumnType::U64

View File

@@ -113,9 +113,6 @@ impl Cardinality {
pub fn is_multivalue(&self) -> bool { pub fn is_multivalue(&self) -> bool {
matches!(self, Cardinality::Multivalued) matches!(self, Cardinality::Multivalued)
} }
pub fn is_full(&self) -> bool {
matches!(self, Cardinality::Full)
}
pub(crate) fn to_code(self) -> u8 { pub(crate) fn to_code(self) -> u8 {
self as u8 self as u8
} }

View File

@@ -1,3 +1,4 @@
use std::convert::TryInto;
use std::io::Write; use std::io::Write;
use std::{fmt, io, u64}; use std::{fmt, io, u64};

View File

@@ -40,7 +40,7 @@ pub type DatePrecision = DateTimePrecision;
/// All constructors and conversions are provided as explicit /// All constructors and conversions are provided as explicit
/// functions and not by implementing any `From`/`Into` traits /// functions and not by implementing any `From`/`Into` traits
/// to prevent unintended usage. /// to prevent unintended usage.
#[derive(Clone, Default, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] #[derive(Clone, Default, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DateTime { pub struct DateTime {
// Timestamp in nanoseconds. // Timestamp in nanoseconds.
pub(crate) timestamp_nanos: i64, pub(crate) timestamp_nanos: i64,

View File

@@ -290,7 +290,8 @@ impl<'a> BinarySerializable for Cow<'a, [u8]> {
#[cfg(test)] #[cfg(test)]
pub mod test { pub mod test {
use super::*; use super::{VInt, *};
use crate::serialize::BinarySerializable;
pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() { pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
let mut buffer = Vec::new(); let mut buffer = Vec::new();
O::default().serialize(&mut buffer).unwrap(); O::default().serialize(&mut buffer).unwrap();

View File

@@ -1,3 +1,4 @@
use std::convert::TryInto;
use std::ops::{Deref, Range}; use std::ops::{Deref, Range};
use std::sync::Arc; use std::sync::Arc;
use std::{fmt, io}; use std::{fmt, io};

View File

@@ -170,8 +170,8 @@ impl AggregationWithAccessor {
ColumnType::Str, ColumnType::Str,
ColumnType::DateTime, ColumnType::DateTime,
ColumnType::Bool, ColumnType::Bool,
ColumnType::IpAddr,
// ColumnType::Bytes Unsupported // ColumnType::Bytes Unsupported
// ColumnType::IpAddr Unsupported
]; ];
// In case the column is empty we want the shim column to match the missing type // In case the column is empty we want the shim column to match the missing type
@@ -292,7 +292,7 @@ impl AggregationWithAccessor {
add_agg_with_accessor(&agg, accessor, column_type, &mut res)?; add_agg_with_accessor(&agg, accessor, column_type, &mut res)?;
} }
TopHits(ref mut top_hits) => { TopHits(ref mut top_hits) => {
top_hits.validate_and_resolve_field_names(reader.fast_fields().columnar())?; top_hits.validate_and_resolve(reader.fast_fields().columnar())?;
let accessors: Vec<(Column<u64>, ColumnType)> = top_hits let accessors: Vec<(Column<u64>, ColumnType)> = top_hits
.field_names() .field_names()
.iter() .iter()

View File

@@ -4,7 +4,6 @@ use crate::aggregation::agg_req::{Aggregation, Aggregations};
use crate::aggregation::agg_result::AggregationResults; use crate::aggregation::agg_result::AggregationResults;
use crate::aggregation::buf_collector::DOC_BLOCK_SIZE; use crate::aggregation::buf_collector::DOC_BLOCK_SIZE;
use crate::aggregation::collector::AggregationCollector; use crate::aggregation::collector::AggregationCollector;
use crate::aggregation::intermediate_agg_result::IntermediateAggregationResults;
use crate::aggregation::segment_agg_result::AggregationLimits; use crate::aggregation::segment_agg_result::AggregationLimits;
use crate::aggregation::tests::{get_test_index_2_segments, get_test_index_from_values_and_terms}; use crate::aggregation::tests::{get_test_index_2_segments, get_test_index_from_values_and_terms};
use crate::aggregation::DistributedAggregationCollector; use crate::aggregation::DistributedAggregationCollector;
@@ -67,22 +66,6 @@ fn test_aggregation_flushing(
} }
} }
}, },
"top_hits_test":{
"terms": {
"field": "string_id"
},
"aggs": {
"bucketsL2": {
"top_hits": {
"size": 2,
"sort": [
{ "score": "asc" }
],
"docvalue_fields": ["score"]
}
}
}
},
"histogram_test":{ "histogram_test":{
"histogram": { "histogram": {
"field": "score", "field": "score",
@@ -125,16 +108,6 @@ fn test_aggregation_flushing(
let searcher = reader.searcher(); let searcher = reader.searcher();
let intermediate_agg_result = searcher.search(&AllQuery, &collector).unwrap(); let intermediate_agg_result = searcher.search(&AllQuery, &collector).unwrap();
// Test postcard roundtrip serialization
let intermediate_agg_result_bytes = postcard::to_allocvec(&intermediate_agg_result).expect(
"Postcard Serialization failed, flatten etc. is not supported in the intermediate \
result",
);
let intermediate_agg_result: IntermediateAggregationResults =
postcard::from_bytes(&intermediate_agg_result_bytes)
.expect("Post deserialization failed");
intermediate_agg_result intermediate_agg_result
.into_final_result(agg_req, &Default::default()) .into_final_result(agg_req, &Default::default())
.unwrap() .unwrap()
@@ -843,38 +816,38 @@ fn test_aggregation_on_json_object_mixed_types() {
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap(); let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
// => Segment with all values numeric // => Segment with all values numeric
index_writer index_writer
.add_document(doc!(json => json!({"mixed_type": 10.0, "mixed_price": 10.0}))) .add_document(doc!(json => json!({"mixed_type": 10.0})))
.unwrap(); .unwrap();
index_writer.commit().unwrap(); index_writer.commit().unwrap();
// => Segment with all values text // => Segment with all values text
index_writer index_writer
.add_document(doc!(json => json!({"mixed_type": "blue", "mixed_price": 5.0}))) .add_document(doc!(json => json!({"mixed_type": "blue"})))
.unwrap(); .unwrap();
index_writer index_writer
.add_document(doc!(json => json!({"mixed_type": "blue", "mixed_price": 5.0}))) .add_document(doc!(json => json!({"mixed_type": "blue"})))
.unwrap(); .unwrap();
index_writer index_writer
.add_document(doc!(json => json!({"mixed_type": "blue", "mixed_price": 5.0}))) .add_document(doc!(json => json!({"mixed_type": "blue"})))
.unwrap(); .unwrap();
index_writer.commit().unwrap(); index_writer.commit().unwrap();
// => Segment with all boolen // => Segment with all boolen
index_writer index_writer
.add_document(doc!(json => json!({"mixed_type": true, "mixed_price": "no_price"}))) .add_document(doc!(json => json!({"mixed_type": true})))
.unwrap(); .unwrap();
index_writer.commit().unwrap(); index_writer.commit().unwrap();
// => Segment with mixed values // => Segment with mixed values
index_writer index_writer
.add_document(doc!(json => json!({"mixed_type": "red", "mixed_price": 1.0}))) .add_document(doc!(json => json!({"mixed_type": "red"})))
.unwrap(); .unwrap();
index_writer index_writer
.add_document(doc!(json => json!({"mixed_type": "red", "mixed_price": 1.0}))) .add_document(doc!(json => json!({"mixed_type": "red"})))
.unwrap(); .unwrap();
index_writer index_writer
.add_document(doc!(json => json!({"mixed_type": -20.5, "mixed_price": -20.5}))) .add_document(doc!(json => json!({"mixed_type": -20.5})))
.unwrap(); .unwrap();
index_writer index_writer
.add_document(doc!(json => json!({"mixed_type": true, "mixed_price": "no_price"}))) .add_document(doc!(json => json!({"mixed_type": true})))
.unwrap(); .unwrap();
index_writer.commit().unwrap(); index_writer.commit().unwrap();
@@ -888,7 +861,7 @@ fn test_aggregation_on_json_object_mixed_types() {
"order": { "min_price": "desc" } "order": { "min_price": "desc" }
}, },
"aggs": { "aggs": {
"min_price": { "min": { "field": "json.mixed_price" } } "min_price": { "min": { "field": "json.mixed_type" } }
} }
}, },
"rangeagg": { "rangeagg": {
@@ -912,6 +885,7 @@ fn test_aggregation_on_json_object_mixed_types() {
let aggregation_results = searcher.search(&AllQuery, &aggregation_collector).unwrap(); let aggregation_results = searcher.search(&AllQuery, &aggregation_collector).unwrap();
let aggregation_res_json = serde_json::to_value(aggregation_results).unwrap(); let aggregation_res_json = serde_json::to_value(aggregation_results).unwrap();
// pretty print as json
use pretty_assertions::assert_eq; use pretty_assertions::assert_eq;
assert_eq!( assert_eq!(
&aggregation_res_json, &aggregation_res_json,
@@ -927,10 +901,10 @@ fn test_aggregation_on_json_object_mixed_types() {
"termagg": { "termagg": {
"buckets": [ "buckets": [
{ "doc_count": 1, "key": 10.0, "min_price": { "value": 10.0 } }, { "doc_count": 1, "key": 10.0, "min_price": { "value": 10.0 } },
{ "doc_count": 3, "key": "blue", "min_price": { "value": 5.0 } },
{ "doc_count": 2, "key": "red", "min_price": { "value": 1.0 } },
{ "doc_count": 1, "key": -20.5, "min_price": { "value": -20.5 } }, { "doc_count": 1, "key": -20.5, "min_price": { "value": -20.5 } },
{ "doc_count": 2, "key": "red", "min_price": { "value": null } },
{ "doc_count": 2, "key": 1.0, "key_as_string": "true", "min_price": { "value": null } }, { "doc_count": 2, "key": 1.0, "key_as_string": "true", "min_price": { "value": null } },
{ "doc_count": 3, "key": "blue", "min_price": { "value": null } },
], ],
"sum_other_doc_count": 0 "sum_other_doc_count": 0
} }

View File

@@ -1,5 +1,8 @@
use std::cmp::Ordering; use std::cmp::Ordering;
use std::fmt::Display;
use columnar::ColumnType;
use itertools::Itertools;
use rustc_hash::FxHashMap; use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use tantivy_bitpacker::minmax; use tantivy_bitpacker::minmax;
@@ -15,7 +18,7 @@ use crate::aggregation::intermediate_agg_result::{
IntermediateHistogramBucketEntry, IntermediateHistogramBucketEntry,
}; };
use crate::aggregation::segment_agg_result::{ use crate::aggregation::segment_agg_result::{
build_segment_agg_collector, SegmentAggregationCollector, build_segment_agg_collector, AggregationLimits, SegmentAggregationCollector,
}; };
use crate::aggregation::*; use crate::aggregation::*;
use crate::TantivyError; use crate::TantivyError;
@@ -307,10 +310,7 @@ impl SegmentAggregationCollector for SegmentHistogramCollector {
.column_block_accessor .column_block_accessor
.fetch_block(docs, &bucket_agg_accessor.accessor); .fetch_block(docs, &bucket_agg_accessor.accessor);
for (doc, val) in bucket_agg_accessor for (doc, val) in bucket_agg_accessor.column_block_accessor.iter_docid_vals() {
.column_block_accessor
.iter_docid_vals(docs, &bucket_agg_accessor.accessor)
{
let val = self.f64_from_fastfield_u64(val); let val = self.f64_from_fastfield_u64(val);
let bucket_pos = get_bucket_pos(val); let bucket_pos = get_bucket_pos(val);
@@ -597,11 +597,13 @@ mod tests {
use serde_json::Value; use serde_json::Value;
use super::*; use super::*;
use crate::aggregation::agg_req::Aggregations;
use crate::aggregation::agg_result::AggregationResults; use crate::aggregation::agg_result::AggregationResults;
use crate::aggregation::tests::{ use crate::aggregation::tests::{
exec_request, exec_request_with_query, exec_request_with_query_and_memory_limit, exec_request, exec_request_with_query, exec_request_with_query_and_memory_limit,
get_test_index_2_segments, get_test_index_from_values, get_test_index_with_num_docs, get_test_index_2_segments, get_test_index_from_values, get_test_index_with_num_docs,
}; };
use crate::aggregation::AggregationCollector;
use crate::query::AllQuery; use crate::query::AllQuery;
#[test] #[test]

View File

@@ -1,6 +1,7 @@
use std::fmt::Debug; use std::fmt::Debug;
use std::ops::Range; use std::ops::Range;
use columnar::{ColumnType, MonotonicallyMappableToU64};
use rustc_hash::FxHashMap; use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@@ -235,10 +236,7 @@ impl SegmentAggregationCollector for SegmentRangeCollector {
.column_block_accessor .column_block_accessor
.fetch_block(docs, &bucket_agg_accessor.accessor); .fetch_block(docs, &bucket_agg_accessor.accessor);
for (doc, val) in bucket_agg_accessor for (doc, val) in bucket_agg_accessor.column_block_accessor.iter_docid_vals() {
.column_block_accessor
.iter_docid_vals(docs, &bucket_agg_accessor.accessor)
{
let bucket_pos = self.get_bucket_pos(val); let bucket_pos = self.get_bucket_pos(val);
let bucket = &mut self.buckets[bucket_pos]; let bucket = &mut self.buckets[bucket_pos];
@@ -449,6 +447,7 @@ pub(crate) fn range_to_key(range: &Range<u64>, field_type: &ColumnType) -> crate
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use columnar::MonotonicallyMappableToU64;
use serde_json::Value; use serde_json::Value;
use super::*; use super::*;
@@ -457,6 +456,7 @@ mod tests {
exec_request, exec_request_with_query, get_test_index_2_segments, exec_request, exec_request_with_query, get_test_index_2_segments,
get_test_index_with_num_docs, get_test_index_with_num_docs,
}; };
use crate::aggregation::AggregationLimits;
pub fn get_collector_from_ranges( pub fn get_collector_from_ranges(
ranges: Vec<RangeAggregationRange>, ranges: Vec<RangeAggregationRange>,

View File

@@ -1,10 +1,6 @@
use std::fmt::Debug; use std::fmt::Debug;
use std::net::Ipv6Addr;
use columnar::column_values::CompactSpaceU64Accessor; use columnar::{BytesColumn, ColumnType, MonotonicallyMappableToU64, StrColumn};
use columnar::{
BytesColumn, ColumnType, MonotonicallyMappableToU128, MonotonicallyMappableToU64, StrColumn,
};
use rustc_hash::FxHashMap; use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@@ -109,9 +105,9 @@ pub struct TermsAggregation {
     ///
     /// Defaults to 10 * size.
     #[serde(skip_serializing_if = "Option::is_none", default)]
-    #[serde(alias = "shard_size")]
+    #[serde(alias = "segment_size")]
     #[serde(alias = "split_size")]
-    pub segment_size: Option<u32>,
+    pub shard_size: Option<u32>,

     /// If you set the `show_term_doc_count_error` parameter to true, the terms aggregation will
     /// include doc_count_error_upper_bound, which is an upper bound to the error on the
@@ -200,7 +196,7 @@ impl TermsAggregationInternal {
     pub(crate) fn from_req(req: &TermsAggregation) -> Self {
         let size = req.size.unwrap_or(10);
-        let mut segment_size = req.segment_size.unwrap_or(size * 10);
+        let mut segment_size = req.shard_size.unwrap_or(size * 10);
         let order = req.order.clone().unwrap_or_default();
         segment_size = segment_size.max(size);
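
Because of the serde aliases shown above, the `shard_size` spelling deserializes into the renamed field on both sides of the diff; a hedged sketch in the same json! style as the tests in this compare (the alias set is read off the hunk):

fn parse_terms_request() -> serde_json::Result<Aggregations> {
    serde_json::from_value(serde_json::json!({
        "my_terms": {
            "terms": { "field": "string_id", "shard_size": 100 }
        }
    }))
}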
@@ -310,10 +306,7 @@ impl SegmentAggregationCollector for SegmentTermCollector {
} }
// has subagg // has subagg
if let Some(blueprint) = self.blueprint.as_ref() { if let Some(blueprint) = self.blueprint.as_ref() {
for (doc, term_id) in bucket_agg_accessor for (doc, term_id) in bucket_agg_accessor.column_block_accessor.iter_docid_vals() {
.column_block_accessor
.iter_docid_vals(docs, &bucket_agg_accessor.accessor)
{
let sub_aggregations = self let sub_aggregations = self
.term_buckets .term_buckets
.sub_aggs .sub_aggs
@@ -542,27 +535,6 @@ impl SegmentTermCollector {
let val = bool::from_u64(val); let val = bool::from_u64(val);
dict.insert(IntermediateKey::Bool(val), intermediate_entry); dict.insert(IntermediateKey::Bool(val), intermediate_entry);
} }
} else if self.column_type == ColumnType::IpAddr {
let compact_space_accessor = agg_with_accessor
.accessor
.values
.clone()
.downcast_arc::<CompactSpaceU64Accessor>()
.map_err(|_| {
TantivyError::AggregationError(
crate::aggregation::AggregationError::InternalError(
"Type mismatch: Could not downcast to CompactSpaceU64Accessor"
.to_string(),
),
)
})?;
for (val, doc_count) in entries {
let intermediate_entry = into_intermediate_bucket_entry(val, doc_count)?;
let val: u128 = compact_space_accessor.compact_to_u128(val as u32);
let val = Ipv6Addr::from_u128(val);
dict.insert(IntermediateKey::IpAddr(val), intermediate_entry);
}
} else { } else {
for (val, doc_count) in entries { for (val, doc_count) in entries {
let intermediate_entry = into_intermediate_bucket_entry(val, doc_count)?; let intermediate_entry = into_intermediate_bucket_entry(val, doc_count)?;
@@ -615,9 +587,6 @@ pub(crate) fn cut_off_buckets<T: GetDocCount + Debug>(
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::net::IpAddr;
use std::str::FromStr;
use common::DateTime; use common::DateTime;
use time::{Date, Month}; use time::{Date, Month};
@@ -628,7 +597,7 @@ mod tests {
}; };
use crate::aggregation::AggregationLimits; use crate::aggregation::AggregationLimits;
use crate::indexer::NoMergePolicy; use crate::indexer::NoMergePolicy;
use crate::schema::{IntoIpv6Addr, Schema, FAST, STRING}; use crate::schema::{Schema, FAST, STRING};
use crate::{Index, IndexWriter}; use crate::{Index, IndexWriter};
#[test] #[test]
@@ -1210,9 +1179,9 @@ mod tests {
assert_eq!(res["my_texts"]["buckets"][0]["key"], "terma"); assert_eq!(res["my_texts"]["buckets"][0]["key"], "terma");
assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 4); assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 4);
assert_eq!(res["my_texts"]["buckets"][1]["key"], "termb"); assert_eq!(res["my_texts"]["buckets"][1]["key"], "termc");
assert_eq!(res["my_texts"]["buckets"][1]["doc_count"], 0); assert_eq!(res["my_texts"]["buckets"][1]["doc_count"], 0);
assert_eq!(res["my_texts"]["buckets"][2]["key"], "termc"); assert_eq!(res["my_texts"]["buckets"][2]["key"], "termb");
assert_eq!(res["my_texts"]["buckets"][2]["doc_count"], 0); assert_eq!(res["my_texts"]["buckets"][2]["doc_count"], 0);
assert_eq!(res["my_texts"]["sum_other_doc_count"], 0); assert_eq!(res["my_texts"]["sum_other_doc_count"], 0);
assert_eq!(res["my_texts"]["doc_count_error_upper_bound"], 0); assert_eq!(res["my_texts"]["doc_count_error_upper_bound"], 0);
@@ -1958,44 +1927,4 @@ mod tests {
Ok(()) Ok(())
} }
#[test]
fn terms_aggregation_ip_addr() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_ip_addr_field("ip_field", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut writer = index.writer_with_num_threads(1, 15_000_000)?;
// IpV6 loopback
writer.add_document(doc!(field=>IpAddr::from_str("::1").unwrap().into_ipv6_addr()))?;
writer.add_document(doc!(field=>IpAddr::from_str("::1").unwrap().into_ipv6_addr()))?;
// IpV4
writer.add_document(
doc!(field=>IpAddr::from_str("127.0.0.1").unwrap().into_ipv6_addr()),
)?;
writer.commit()?;
}
let agg_req: Aggregations = serde_json::from_value(json!({
"my_bool": {
"terms": {
"field": "ip_field"
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
// print as json
// println!("{}", serde_json::to_string_pretty(&res).unwrap());
assert_eq!(res["my_bool"]["buckets"][0]["key"], "::1");
assert_eq!(res["my_bool"]["buckets"][0]["doc_count"], 2);
assert_eq!(res["my_bool"]["buckets"][1]["key"], "127.0.0.1");
assert_eq!(res["my_bool"]["buckets"][1]["doc_count"], 1);
assert_eq!(res["my_bool"]["buckets"][2]["key"], serde_json::Value::Null);
Ok(())
}
} }

View File

@@ -5,7 +5,6 @@
use std::cmp::Ordering; use std::cmp::Ordering;
use std::collections::hash_map::Entry; use std::collections::hash_map::Entry;
use std::hash::Hash; use std::hash::Hash;
use std::net::Ipv6Addr;
use columnar::ColumnType; use columnar::ColumnType;
use itertools::Itertools; use itertools::Itertools;
@@ -20,7 +19,7 @@ use super::bucket::{
}; };
use super::metric::{ use super::metric::{
IntermediateAverage, IntermediateCount, IntermediateMax, IntermediateMin, IntermediateStats, IntermediateAverage, IntermediateCount, IntermediateMax, IntermediateMin, IntermediateStats,
IntermediateSum, PercentilesCollector, TopHitsTopNComputer, IntermediateSum, PercentilesCollector, TopHitsCollector,
}; };
use super::segment_agg_result::AggregationLimits; use super::segment_agg_result::AggregationLimits;
use super::{format_date, AggregationError, Key, SerializedKey}; use super::{format_date, AggregationError, Key, SerializedKey};
@@ -42,8 +41,6 @@ pub struct IntermediateAggregationResults {
/// This might seem redundant with `Key`, but the point is to have a different /// This might seem redundant with `Key`, but the point is to have a different
/// Serialize implementation. /// Serialize implementation.
pub enum IntermediateKey { pub enum IntermediateKey {
/// Ip Addr key
IpAddr(Ipv6Addr),
/// Bool key /// Bool key
Bool(bool), Bool(bool),
/// String key /// String key
@@ -63,14 +60,6 @@ impl From<IntermediateKey> for Key {
fn from(value: IntermediateKey) -> Self { fn from(value: IntermediateKey) -> Self {
match value { match value {
IntermediateKey::Str(s) => Self::Str(s), IntermediateKey::Str(s) => Self::Str(s),
IntermediateKey::IpAddr(s) => {
// Prefer to use the IPv4 representation if possible
if let Some(ip) = s.to_ipv4_mapped() {
Self::Str(ip.to_string())
} else {
Self::Str(s.to_string())
}
}
IntermediateKey::F64(f) => Self::F64(f), IntermediateKey::F64(f) => Self::F64(f),
IntermediateKey::Bool(f) => Self::F64(f as u64 as f64), IntermediateKey::Bool(f) => Self::F64(f as u64 as f64),
} }
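
The "prefer IPv4" branch above relies on std's `Ipv6Addr::to_ipv4_mapped`; a minimal standalone illustration of the preference it implements (addresses are arbitrary examples):

use std::net::Ipv6Addr;
use std::str::FromStr;

fn main() {
    // An IPv4-mapped address is rendered with its IPv4 representation...
    let mapped = Ipv6Addr::from_str("::ffff:127.0.0.1").unwrap();
    assert_eq!(mapped.to_ipv4_mapped().unwrap().to_string(), "127.0.0.1");
    // ...while a plain IPv6 address keeps its IPv6 string form ("::1").
    let loopback = Ipv6Addr::from_str("::1").unwrap();
    assert!(loopback.to_ipv4_mapped().is_none());
}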
@@ -86,7 +75,6 @@ impl std::hash::Hash for IntermediateKey {
IntermediateKey::Str(text) => text.hash(state), IntermediateKey::Str(text) => text.hash(state),
IntermediateKey::F64(val) => val.to_bits().hash(state), IntermediateKey::F64(val) => val.to_bits().hash(state),
IntermediateKey::Bool(val) => val.hash(state), IntermediateKey::Bool(val) => val.hash(state),
IntermediateKey::IpAddr(val) => val.hash(state),
} }
} }
} }
@@ -221,9 +209,9 @@ pub(crate) fn empty_from_req(req: &Aggregation) -> IntermediateAggregationResult
Percentiles(_) => IntermediateAggregationResult::Metric( Percentiles(_) => IntermediateAggregationResult::Metric(
IntermediateMetricResult::Percentiles(PercentilesCollector::default()), IntermediateMetricResult::Percentiles(PercentilesCollector::default()),
), ),
TopHits(ref req) => IntermediateAggregationResult::Metric( TopHits(_) => IntermediateAggregationResult::Metric(IntermediateMetricResult::TopHits(
IntermediateMetricResult::TopHits(TopHitsTopNComputer::new(req.clone())), TopHitsCollector::default(),
), )),
} }
} }
@@ -285,7 +273,7 @@ pub enum IntermediateMetricResult {
/// Intermediate sum result. /// Intermediate sum result.
Sum(IntermediateSum), Sum(IntermediateSum),
/// Intermediate top_hits result /// Intermediate top_hits result
TopHits(TopHitsTopNComputer), TopHits(TopHitsCollector),
} }
impl IntermediateMetricResult { impl IntermediateMetricResult {
@@ -314,7 +302,7 @@ impl IntermediateMetricResult {
.into_final_result(req.agg.as_percentile().expect("unexpected metric type")), .into_final_result(req.agg.as_percentile().expect("unexpected metric type")),
), ),
IntermediateMetricResult::TopHits(top_hits) => { IntermediateMetricResult::TopHits(top_hits) => {
MetricResult::TopHits(top_hits.into_final_result()) MetricResult::TopHits(top_hits.finalize())
} }
} }
} }

View File

@@ -25,8 +25,6 @@ mod stats;
mod sum; mod sum;
mod top_hits; mod top_hits;
use std::collections::HashMap;
pub use average::*; pub use average::*;
pub use count::*; pub use count::*;
pub use max::*; pub use max::*;
@@ -38,8 +36,6 @@ pub use stats::*;
pub use sum::*; pub use sum::*;
pub use top_hits::*; pub use top_hits::*;
use crate::schema::OwnedValue;
/// Single-metric aggregations use this common result structure. /// Single-metric aggregations use this common result structure.
/// ///
/// Main reason to wrap it in value is to match elasticsearch output structure. /// Main reason to wrap it in value is to match elasticsearch output structure.
@@ -96,9 +92,8 @@ pub struct TopHitsVecEntry {
/// Search results, for queries that include field retrieval requests /// Search results, for queries that include field retrieval requests
/// (`docvalue_fields`). /// (`docvalue_fields`).
#[serde(rename = "docvalue_fields")] #[serde(flatten)]
#[serde(skip_serializing_if = "HashMap::is_empty")] pub search_results: FieldRetrivalResult,
pub doc_value_fields: HashMap<String, OwnedValue>,
} }
/// The top_hits metric aggregation results a list of top hits by sort criteria. /// The top_hits metric aggregation results a list of top hits by sort criteria.

View File

@@ -1,5 +1,6 @@
use std::fmt::Debug; use std::fmt::Debug;
use columnar::ColumnType;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use super::*; use super::*;

View File

@@ -1,3 +1,4 @@
use columnar::ColumnType;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use super::*; use super::*;

View File

@@ -1,8 +1,7 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::net::Ipv6Addr; use std::fmt::Formatter;
use columnar::{ColumnarReader, DynamicColumn}; use columnar::{ColumnarReader, DynamicColumn};
use common::DateTime;
use regex::Regex; use regex::Regex;
use serde::ser::SerializeMap; use serde::ser::SerializeMap;
use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde::{Deserialize, Deserializer, Serialize, Serializer};
@@ -93,61 +92,53 @@ pub struct TopHitsAggregation {
size: usize, size: usize,
from: Option<usize>, from: Option<usize>,
#[serde(flatten)]
retrieval: RetrievalFields,
}
const fn default_doc_value_fields() -> Vec<String> {
Vec::new()
}
/// Search query spec for each matched document
/// TODO: move this to a common module
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
pub struct RetrievalFields {
/// The fast fields to return for each hit.
/// This is the only variant supported for now.
/// TODO: support the {field, format} variant for custom formatting.
#[serde(rename = "docvalue_fields")] #[serde(rename = "docvalue_fields")]
#[serde(default)] #[serde(default = "default_doc_value_fields")]
doc_value_fields: Vec<String>, pub doc_value_fields: Vec<String>,
} }
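
For orientation, a top_hits request exercising these fields could look roughly like the sketch below; the aggregation name and field names are invented, and the shape is inferred from the struct definitions above rather than copied from the test suite.

// `Aggregations` here is crate::aggregation::agg_req::Aggregations.
let agg_req: Aggregations = serde_json::from_value(serde_json::json!({
    "top_ranked": {
        "top_hits": {
            "size": 2,
            // each sort entry is a single-key map, matching the KeyOrder (de)serialization below
            "sort": [ { "rating": "desc" } ],
            // fast fields to return per hit; `*` globs are resolved against the columnar reader
            "docvalue_fields": ["rating", "attributes.*"]
        }
    }
}))
.unwrap();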
#[derive(Debug, Clone, PartialEq, Default)] /// Search query result for each matched document
struct KeyOrder { /// TODO: move this to a common module
field: String, #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
order: Order, pub struct FieldRetrivalResult {
/// The fast fields returned for each hit.
#[serde(rename = "docvalue_fields")]
#[serde(skip_serializing_if = "HashMap::is_empty")]
pub doc_value_fields: HashMap<String, OwnedValue>,
} }
impl Serialize for KeyOrder { impl RetrievalFields {
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> { fn get_field_names(&self) -> Vec<&str> {
let KeyOrder { field, order } = self; self.doc_value_fields.iter().map(|s| s.as_str()).collect()
let mut map = serializer.serialize_map(Some(1))?;
map.serialize_entry(field, order)?;
map.end()
} }
}
impl<'de> Deserialize<'de> for KeyOrder { fn resolve_field_names(&mut self, reader: &ColumnarReader) -> crate::Result<()> {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> // Transform a glob (`pattern*`, for example) into a regex::Regex (`^pattern.*$`)
where D: Deserializer<'de> { let globbed_string_to_regex = |glob: &str| {
let mut key_order = <HashMap<String, Order>>::deserialize(deserializer)?.into_iter(); // Replace `*` glob with `.*` regex
let (field, order) = key_order.next().ok_or(serde::de::Error::custom( let sanitized = format!("^{}$", regex::escape(glob).replace(r"\*", ".*"));
"Expected exactly one key-value pair in sort parameter of top_hits, found none", Regex::new(&sanitized.replace('*', ".*")).map_err(|e| {
))?; crate::TantivyError::SchemaError(format!(
if key_order.next().is_some() { "Invalid regex '{}' in docvalue_fields: {}",
return Err(serde::de::Error::custom(format!( glob, e
"Expected exactly one key-value pair in sort parameter of top_hits, found {:?}", ))
key_order })
))); };
}
Ok(Self { field, order })
}
}
// Transform a glob (`pattern*`, for example) into a regex::Regex (`^pattern.*$`)
fn globbed_string_to_regex(glob: &str) -> Result<Regex, crate::TantivyError> {
// Replace `*` glob with `.*` regex
let sanitized = format!("^{}$", regex::escape(glob).replace(r"\*", ".*"));
Regex::new(&sanitized.replace('*', ".*")).map_err(|e| {
crate::TantivyError::SchemaError(format!(
"Invalid regex '{}' in docvalue_fields: {}",
glob, e
))
})
}
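
A standalone sketch of the same glob-to-regex idea, simplified from the helper above (hypothetical function; only `*` is treated as a wildcard):

use regex::Regex;

fn glob_to_regex(glob: &str) -> Result<Regex, regex::Error> {
    // Escape regex metacharacters, then turn the escaped `*` back into `.*`, anchored on both ends.
    Regex::new(&format!("^{}$", regex::escape(glob).replace(r"\*", ".*")))
}

fn main() {
    let re = glob_to_regex("attributes.*").unwrap();
    assert!(re.is_match("attributes.color"));
    assert!(!re.is_match("other.color"));
}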
impl TopHitsAggregation {
/// Validate and resolve field retrieval parameters
pub fn validate_and_resolve_field_names(
&mut self,
reader: &ColumnarReader,
) -> crate::Result<()> {
self.doc_value_fields = self self.doc_value_fields = self
.doc_value_fields .doc_value_fields
.iter() .iter()
@@ -184,25 +175,12 @@ impl TopHitsAggregation {
Ok(()) Ok(())
} }
/// Return fields accessed by the aggregator, in order.
pub fn field_names(&self) -> Vec<&str> {
self.sort
.iter()
.map(|KeyOrder { field, .. }| field.as_str())
.collect()
}
/// Return fields accessed by the aggregator's value retrieval.
pub fn value_field_names(&self) -> Vec<&str> {
self.doc_value_fields.iter().map(|s| s.as_str()).collect()
}
fn get_document_field_data( fn get_document_field_data(
&self, &self,
accessors: &HashMap<String, Vec<DynamicColumn>>, accessors: &HashMap<String, Vec<DynamicColumn>>,
doc_id: DocId, doc_id: DocId,
) -> HashMap<String, FastFieldValue> { ) -> FieldRetrivalResult {
let doc_value_fields = self let dvf = self
.doc_value_fields .doc_value_fields
.iter() .iter()
.map(|field| { .map(|field| {
@@ -210,20 +188,20 @@ impl TopHitsAggregation {
.get(field) .get(field)
.unwrap_or_else(|| panic!("field '{}' not found in accessors", field)); .unwrap_or_else(|| panic!("field '{}' not found in accessors", field));
let values: Vec<FastFieldValue> = accessors let values: Vec<OwnedValue> = accessors
.iter() .iter()
.flat_map(|accessor| match accessor { .flat_map(|accessor| match accessor {
DynamicColumn::U64(accessor) => accessor DynamicColumn::U64(accessor) => accessor
.values_for_doc(doc_id) .values_for_doc(doc_id)
.map(FastFieldValue::U64) .map(OwnedValue::U64)
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
DynamicColumn::I64(accessor) => accessor DynamicColumn::I64(accessor) => accessor
.values_for_doc(doc_id) .values_for_doc(doc_id)
.map(FastFieldValue::I64) .map(OwnedValue::I64)
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
DynamicColumn::F64(accessor) => accessor DynamicColumn::F64(accessor) => accessor
.values_for_doc(doc_id) .values_for_doc(doc_id)
.map(FastFieldValue::F64) .map(OwnedValue::F64)
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
DynamicColumn::Bytes(accessor) => accessor DynamicColumn::Bytes(accessor) => accessor
.term_ords(doc_id) .term_ords(doc_id)
@@ -235,7 +213,7 @@ impl TopHitsAggregation {
.expect("could not read term dictionary"), .expect("could not read term dictionary"),
"term corresponding to term_ord does not exist" "term corresponding to term_ord does not exist"
); );
FastFieldValue::Bytes(buffer) OwnedValue::Bytes(buffer)
}) })
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
DynamicColumn::Str(accessor) => accessor DynamicColumn::Str(accessor) => accessor
@@ -248,82 +226,94 @@ impl TopHitsAggregation {
.expect("could not read term dictionary"), .expect("could not read term dictionary"),
"term corresponding to term_ord does not exist" "term corresponding to term_ord does not exist"
); );
FastFieldValue::Str(String::from_utf8(buffer).unwrap()) OwnedValue::Str(String::from_utf8(buffer).unwrap())
}) })
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
DynamicColumn::Bool(accessor) => accessor DynamicColumn::Bool(accessor) => accessor
.values_for_doc(doc_id) .values_for_doc(doc_id)
.map(FastFieldValue::Bool) .map(OwnedValue::Bool)
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
DynamicColumn::IpAddr(accessor) => accessor DynamicColumn::IpAddr(accessor) => accessor
.values_for_doc(doc_id) .values_for_doc(doc_id)
.map(FastFieldValue::IpAddr) .map(OwnedValue::IpAddr)
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
DynamicColumn::DateTime(accessor) => accessor DynamicColumn::DateTime(accessor) => accessor
.values_for_doc(doc_id) .values_for_doc(doc_id)
.map(FastFieldValue::Date) .map(OwnedValue::Date)
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
}) })
.collect(); .collect();
(field.to_owned(), FastFieldValue::Array(values)) (field.to_owned(), OwnedValue::Array(values))
}) })
.collect(); .collect();
doc_value_fields FieldRetrivalResult {
} doc_value_fields: dvf,
}
/// A retrieved value from a fast field.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum FastFieldValue {
/// The str type is used for any text information.
Str(String),
/// Unsigned 64-bits Integer `u64`
U64(u64),
/// Signed 64-bits Integer `i64`
I64(i64),
/// 64-bits Float `f64`
F64(f64),
/// Bool value
Bool(bool),
/// Date/time with nanoseconds precision
Date(DateTime),
/// Arbitrarily sized byte array
Bytes(Vec<u8>),
/// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
IpAddr(Ipv6Addr),
/// A list of values.
Array(Vec<Self>),
}
impl From<FastFieldValue> for OwnedValue {
fn from(value: FastFieldValue) -> Self {
match value {
FastFieldValue::Str(s) => OwnedValue::Str(s),
FastFieldValue::U64(u) => OwnedValue::U64(u),
FastFieldValue::I64(i) => OwnedValue::I64(i),
FastFieldValue::F64(f) => OwnedValue::F64(f),
FastFieldValue::Bool(b) => OwnedValue::Bool(b),
FastFieldValue::Date(d) => OwnedValue::Date(d),
FastFieldValue::Bytes(b) => OwnedValue::Bytes(b),
FastFieldValue::IpAddr(ip) => OwnedValue::IpAddr(ip),
FastFieldValue::Array(a) => {
OwnedValue::Array(a.into_iter().map(OwnedValue::from).collect())
}
} }
} }
} }
/// Holds a fast field value in its u64 representation, and the order in which it should be sorted. #[derive(Debug, Clone, PartialEq, Default)]
#[derive(Clone, Serialize, Deserialize, Debug)] struct KeyOrder {
struct DocValueAndOrder { field: String,
/// A fast field value in its u64 representation.
value: Option<u64>,
/// Sort order for the value
order: Order, order: Order,
} }
impl Ord for DocValueAndOrder { impl Serialize for KeyOrder {
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
let KeyOrder { field, order } = self;
let mut map = serializer.serialize_map(Some(1))?;
map.serialize_entry(field, order)?;
map.end()
}
}
impl<'de> Deserialize<'de> for KeyOrder {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de> {
let mut k_o = <HashMap<String, Order>>::deserialize(deserializer)?.into_iter();
let (k, v) = k_o.next().ok_or(serde::de::Error::custom(
"Expected exactly one key-value pair in KeyOrder, found none",
))?;
if k_o.next().is_some() {
return Err(serde::de::Error::custom(
"Expected exactly one key-value pair in KeyOrder, found more",
));
}
Ok(Self { field: k, order: v })
}
}
impl TopHitsAggregation {
/// Validate and resolve field retrieval parameters
pub fn validate_and_resolve(&mut self, reader: &ColumnarReader) -> crate::Result<()> {
self.retrieval.resolve_field_names(reader)
}
/// Return fields accessed by the aggregator, in order.
pub fn field_names(&self) -> Vec<&str> {
self.sort
.iter()
.map(|KeyOrder { field, .. }| field.as_str())
.collect()
}
/// Return fields accessed by the aggregator's value retrieval.
pub fn value_field_names(&self) -> Vec<&str> {
self.retrieval.get_field_names()
}
}
/// Holds a single comparable doc feature, and the order in which it should be sorted.
#[derive(Clone, Serialize, Deserialize, Debug)]
struct ComparableDocFeature {
/// Stores any u64-mappable feature.
value: Option<u64>,
/// Sort order for the doc feature
order: Order,
}
impl Ord for ComparableDocFeature {
fn cmp(&self, other: &Self) -> std::cmp::Ordering { fn cmp(&self, other: &Self) -> std::cmp::Ordering {
let invert = |cmp: std::cmp::Ordering| match self.order { let invert = |cmp: std::cmp::Ordering| match self.order {
Order::Asc => cmp, Order::Asc => cmp,
@@ -339,32 +329,26 @@ impl Ord for DocValueAndOrder {
} }
} }
impl PartialOrd for DocValueAndOrder { impl PartialOrd for ComparableDocFeature {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other)) Some(self.cmp(other))
} }
} }
impl PartialEq for DocValueAndOrder { impl PartialEq for ComparableDocFeature {
fn eq(&self, other: &Self) -> bool { fn eq(&self, other: &Self) -> bool {
self.value.cmp(&other.value) == std::cmp::Ordering::Equal self.value.cmp(&other.value) == std::cmp::Ordering::Equal
} }
} }
impl Eq for DocValueAndOrder {} impl Eq for ComparableDocFeature {}
#[derive(Clone, Serialize, Deserialize, Debug)] #[derive(Clone, Serialize, Deserialize, Debug)]
struct DocSortValuesAndFields { struct ComparableDocFeatures(Vec<ComparableDocFeature>, FieldRetrivalResult);
sorts: Vec<DocValueAndOrder>,
#[serde(rename = "docvalue_fields")] impl Ord for ComparableDocFeatures {
#[serde(skip_serializing_if = "HashMap::is_empty")]
doc_value_fields: HashMap<String, FastFieldValue>,
}
impl Ord for DocSortValuesAndFields {
fn cmp(&self, other: &Self) -> std::cmp::Ordering { fn cmp(&self, other: &Self) -> std::cmp::Ordering {
for (self_feature, other_feature) in self.sorts.iter().zip(other.sorts.iter()) { for (self_feature, other_feature) in self.0.iter().zip(other.0.iter()) {
let cmp = self_feature.cmp(other_feature); let cmp = self_feature.cmp(other_feature);
if cmp != std::cmp::Ordering::Equal { if cmp != std::cmp::Ordering::Equal {
return cmp; return cmp;
@@ -374,43 +358,53 @@ impl Ord for DocSortValuesAndFields {
} }
} }
impl PartialOrd for DocSortValuesAndFields { impl PartialOrd for ComparableDocFeatures {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other)) Some(self.cmp(other))
} }
} }
impl PartialEq for DocSortValuesAndFields { impl PartialEq for ComparableDocFeatures {
fn eq(&self, other: &Self) -> bool { fn eq(&self, other: &Self) -> bool {
self.cmp(other) == std::cmp::Ordering::Equal self.cmp(other) == std::cmp::Ordering::Equal
} }
} }
impl Eq for DocSortValuesAndFields {} impl Eq for ComparableDocFeatures {}
/// The TopHitsCollector used for collecting over segments and merging results. /// The TopHitsCollector used for collecting over segments and merging results.
#[derive(Clone, Serialize, Deserialize, Debug)] #[derive(Clone, Serialize, Deserialize)]
pub struct TopHitsTopNComputer { pub struct TopHitsCollector {
req: TopHitsAggregation, req: TopHitsAggregation,
top_n: TopNComputer<DocSortValuesAndFields, DocAddress, false>, top_n: TopNComputer<ComparableDocFeatures, DocAddress, false>,
} }
impl std::cmp::PartialEq for TopHitsTopNComputer { impl Default for TopHitsCollector {
fn default() -> Self {
Self {
req: TopHitsAggregation::default(),
top_n: TopNComputer::new(1),
}
}
}
impl std::fmt::Debug for TopHitsCollector {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("TopHitsCollector")
.field("req", &self.req)
.field("top_n_threshold", &self.top_n.threshold)
.finish()
}
}
impl std::cmp::PartialEq for TopHitsCollector {
fn eq(&self, _other: &Self) -> bool { fn eq(&self, _other: &Self) -> bool {
false false
} }
} }
impl TopHitsTopNComputer { impl TopHitsCollector {
/// Create a new TopHitsCollector fn collect(&mut self, features: ComparableDocFeatures, doc: DocAddress) {
pub fn new(req: TopHitsAggregation) -> Self {
Self {
top_n: TopNComputer::new(req.size + req.from.unwrap_or(0)),
req,
}
}
fn collect(&mut self, features: DocSortValuesAndFields, doc: DocAddress) {
self.top_n.push(features, doc); self.top_n.push(features, doc);
} }
@@ -422,19 +416,14 @@ impl TopHitsTopNComputer {
} }
/// Finalize by converting self into the final result form /// Finalize by converting self into the final result form
pub fn into_final_result(self) -> TopHitsMetricResult { pub fn finalize(self) -> TopHitsMetricResult {
let mut hits: Vec<TopHitsVecEntry> = self let mut hits: Vec<TopHitsVecEntry> = self
.top_n .top_n
.into_sorted_vec() .into_sorted_vec()
.into_iter() .into_iter()
.map(|doc| TopHitsVecEntry { .map(|doc| TopHitsVecEntry {
sort: doc.feature.sorts.iter().map(|f| f.value).collect(), sort: doc.feature.0.iter().map(|f| f.value).collect(),
doc_value_fields: doc search_results: doc.feature.1,
.feature
.doc_value_fields
.into_iter()
.map(|(k, v)| (k, v.into()))
.collect(),
}) })
.collect(); .collect();
@@ -447,63 +436,48 @@ impl TopHitsTopNComputer {
} }
} }
#[derive(Clone, Debug)] #[derive(Clone)]
pub(crate) struct TopHitsSegmentCollector { pub(crate) struct SegmentTopHitsCollector {
segment_ordinal: SegmentOrdinal, segment_ordinal: SegmentOrdinal,
accessor_idx: usize, accessor_idx: usize,
req: TopHitsAggregation, inner_collector: TopHitsCollector,
top_n: TopNComputer<Vec<DocValueAndOrder>, DocAddress, false>,
} }
impl TopHitsSegmentCollector { impl SegmentTopHitsCollector {
pub fn from_req( pub fn from_req(
req: &TopHitsAggregation, req: &TopHitsAggregation,
accessor_idx: usize, accessor_idx: usize,
segment_ordinal: SegmentOrdinal, segment_ordinal: SegmentOrdinal,
) -> Self { ) -> Self {
Self { Self {
req: req.clone(), inner_collector: TopHitsCollector {
top_n: TopNComputer::new(req.size + req.from.unwrap_or(0)), req: req.clone(),
top_n: TopNComputer::new(req.size + req.from.unwrap_or(0)),
},
segment_ordinal, segment_ordinal,
accessor_idx, accessor_idx,
} }
} }
fn into_top_hits_collector( }
self,
value_accessors: &HashMap<String, Vec<DynamicColumn>>,
) -> TopHitsTopNComputer {
let mut top_hits_computer = TopHitsTopNComputer::new(self.req.clone());
let top_results = self.top_n.into_vec();
for res in top_results { impl std::fmt::Debug for SegmentTopHitsCollector {
let doc_value_fields = self fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
.req f.debug_struct("SegmentTopHitsCollector")
.get_document_field_data(value_accessors, res.doc.doc_id); .field("segment_id", &self.segment_ordinal)
top_hits_computer.collect( .field("accessor_idx", &self.accessor_idx)
DocSortValuesAndFields { .field("inner_collector", &self.inner_collector)
sorts: res.feature, .finish()
doc_value_fields,
},
res.doc,
);
}
top_hits_computer
} }
} }
impl SegmentAggregationCollector for TopHitsSegmentCollector { impl SegmentAggregationCollector for SegmentTopHitsCollector {
fn add_intermediate_aggregation_result( fn add_intermediate_aggregation_result(
self: Box<Self>, self: Box<Self>,
agg_with_accessor: &crate::aggregation::agg_req_with_accessor::AggregationsWithAccessor, agg_with_accessor: &crate::aggregation::agg_req_with_accessor::AggregationsWithAccessor,
results: &mut crate::aggregation::intermediate_agg_result::IntermediateAggregationResults, results: &mut crate::aggregation::intermediate_agg_result::IntermediateAggregationResults,
) -> crate::Result<()> { ) -> crate::Result<()> {
let name = agg_with_accessor.aggs.keys[self.accessor_idx].to_string(); let name = agg_with_accessor.aggs.keys[self.accessor_idx].to_string();
let intermediate_result = IntermediateMetricResult::TopHits(self.inner_collector);
let value_accessors = &agg_with_accessor.aggs.values[self.accessor_idx].value_accessors;
let intermediate_result =
IntermediateMetricResult::TopHits(self.into_top_hits_collector(value_accessors));
results.push( results.push(
name, name,
IntermediateAggregationResult::Metric(intermediate_result), IntermediateAggregationResult::Metric(intermediate_result),
@@ -516,7 +490,9 @@ impl SegmentAggregationCollector for TopHitsSegmentCollector {
agg_with_accessor: &mut crate::aggregation::agg_req_with_accessor::AggregationsWithAccessor, agg_with_accessor: &mut crate::aggregation::agg_req_with_accessor::AggregationsWithAccessor,
) -> crate::Result<()> { ) -> crate::Result<()> {
let accessors = &agg_with_accessor.aggs.values[self.accessor_idx].accessors; let accessors = &agg_with_accessor.aggs.values[self.accessor_idx].accessors;
let sorts: Vec<DocValueAndOrder> = self let value_accessors = &agg_with_accessor.aggs.values[self.accessor_idx].value_accessors;
let features: Vec<ComparableDocFeature> = self
.inner_collector
.req .req
.sort .sort
.iter() .iter()
@@ -529,12 +505,18 @@ impl SegmentAggregationCollector for TopHitsSegmentCollector {
.0 .0
.values_for_doc(doc_id) .values_for_doc(doc_id)
.next(); .next();
DocValueAndOrder { value, order } ComparableDocFeature { value, order }
}) })
.collect(); .collect();
self.top_n.push( let retrieval_result = self
sorts, .inner_collector
.req
.retrieval
.get_document_field_data(value_accessors, doc_id);
self.inner_collector.collect(
ComparableDocFeatures(features, retrieval_result),
DocAddress { DocAddress {
segment_ord: self.segment_ordinal, segment_ord: self.segment_ordinal,
doc_id, doc_id,
@@ -548,7 +530,11 @@ impl SegmentAggregationCollector for TopHitsSegmentCollector {
docs: &[crate::DocId], docs: &[crate::DocId],
agg_with_accessor: &mut crate::aggregation::agg_req_with_accessor::AggregationsWithAccessor, agg_with_accessor: &mut crate::aggregation::agg_req_with_accessor::AggregationsWithAccessor,
) -> crate::Result<()> { ) -> crate::Result<()> {
// TODO: Consider getting fields with the column block accessor. // TODO: Consider getting fields with the column block accessor and refactor this.
// ---
// Would the additional complexity of getting fields with the column_block_accessor
// make sense here? Probably yes, but I want to get a first-pass review first
// before proceeding.
for doc in docs { for doc in docs {
self.collect(*doc, agg_with_accessor)?; self.collect(*doc, agg_with_accessor)?;
} }
@@ -563,7 +549,7 @@ mod tests {
use serde_json::Value; use serde_json::Value;
use time::macros::datetime; use time::macros::datetime;
use super::{DocSortValuesAndFields, DocValueAndOrder, Order}; use super::{ComparableDocFeature, ComparableDocFeatures, Order};
use crate::aggregation::agg_req::Aggregations; use crate::aggregation::agg_req::Aggregations;
use crate::aggregation::agg_result::AggregationResults; use crate::aggregation::agg_result::AggregationResults;
use crate::aggregation::bucket::tests::get_test_index_from_docs; use crate::aggregation::bucket::tests::get_test_index_from_docs;
@@ -571,44 +557,44 @@ mod tests {
use crate::aggregation::AggregationCollector; use crate::aggregation::AggregationCollector;
use crate::collector::ComparableDoc; use crate::collector::ComparableDoc;
use crate::query::AllQuery; use crate::query::AllQuery;
use crate::schema::OwnedValue; use crate::schema::OwnedValue as SchemaValue;
fn invert_order(cmp_feature: DocValueAndOrder) -> DocValueAndOrder { fn invert_order(cmp_feature: ComparableDocFeature) -> ComparableDocFeature {
let DocValueAndOrder { value, order } = cmp_feature; let ComparableDocFeature { value, order } = cmp_feature;
let order = match order { let order = match order {
Order::Asc => Order::Desc, Order::Asc => Order::Desc,
Order::Desc => Order::Asc, Order::Desc => Order::Asc,
}; };
DocValueAndOrder { value, order } ComparableDocFeature { value, order }
} }
fn collector_with_capacity(capacity: usize) -> super::TopHitsTopNComputer { fn collector_with_capacity(capacity: usize) -> super::TopHitsCollector {
super::TopHitsTopNComputer { super::TopHitsCollector {
top_n: super::TopNComputer::new(capacity), top_n: super::TopNComputer::new(capacity),
req: Default::default(), ..Default::default()
} }
} }
fn invert_order_features(mut cmp_features: DocSortValuesAndFields) -> DocSortValuesAndFields { fn invert_order_features(cmp_features: ComparableDocFeatures) -> ComparableDocFeatures {
cmp_features.sorts = cmp_features let ComparableDocFeatures(cmp_features, search_results) = cmp_features;
.sorts let cmp_features = cmp_features
.into_iter() .into_iter()
.map(invert_order) .map(invert_order)
.collect::<Vec<_>>(); .collect::<Vec<_>>();
cmp_features ComparableDocFeatures(cmp_features, search_results)
} }
#[test] #[test]
fn test_comparable_doc_feature() -> crate::Result<()> { fn test_comparable_doc_feature() -> crate::Result<()> {
let small = DocValueAndOrder { let small = ComparableDocFeature {
value: Some(1), value: Some(1),
order: Order::Asc, order: Order::Asc,
}; };
let big = DocValueAndOrder { let big = ComparableDocFeature {
value: Some(2), value: Some(2),
order: Order::Asc, order: Order::Asc,
}; };
let none = DocValueAndOrder { let none = ComparableDocFeature {
value: None, value: None,
order: Order::Asc, order: Order::Asc,
}; };
@@ -630,21 +616,21 @@ mod tests {
#[test] #[test]
fn test_comparable_doc_features() -> crate::Result<()> { fn test_comparable_doc_features() -> crate::Result<()> {
let features_1 = DocSortValuesAndFields { let features_1 = ComparableDocFeatures(
sorts: vec![DocValueAndOrder { vec![ComparableDocFeature {
value: Some(1), value: Some(1),
order: Order::Asc, order: Order::Asc,
}], }],
doc_value_fields: Default::default(), Default::default(),
}; );
let features_2 = DocSortValuesAndFields { let features_2 = ComparableDocFeatures(
sorts: vec![DocValueAndOrder { vec![ComparableDocFeature {
value: Some(2), value: Some(2),
order: Order::Asc, order: Order::Asc,
}], }],
doc_value_fields: Default::default(), Default::default(),
}; );
assert!(features_1 < features_2); assert!(features_1 < features_2);
@@ -703,39 +689,39 @@ mod tests {
segment_ord: 0, segment_ord: 0,
doc_id: 0, doc_id: 0,
}, },
feature: DocSortValuesAndFields { feature: ComparableDocFeatures(
sorts: vec![DocValueAndOrder { vec![ComparableDocFeature {
value: Some(1), value: Some(1),
order: Order::Asc, order: Order::Asc,
}], }],
doc_value_fields: Default::default(), Default::default(),
}, ),
}, },
ComparableDoc { ComparableDoc {
doc: crate::DocAddress { doc: crate::DocAddress {
segment_ord: 0, segment_ord: 0,
doc_id: 2, doc_id: 2,
}, },
feature: DocSortValuesAndFields { feature: ComparableDocFeatures(
sorts: vec![DocValueAndOrder { vec![ComparableDocFeature {
value: Some(3), value: Some(3),
order: Order::Asc, order: Order::Asc,
}], }],
doc_value_fields: Default::default(), Default::default(),
}, ),
}, },
ComparableDoc { ComparableDoc {
doc: crate::DocAddress { doc: crate::DocAddress {
segment_ord: 0, segment_ord: 0,
doc_id: 1, doc_id: 1,
}, },
feature: DocSortValuesAndFields { feature: ComparableDocFeatures(
sorts: vec![DocValueAndOrder { vec![ComparableDocFeature {
value: Some(5), value: Some(5),
order: Order::Asc, order: Order::Asc,
}], }],
doc_value_fields: Default::default(), Default::default(),
}, ),
}, },
]; ];
@@ -744,23 +730,23 @@ mod tests {
collector.collect(doc.feature, doc.doc); collector.collect(doc.feature, doc.doc);
} }
let res = collector.into_final_result(); let res = collector.finalize();
assert_eq!( assert_eq!(
res, res,
super::TopHitsMetricResult { super::TopHitsMetricResult {
hits: vec![ hits: vec![
super::TopHitsVecEntry { super::TopHitsVecEntry {
sort: vec![docs[0].feature.sorts[0].value], sort: vec![docs[0].feature.0[0].value],
doc_value_fields: Default::default(), search_results: Default::default(),
}, },
super::TopHitsVecEntry { super::TopHitsVecEntry {
sort: vec![docs[1].feature.sorts[0].value], sort: vec![docs[1].feature.0[0].value],
doc_value_fields: Default::default(), search_results: Default::default(),
}, },
super::TopHitsVecEntry { super::TopHitsVecEntry {
sort: vec![docs[2].feature.sorts[0].value], sort: vec![docs[2].feature.0[0].value],
doc_value_fields: Default::default(), search_results: Default::default(),
}, },
] ]
} }
@@ -817,7 +803,7 @@ mod tests {
{ {
"sort": [common::i64_to_u64(date_2017.unix_timestamp_nanos() as i64)], "sort": [common::i64_to_u64(date_2017.unix_timestamp_nanos() as i64)],
"docvalue_fields": { "docvalue_fields": {
"date": [ OwnedValue::Date(DateTime::from_utc(date_2017)) ], "date": [ SchemaValue::Date(DateTime::from_utc(date_2017)) ],
"text": [ "ccc" ], "text": [ "ccc" ],
"text2": [ "ddd" ], "text2": [ "ddd" ],
"mixed.dyn_arr": [ 3, "4" ], "mixed.dyn_arr": [ 3, "4" ],
@@ -826,7 +812,7 @@ mod tests {
{ {
"sort": [common::i64_to_u64(date_2016.unix_timestamp_nanos() as i64)], "sort": [common::i64_to_u64(date_2016.unix_timestamp_nanos() as i64)],
"docvalue_fields": { "docvalue_fields": {
"date": [ OwnedValue::Date(DateTime::from_utc(date_2016)) ], "date": [ SchemaValue::Date(DateTime::from_utc(date_2016)) ],
"text": [ "aaa" ], "text": [ "aaa" ],
"text2": [ "bbb" ], "text2": [ "bbb" ],
"mixed.dyn_arr": [ 6, "7" ], "mixed.dyn_arr": [ 6, "7" ],

View File

@@ -417,6 +417,7 @@ mod tests {
use time::OffsetDateTime; use time::OffsetDateTime;
use super::agg_req::Aggregations; use super::agg_req::Aggregations;
use super::segment_agg_result::AggregationLimits;
use super::*; use super::*;
use crate::indexer::NoMergePolicy; use crate::indexer::NoMergePolicy;
use crate::query::{AllQuery, TermQuery}; use crate::query::{AllQuery, TermQuery};

View File

@@ -16,7 +16,7 @@ use super::metric::{
SumAggregation, SumAggregation,
}; };
use crate::aggregation::bucket::TermMissingAgg; use crate::aggregation::bucket::TermMissingAgg;
use crate::aggregation::metric::TopHitsSegmentCollector; use crate::aggregation::metric::SegmentTopHitsCollector;
pub(crate) trait SegmentAggregationCollector: CollectorClone + Debug { pub(crate) trait SegmentAggregationCollector: CollectorClone + Debug {
fn add_intermediate_aggregation_result( fn add_intermediate_aggregation_result(
@@ -161,7 +161,7 @@ pub(crate) fn build_single_agg_segment_collector(
accessor_idx, accessor_idx,
)?, )?,
)), )),
TopHits(top_hits_req) => Ok(Box::new(TopHitsSegmentCollector::from_req( TopHits(top_hits_req) => Ok(Box::new(SegmentTopHitsCollector::from_req(
top_hits_req, top_hits_req,
accessor_idx, accessor_idx,
req.segment_ordinal, req.segment_ordinal,

View File

@@ -160,7 +160,7 @@ mod tests {
use super::{add_vecs, HistogramCollector, HistogramComputer}; use super::{add_vecs, HistogramCollector, HistogramComputer};
use crate::schema::{Schema, FAST}; use crate::schema::{Schema, FAST};
use crate::time::{Date, Month}; use crate::time::{Date, Month};
use crate::{query, DateTime, Index}; use crate::{doc, query, DateTime, Index};
#[test] #[test]
fn test_add_histograms_simple() { fn test_add_histograms_simple() {

View File

@@ -274,10 +274,6 @@ pub trait SegmentCollector: 'static {
fn collect(&mut self, doc: DocId, score: Score); fn collect(&mut self, doc: DocId, score: Score);
/// The query pushes the scored document to the collector via this method. /// The query pushes the scored document to the collector via this method.
/// This method is used when the collector does not require scoring.
///
/// See [`COLLECT_BLOCK_BUFFER_LEN`](crate::COLLECT_BLOCK_BUFFER_LEN) for the
/// buffer size passed to the collector.
fn collect_block(&mut self, docs: &[DocId]) { fn collect_block(&mut self, docs: &[DocId]) {
for doc in docs { for doc in docs {
self.collect(*doc, 0.0); self.collect(*doc, 0.0);

View File

@@ -52,16 +52,10 @@ impl<TCollector: Collector> Collector for CollectorWrapper<TCollector> {
impl SegmentCollector for Box<dyn BoxableSegmentCollector> { impl SegmentCollector for Box<dyn BoxableSegmentCollector> {
type Fruit = Box<dyn Fruit>; type Fruit = Box<dyn Fruit>;
#[inline]
fn collect(&mut self, doc: u32, score: Score) { fn collect(&mut self, doc: u32, score: Score) {
self.as_mut().collect(doc, score); self.as_mut().collect(doc, score);
} }
#[inline]
fn collect_block(&mut self, docs: &[DocId]) {
self.as_mut().collect_block(docs);
}
fn harvest(self) -> Box<dyn Fruit> { fn harvest(self) -> Box<dyn Fruit> {
BoxableSegmentCollector::harvest_from_box(self) BoxableSegmentCollector::harvest_from_box(self)
} }
@@ -69,11 +63,6 @@ impl SegmentCollector for Box<dyn BoxableSegmentCollector> {
pub trait BoxableSegmentCollector { pub trait BoxableSegmentCollector {
fn collect(&mut self, doc: u32, score: Score); fn collect(&mut self, doc: u32, score: Score);
fn collect_block(&mut self, docs: &[DocId]) {
for &doc in docs {
self.collect(doc, 0.0);
}
}
fn harvest_from_box(self: Box<Self>) -> Box<dyn Fruit>; fn harvest_from_box(self: Box<Self>) -> Box<dyn Fruit>;
} }
@@ -82,14 +71,9 @@ pub struct SegmentCollectorWrapper<TSegmentCollector: SegmentCollector>(TSegment
impl<TSegmentCollector: SegmentCollector> BoxableSegmentCollector impl<TSegmentCollector: SegmentCollector> BoxableSegmentCollector
for SegmentCollectorWrapper<TSegmentCollector> for SegmentCollectorWrapper<TSegmentCollector>
{ {
#[inline]
fn collect(&mut self, doc: u32, score: Score) { fn collect(&mut self, doc: u32, score: Score) {
self.0.collect(doc, score); self.0.collect(doc, score);
} }
#[inline]
fn collect_block(&mut self, docs: &[DocId]) {
self.0.collect_block(docs);
}
fn harvest_from_box(self: Box<Self>) -> Box<dyn Fruit> { fn harvest_from_box(self: Box<Self>) -> Box<dyn Fruit> {
Box::new(self.0.harvest()) Box::new(self.0.harvest())

View File

@@ -1,11 +1,15 @@
use columnar::{BytesColumn, Column}; use columnar::{BytesColumn, Column};
use super::*; use super::*;
use crate::collector::{Count, FilterCollector, TopDocs};
use crate::index::SegmentReader;
use crate::query::{AllQuery, QueryParser}; use crate::query::{AllQuery, QueryParser};
use crate::schema::{Schema, FAST, TEXT}; use crate::schema::{Schema, FAST, TEXT};
use crate::time::format_description::well_known::Rfc3339; use crate::time::format_description::well_known::Rfc3339;
use crate::time::OffsetDateTime; use crate::time::OffsetDateTime;
use crate::{DateTime, DocAddress, Index, Searcher, TantivyDocument}; use crate::{
doc, DateTime, DocAddress, DocId, Index, Score, Searcher, SegmentOrdinal, TantivyDocument,
};
pub const TEST_COLLECTOR_WITH_SCORE: TestCollector = TestCollector { pub const TEST_COLLECTOR_WITH_SCORE: TestCollector = TestCollector {
compute_score: true, compute_score: true,

View File

@@ -732,19 +732,6 @@ pub struct TopNComputer<Score, D, const REVERSE_ORDER: bool = true> {
top_n: usize, top_n: usize,
pub(crate) threshold: Option<Score>, pub(crate) threshold: Option<Score>,
} }
impl<Score: std::fmt::Debug, D, const REVERSE_ORDER: bool> std::fmt::Debug
for TopNComputer<Score, D, REVERSE_ORDER>
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("TopNComputer")
.field("buffer_len", &self.buffer.len())
.field("top_n", &self.top_n)
.field("current_threshold", &self.threshold)
.finish()
}
}
// Intermediate struct for TopNComputer for deserialization, to keep vec capacity // Intermediate struct for TopNComputer for deserialization, to keep vec capacity
#[derive(Deserialize)] #[derive(Deserialize)]
struct TopNComputerDeser<Score, D, const REVERSE_ORDER: bool> { struct TopNComputerDeser<Score, D, const REVERSE_ORDER: bool> {

View File

@@ -137,6 +137,7 @@ mod mmap_specific {
use tempfile::TempDir; use tempfile::TempDir;
use super::*; use super::*;
use crate::Directory;
#[test] #[test]
fn test_index_on_commit_reload_policy_mmap() -> crate::Result<()> { fn test_index_on_commit_reload_policy_mmap() -> crate::Result<()> {

View File

@@ -1,5 +1,6 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::io::{self, Read, Write}; use std::io::{self, Read, Write};
use std::iter::ExactSizeIterator;
use std::ops::Range; use std::ops::Range;
use common::{BinarySerializable, CountingWriter, HasLen, VInt}; use common::{BinarySerializable, CountingWriter, HasLen, VInt};

View File

@@ -1,4 +1,5 @@
use std::io::Write; use std::io::Write;
use std::marker::{Send, Sync};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
@@ -39,7 +40,6 @@ impl RetryPolicy {
/// The `DirectoryLock` is an object that represents a file lock. /// The `DirectoryLock` is an object that represents a file lock.
/// ///
/// It is associated with a lock file, that gets deleted on `Drop.` /// It is associated with a lock file, that gets deleted on `Drop.`
#[allow(dead_code)]
pub struct DirectoryLock(Box<dyn Send + Sync + 'static>); pub struct DirectoryLock(Box<dyn Send + Sync + 'static>);
struct DirectoryLockGuard { struct DirectoryLockGuard {

View File

@@ -1,6 +1,6 @@
use std::io::Write; use std::io::Write;
use std::mem; use std::mem;
use std::path::Path; use std::path::{Path, PathBuf};
use std::sync::atomic::Ordering::SeqCst; use std::sync::atomic::Ordering::SeqCst;
use std::sync::atomic::{AtomicBool, AtomicUsize}; use std::sync::atomic::{AtomicBool, AtomicUsize};
use std::sync::Arc; use std::sync::Arc;

View File

@@ -32,7 +32,6 @@ pub struct WatchCallbackList {
/// file change is detected. /// file change is detected.
#[must_use = "This `WatchHandle` controls the lifetime of the watch and should therefore be used."] #[must_use = "This `WatchHandle` controls the lifetime of the watch and should therefore be used."]
#[derive(Clone)] #[derive(Clone)]
#[allow(dead_code)]
pub struct WatchHandle(Arc<WatchCallback>); pub struct WatchHandle(Arc<WatchCallback>);
impl WatchHandle { impl WatchHandle {

View File

@@ -9,10 +9,7 @@ use crate::DocId;
/// to compare `[u32; 4]`. /// to compare `[u32; 4]`.
pub const TERMINATED: DocId = i32::MAX as u32; pub const TERMINATED: DocId = i32::MAX as u32;
/// The collect_block method on `SegmentCollector` uses a buffer of this size. pub const BUFFER_LEN: usize = 64;
/// Passed results to `collect_block` will not exceed this size and will be
/// exactly this size as long as we can fill the buffer.
pub const COLLECT_BLOCK_BUFFER_LEN: usize = 64;
/// Represents an iterable set of sorted doc ids. /// Represents an iterable set of sorted doc ids.
pub trait DocSet: Send { pub trait DocSet: Send {
@@ -64,7 +61,7 @@ pub trait DocSet: Send {
/// This method is only here for the specific high-performance /// This method is only here for the specific high-performance
/// use case where batching is needed. The normal way to /// use case where batching is needed. The normal way to
/// go through the `DocId`s is to call `.advance()`. /// go through the `DocId`s is to call `.advance()`.
fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize { fn fill_buffer(&mut self, buffer: &mut [DocId; BUFFER_LEN]) -> usize {
if self.doc() == TERMINATED { if self.doc() == TERMINATED {
return 0; return 0;
} }
@@ -154,7 +151,7 @@ impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
unboxed.seek(target) unboxed.seek(target)
} }
fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize { fn fill_buffer(&mut self, buffer: &mut [DocId; BUFFER_LEN]) -> usize {
let unboxed: &mut TDocSet = self.borrow_mut(); let unboxed: &mut TDocSet = self.borrow_mut();
unboxed.fill_buffer(buffer) unboxed.fill_buffer(buffer)
} }
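
A usage sketch of the batching path above (hypothetical caller; the buffer-size constant is named `COLLECT_BLOCK_BUFFER_LEN` on one side of this diff and `BUFFER_LEN` on the other): a partially filled buffer signals that the `DocSet` is exhausted.

fn drain_in_blocks<D: DocSet>(docset: &mut D, mut on_doc: impl FnMut(DocId)) {
    let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
    loop {
        let filled = docset.fill_buffer(&mut buffer);
        for &doc in &buffer[..filled] {
            on_doc(doc);
        }
        if filled < COLLECT_BLOCK_BUFFER_LEN {
            break;
        }
    }
}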

View File

@@ -79,7 +79,7 @@ mod tests {
use std::ops::{Range, RangeInclusive}; use std::ops::{Range, RangeInclusive};
use std::path::Path; use std::path::Path;
use columnar::StrColumn; use columnar::{Column, MonotonicallyMappableToU64, StrColumn};
use common::{ByteCount, HasLen, TerminatingWrite}; use common::{ByteCount, HasLen, TerminatingWrite};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use rand::prelude::SliceRandom; use rand::prelude::SliceRandom;

View File

@@ -1,4 +1,4 @@
use std::cmp::Ordering; use std::cmp::{Ord, Ordering};
use std::error::Error; use std::error::Error;
use std::fmt; use std::fmt;
use std::str::FromStr; use std::str::FromStr;

View File

@@ -516,8 +516,8 @@ impl fmt::Debug for SegmentReader {
mod test { mod test {
use super::*; use super::*;
use crate::index::Index; use crate::index::Index;
use crate::schema::{SchemaBuilder, Term, STORED, TEXT}; use crate::schema::{Schema, SchemaBuilder, Term, STORED, TEXT};
use crate::IndexWriter; use crate::{DocId, IndexWriter};
#[test] #[test]
fn test_merge_field_meta_data_same() { fn test_merge_field_meta_data_same() {

View File

@@ -158,7 +158,8 @@ mod tests_indexsorting {
use crate::indexer::doc_id_mapping::DocIdMapping; use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::indexer::NoMergePolicy; use crate::indexer::NoMergePolicy;
use crate::query::QueryParser; use crate::query::QueryParser;
use crate::schema::*; use crate::schema::document::Value;
use crate::schema::{Schema, *};
use crate::{DocAddress, Index, IndexSettings, IndexSortByField, Order}; use crate::{DocAddress, Index, IndexSettings, IndexSortByField, Order};
fn create_test_index( fn create_test_index(

View File

@@ -22,7 +22,6 @@ where
} }
} }
#[allow(dead_code)]
pub trait FlatMapWithBufferIter: Iterator { pub trait FlatMapWithBufferIter: Iterator {
/// Function similar to `flat_map`, but allows reusing a shared `Vec`. /// Function similar to `flat_map`, but allows reusing a shared `Vec`.
fn flat_map_with_buffer<F, T>(self, fill_buffer: F) -> FlatMapWithBuffer<T, F, Self> fn flat_map_with_buffer<F, T>(self, fill_buffer: F) -> FlatMapWithBuffer<T, F, Self>

View File

@@ -806,6 +806,7 @@ mod tests {
use columnar::{Cardinality, Column, MonotonicallyMappableToU128}; use columnar::{Cardinality, Column, MonotonicallyMappableToU128};
use itertools::Itertools; use itertools::Itertools;
use proptest::prop_oneof; use proptest::prop_oneof;
use proptest::strategy::Strategy;
use super::super::operation::UserOperation; use super::super::operation::UserOperation;
use crate::collector::TopDocs; use crate::collector::TopDocs;

View File

@@ -144,9 +144,10 @@ mod tests {
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use super::*; use super::*;
use crate::index::SegmentMetaInventory; use crate::index::{SegmentId, SegmentMeta, SegmentMetaInventory};
use crate::indexer::merge_policy::MergePolicy;
use crate::schema;
use crate::schema::INDEXED; use crate::schema::INDEXED;
use crate::{schema, SegmentId};
static INVENTORY: Lazy<SegmentMetaInventory> = Lazy::new(SegmentMetaInventory::default); static INVENTORY: Lazy<SegmentMetaInventory> = Lazy::new(SegmentMetaInventory::default);

View File

@@ -39,6 +39,7 @@ impl MergePolicy for NoMergePolicy {
pub mod tests { pub mod tests {
use super::*; use super::*;
use crate::index::{SegmentId, SegmentMeta};
/// `MergePolicy` useful for test purposes. /// `MergePolicy` useful for test purposes.
/// ///

View File

@@ -576,7 +576,7 @@ impl IndexMerger {
// //
// Overall the reliable way to know if we have actual frequencies loaded or not // Overall the reliable way to know if we have actual frequencies loaded or not
// is to check whether the actual decoded array is empty or not. // is to check whether the actual decoded array is empty or not.
if has_term_freq == postings.block_cursor.freqs().is_empty() { if has_term_freq != !postings.block_cursor.freqs().is_empty() {
return Err(DataCorruption::comment_only( return Err(DataCorruption::comment_only(
"Term freqs are inconsistent across segments", "Term freqs are inconsistent across segments",
) )
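
Both sides of this hunk express the same predicate: for booleans, `a != !b` is equivalent to `a == b`, so the corruption error fires exactly when the has-term-freq flag disagrees with whether any frequencies were actually decoded. A tiny standalone check (not tantivy code):

fn main() {
    for has_term_freq in [false, true] {
        for freqs_empty in [false, true] {
            // inconsistent when the flag promises freqs but none were decoded, or vice versa
            assert_eq!(has_term_freq == freqs_empty, has_term_freq != !freqs_empty);
        }
    }
}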

View File

@@ -144,115 +144,6 @@ mod tests_mmap {
assert_eq!(num_docs, 256); assert_eq!(num_docs, 256);
} }
} }
#[test]
fn test_json_field_null_byte() {
// Test when field name contains a zero byte, which has special meaning in tantivy.
// As a workaround, we convert the zero byte to the ASCII character '0'.
// https://github.com/quickwit-oss/tantivy/issues/2340
// https://github.com/quickwit-oss/tantivy/issues/2193
let field_name_in = "\u{0000}";
let field_name_out = "0";
test_json_field_name(field_name_in, field_name_out);
}
#[test]
fn test_json_field_1byte() {
// Test when field name contains a 0x01 byte, which has special meaning in tantivy.
let field_name_in = "\u{0001}";
let field_name_out = "\u{0001}";
test_json_field_name(field_name_in, field_name_out);
// Test when field name contains a 0x01 byte, which has special meaning in tantivy.
let field_name_in = "\u{0001}";
let field_name_out = ".";
test_json_field_name(field_name_in, field_name_out);
}
fn test_json_field_name(field_name_in: &str, field_name_out: &str) {
let mut schema_builder = Schema::builder();
let options = JsonObjectOptions::from(TEXT | FAST).set_expand_dots_enabled();
let field = schema_builder.add_json_field("json", options);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests().unwrap();
index_writer
.add_document(doc!(field=>json!({format!("{field_name_in}"): "test1"})))
.unwrap();
index_writer
.add_document(doc!(field=>json!({format!("a{field_name_in}"): "test2"})))
.unwrap();
index_writer
.add_document(doc!(field=>json!({format!("a{field_name_in}a"): "test3"})))
.unwrap();
index_writer
.add_document(
doc!(field=>json!({format!("a{field_name_in}a{field_name_in}"): "test4"})),
)
.unwrap();
index_writer
.add_document(
doc!(field=>json!({format!("a{field_name_in}.ab{field_name_in}"): "test5"})),
)
.unwrap();
index_writer
.add_document(
doc!(field=>json!({format!("a{field_name_in}"): json!({format!("a{field_name_in}"): "test6"}) })),
)
.unwrap();
index_writer
.add_document(doc!(field=>json!({format!("{field_name_in}a" ): "test7"})))
.unwrap();
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let parse_query = QueryParser::for_index(&index, Vec::new());
let test_query = |field_name: &str| {
let query = parse_query.parse_query(field_name).unwrap();
let num_docs = searcher.search(&query, &Count).unwrap();
assert_eq!(num_docs, 1);
};
test_query(format!("json.{field_name_out}:test1").as_str());
test_query(format!("json.a{field_name_out}:test2").as_str());
test_query(format!("json.a{field_name_out}a:test3").as_str());
test_query(format!("json.a{field_name_out}a{field_name_out}:test4").as_str());
test_query(format!("json.a{field_name_out}.ab{field_name_out}:test5").as_str());
test_query(format!("json.a{field_name_out}.a{field_name_out}:test6").as_str());
test_query(format!("json.{field_name_out}a:test7").as_str());
let test_agg = |field_name: &str, expected: &str| {
let agg_req_str = json!(
{
"termagg": {
"terms": {
"field": field_name,
}
}
});
let agg_req: Aggregations = serde_json::from_value(agg_req_str).unwrap();
let collector = AggregationCollector::from_aggs(agg_req, Default::default());
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
let res = serde_json::to_value(agg_res).unwrap();
assert_eq!(res["termagg"]["buckets"][0]["doc_count"], 1);
assert_eq!(res["termagg"]["buckets"][0]["key"], expected);
};
test_agg(format!("json.{field_name_out}").as_str(), "test1");
test_agg(format!("json.a{field_name_out}").as_str(), "test2");
test_agg(format!("json.a{field_name_out}a").as_str(), "test3");
test_agg(
format!("json.a{field_name_out}a{field_name_out}").as_str(),
"test4",
);
test_agg(
format!("json.a{field_name_out}.ab{field_name_out}").as_str(),
"test5",
);
test_agg(
format!("json.a{field_name_out}.a{field_name_out}").as_str(),
"test6",
);
test_agg(format!("json.{field_name_out}a").as_str(), "test7");
}
#[test] #[test]
fn test_json_field_expand_dots_enabled_dot_escape_not_required() { fn test_json_field_expand_dots_enabled_dot_escape_not_required() {

View File

@@ -103,7 +103,7 @@ impl SegmentRegister {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::index::SegmentMetaInventory; use crate::index::{SegmentId, SegmentMetaInventory};
use crate::indexer::delete_queue::*; use crate::indexer::delete_queue::*;
fn segment_ids(segment_register: &SegmentRegister) -> Vec<SegmentId> { fn segment_ids(segment_register: &SegmentRegister) -> Vec<SegmentId> {

View File

@@ -213,7 +213,7 @@ pub use common::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64, HasLen};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
pub use self::docset::{DocSet, COLLECT_BLOCK_BUFFER_LEN, TERMINATED}; pub use self::docset::{DocSet, TERMINATED};
#[deprecated( #[deprecated(
since = "0.22.0", since = "0.22.0",
note = "Will be removed in tantivy 0.23. Use export from snippet module instead" note = "Will be removed in tantivy 0.23. Use export from snippet module instead"
@@ -391,6 +391,7 @@ pub mod tests {
use crate::index::SegmentReader; use crate::index::SegmentReader;
use crate::merge_policy::NoMergePolicy; use crate::merge_policy::NoMergePolicy;
use crate::query::BooleanQuery; use crate::query::BooleanQuery;
use crate::schema::document::Value;
use crate::schema::*; use crate::schema::*;
use crate::{DateTime, DocAddress, Index, IndexWriter, Postings, ReloadPolicy}; use crate::{DateTime, DocAddress, Index, IndexWriter, Postings, ReloadPolicy};

View File

@@ -14,6 +14,7 @@ pub fn compressed_block_size(num_bits: u8) -> usize {
pub struct BlockEncoder { pub struct BlockEncoder {
bitpacker: BitPacker4x, bitpacker: BitPacker4x,
pub output: [u8; COMPRESSED_BLOCK_MAX_SIZE], pub output: [u8; COMPRESSED_BLOCK_MAX_SIZE],
pub output_len: usize,
} }
impl Default for BlockEncoder { impl Default for BlockEncoder {
@@ -27,6 +28,7 @@ impl BlockEncoder {
BlockEncoder { BlockEncoder {
bitpacker: BitPacker4x::new(), bitpacker: BitPacker4x::new(),
output: [0u8; COMPRESSED_BLOCK_MAX_SIZE], output: [0u8; COMPRESSED_BLOCK_MAX_SIZE],
output_len: 0,
} }
} }

View File

@@ -67,18 +67,10 @@ impl<Rec: Recorder> PostingsWriter for JsonPostingsWriter<Rec> {
    ) -> io::Result<()> {
        let mut term_buffer = Term::with_capacity(48);
        let mut buffer_lender = BufferLender::default();
-        term_buffer.clear_with_field_and_type(Type::Json, Field::from_field_id(0));
-        let mut prev_term_id = u32::MAX;
-        let mut term_path_len = 0; // this will be set in the first iteration
        for (_field, path_id, term, addr) in term_addrs {
-            if prev_term_id != path_id.path_id() {
-                term_buffer.truncate_value_bytes(0);
-                term_buffer.append_path(ordered_id_to_path[path_id.path_id() as usize].as_bytes());
-                term_buffer.append_bytes(&[JSON_END_OF_PATH]);
-                term_path_len = term_buffer.len_bytes();
-                prev_term_id = path_id.path_id();
-            }
-            term_buffer.truncate_value_bytes(term_path_len);
+            term_buffer.clear_with_field_and_type(Type::Json, Field::from_field_id(0));
+            term_buffer.append_bytes(ordered_id_to_path[path_id.path_id() as usize].as_bytes());
+            term_buffer.append_bytes(&[JSON_END_OF_PATH]);
            term_buffer.append_bytes(term);
            if let Some(json_value) = term_buffer.value().as_json_value_bytes() {
                let typ = json_value.typ();

View File

@@ -1,3 +1,5 @@
+use std::convert::TryInto;
use crate::directory::OwnedBytes;
use crate::postings::compression::{compressed_block_size, COMPRESSION_BLOCK_SIZE};
use crate::query::Bm25Weight;

View File

@@ -1,4 +1,5 @@
use std::io;
+use std::iter::ExactSizeIterator;
use std::ops::Range;
use common::{BinarySerializable, FixedSize};

View File

@@ -1,4 +1,4 @@
-use crate::docset::{DocSet, COLLECT_BLOCK_BUFFER_LEN, TERMINATED};
+use crate::docset::{DocSet, BUFFER_LEN, TERMINATED};
use crate::index::SegmentReader;
use crate::query::boost_query::BoostScorer;
use crate::query::explanation::does_not_match;
@@ -54,7 +54,7 @@ impl DocSet for AllScorer {
        self.doc
    }
-    fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize {
+    fn fill_buffer(&mut self, buffer: &mut [DocId; BUFFER_LEN]) -> usize {
        if self.doc() == TERMINATED {
            return 0;
        }
@@ -96,7 +96,7 @@ impl Scorer for AllScorer {
#[cfg(test)]
mod tests {
    use super::AllQuery;
-    use crate::docset::{DocSet, COLLECT_BLOCK_BUFFER_LEN, TERMINATED};
+    use crate::docset::{DocSet, BUFFER_LEN, TERMINATED};
    use crate::query::{AllScorer, EnableScoring, Query};
    use crate::schema::{Schema, TEXT};
    use crate::{Index, IndexWriter};
@@ -162,16 +162,16 @@ mod tests {
    pub fn test_fill_buffer() {
        let mut postings = AllScorer {
            doc: 0u32,
-            max_doc: COLLECT_BLOCK_BUFFER_LEN as u32 * 2 + 9,
+            max_doc: BUFFER_LEN as u32 * 2 + 9,
        };
-        let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
-        assert_eq!(postings.fill_buffer(&mut buffer), COLLECT_BLOCK_BUFFER_LEN);
-        for i in 0u32..COLLECT_BLOCK_BUFFER_LEN as u32 {
+        let mut buffer = [0u32; BUFFER_LEN];
+        assert_eq!(postings.fill_buffer(&mut buffer), BUFFER_LEN);
+        for i in 0u32..BUFFER_LEN as u32 {
            assert_eq!(buffer[i as usize], i);
        }
-        assert_eq!(postings.fill_buffer(&mut buffer), COLLECT_BLOCK_BUFFER_LEN);
-        for i in 0u32..COLLECT_BLOCK_BUFFER_LEN as u32 {
-            assert_eq!(buffer[i as usize], i + COLLECT_BLOCK_BUFFER_LEN as u32);
+        assert_eq!(postings.fill_buffer(&mut buffer), BUFFER_LEN);
+        for i in 0u32..BUFFER_LEN as u32 {
+            assert_eq!(buffer[i as usize], i + BUFFER_LEN as u32);
        }
        assert_eq!(postings.fill_buffer(&mut buffer), 9);
    }

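The renames in this file and in the several query files below are mechanical: the block-size constant is spelled `COLLECT_BLOCK_BUFFER_LEN` on one side of the comparison and `BUFFER_LEN` on the other, while the `fill_buffer` contract is unchanged — it writes doc ids into a fixed-size array and returns how many slots it filled, with a short count only once the docset is exhausted. A minimal sketch of draining any `DocSet` block by block under that contract, assuming the crate-root re-exports shown in the lib.rs hunk earlier (substitute `BUFFER_LEN` for the other side); `drain_blocks` is a hypothetical helper, not part of tantivy:

```rust
use tantivy::{DocId, DocSet, COLLECT_BLOCK_BUFFER_LEN};

// Sketch only: hands every filled block of doc ids to `callback` until the docset runs dry.
// It mirrors the `for_each_docset_buffered` helper that appears in the weight.rs hunk below.
fn drain_blocks<D: DocSet>(docset: &mut D, mut callback: impl FnMut(&[DocId])) {
    let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
    loop {
        let filled = docset.fill_buffer(&mut buffer);
        if filled == 0 {
            return;
        }
        callback(&buffer[..filled]);
        // A partially filled buffer means there is nothing left to read.
        if filled < COLLECT_BLOCK_BUFFER_LEN {
            return;
        }
    }
}
```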
View File

@@ -1,6 +1,6 @@
use std::collections::HashMap;
-use crate::docset::COLLECT_BLOCK_BUFFER_LEN;
+use crate::docset::BUFFER_LEN;
use crate::index::SegmentReader;
use crate::postings::FreqReadingOption;
use crate::query::explanation::does_not_match;
@@ -228,7 +228,7 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
        callback: &mut dyn FnMut(&[DocId]),
    ) -> crate::Result<()> {
        let scorer = self.complex_scorer(reader, 1.0, || DoNothingCombiner)?;
-        let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
+        let mut buffer = [0u32; BUFFER_LEN];
        match scorer {
            SpecializedScorer::TermUnion(term_scorers) => {

View File

@@ -1,6 +1,6 @@
use std::fmt;
-use crate::docset::COLLECT_BLOCK_BUFFER_LEN;
+use crate::docset::BUFFER_LEN;
use crate::fastfield::AliveBitSet;
use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight};
use crate::{DocId, DocSet, Score, SegmentReader, Term};
@@ -105,7 +105,7 @@ impl<S: Scorer> DocSet for BoostScorer<S> {
        self.underlying.seek(target)
    }
-    fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize {
+    fn fill_buffer(&mut self, buffer: &mut [DocId; BUFFER_LEN]) -> usize {
        self.underlying.fill_buffer(buffer)
    }

View File

@@ -1,6 +1,6 @@
use std::fmt;
-use crate::docset::COLLECT_BLOCK_BUFFER_LEN;
+use crate::docset::BUFFER_LEN;
use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight};
use crate::{DocId, DocSet, Score, SegmentReader, TantivyError, Term};
@@ -119,7 +119,7 @@ impl<TDocSet: DocSet> DocSet for ConstScorer<TDocSet> {
        self.docset.seek(target)
    }
-    fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize {
+    fn fill_buffer(&mut self, buffer: &mut [DocId; BUFFER_LEN]) -> usize {
        self.docset.fill_buffer(buffer)
    }

View File

@@ -149,7 +149,7 @@ mod tests {
    use crate::query::exist_query::ExistsQuery;
    use crate::query::{BooleanQuery, RangeQuery};
    use crate::schema::{Facet, FacetOptions, Schema, FAST, INDEXED, STRING, TEXT};
-    use crate::{Index, Searcher};
+    use crate::{doc, Index, Searcher};
    #[test]
    fn test_exists_query_simple() -> crate::Result<()> {

View File

@@ -84,7 +84,7 @@ pub struct FuzzyTermQuery {
    distance: u8,
    /// Should a transposition cost 1 or 2?
    transposition_cost_one: bool,
-    /// is a starts with query
+    ///
    prefix: bool,
}

View File

@@ -477,7 +477,7 @@ mod tests {
    use crate::schema::{
        Field, IntoIpv6Addr, Schema, TantivyDocument, FAST, INDEXED, STORED, TEXT,
    };
-    use crate::{Index, IndexWriter};
+    use crate::{doc, Index, IndexWriter};
    #[test]
    fn test_range_query_simple() -> crate::Result<()> {

View File

@@ -139,7 +139,7 @@ mod tests {
    use crate::collector::{Count, TopDocs};
    use crate::query::{Query, QueryParser, TermQuery};
    use crate::schema::{IndexRecordOption, IntoIpv6Addr, Schema, INDEXED, STORED};
-    use crate::{Index, IndexWriter, Term};
+    use crate::{doc, Index, IndexWriter, Term};
    #[test]
    fn search_ip_test() {

View File

@@ -1,5 +1,5 @@
use super::term_scorer::TermScorer;
-use crate::docset::{DocSet, COLLECT_BLOCK_BUFFER_LEN};
+use crate::docset::{DocSet, BUFFER_LEN};
use crate::fieldnorm::FieldNormReader;
use crate::index::SegmentReader;
use crate::postings::SegmentPostings;
@@ -64,7 +64,7 @@ impl Weight for TermWeight {
        callback: &mut dyn FnMut(&[DocId]),
    ) -> crate::Result<()> {
        let mut scorer = self.specialized_scorer(reader, 1.0)?;
-        let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
+        let mut buffer = [0u32; BUFFER_LEN];
        for_each_docset_buffered(&mut scorer, &mut buffer, callback);
        Ok(())
    }

View File

@@ -53,7 +53,8 @@ impl HasLen for VecDocSet {
pub mod tests {
    use super::*;
-    use crate::docset::COLLECT_BLOCK_BUFFER_LEN;
+    use crate::docset::{DocSet, BUFFER_LEN};
+    use crate::DocId;
    #[test]
    pub fn test_vec_postings() {
@@ -71,16 +72,16 @@ pub mod tests {
    #[test]
    pub fn test_fill_buffer() {
-        let doc_ids: Vec<DocId> = (1u32..=(COLLECT_BLOCK_BUFFER_LEN as u32 * 2 + 9)).collect();
+        let doc_ids: Vec<DocId> = (1u32..=(BUFFER_LEN as u32 * 2 + 9)).collect();
        let mut postings = VecDocSet::from(doc_ids);
-        let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
-        assert_eq!(postings.fill_buffer(&mut buffer), COLLECT_BLOCK_BUFFER_LEN);
-        for i in 0u32..COLLECT_BLOCK_BUFFER_LEN as u32 {
+        let mut buffer = [0u32; BUFFER_LEN];
+        assert_eq!(postings.fill_buffer(&mut buffer), BUFFER_LEN);
+        for i in 0u32..BUFFER_LEN as u32 {
            assert_eq!(buffer[i as usize], i + 1);
        }
-        assert_eq!(postings.fill_buffer(&mut buffer), COLLECT_BLOCK_BUFFER_LEN);
-        for i in 0u32..COLLECT_BLOCK_BUFFER_LEN as u32 {
-            assert_eq!(buffer[i as usize], i + 1 + COLLECT_BLOCK_BUFFER_LEN as u32);
+        assert_eq!(postings.fill_buffer(&mut buffer), BUFFER_LEN);
+        for i in 0u32..BUFFER_LEN as u32 {
+            assert_eq!(buffer[i as usize], i + 1 + BUFFER_LEN as u32);
        }
        assert_eq!(postings.fill_buffer(&mut buffer), 9);
    }

View File

@@ -1,5 +1,5 @@
use super::Scorer;
-use crate::docset::COLLECT_BLOCK_BUFFER_LEN;
+use crate::docset::BUFFER_LEN;
use crate::index::SegmentReader;
use crate::query::Explanation;
use crate::{DocId, DocSet, Score, TERMINATED};
@@ -22,7 +22,7 @@ pub(crate) fn for_each_scorer<TScorer: Scorer + ?Sized>(
#[inline]
pub(crate) fn for_each_docset_buffered<T: DocSet + ?Sized>(
    docset: &mut T,
-    buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN],
+    buffer: &mut [DocId; BUFFER_LEN],
    mut callback: impl FnMut(&[DocId]),
) {
    loop {
@@ -105,7 +105,7 @@ pub trait Weight: Send + Sync + 'static {
    ) -> crate::Result<()> {
        let mut docset = self.scorer(reader, 1.0)?;
-        let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
+        let mut buffer = [0u32; BUFFER_LEN];
        for_each_docset_buffered(&mut docset, &mut buffer, callback);
        Ok(())
    }

View File

@@ -1,5 +1,6 @@
mod warming;
+use std::convert::TryInto;
use std::sync::atomic::AtomicU64;
use std::sync::{atomic, Arc, Weak};

View File

@@ -819,6 +819,7 @@ mod tests {
    use crate::schema::document::existing_type_impls::JsonObjectIter;
    use crate::schema::document::se::BinaryValueSerializer;
    use crate::schema::document::{ReferenceValue, ReferenceValueLeaf};
+    use crate::schema::OwnedValue;
    fn serialize_value<'a>(value: ReferenceValue<'a, &'a serde_json::Value>) -> Vec<u8> {
        let mut writer = Vec::new();

View File

@@ -1,16 +1,15 @@
use std::collections::{BTreeMap, HashMap, HashSet};
use std::net::Ipv6Addr;
-use common::{BinarySerializable, DateTime, VInt};
+use common::DateTime;
use serde_json::Map;
-use crate::schema::document::se::BinaryValueSerializer;
use crate::schema::document::{
-    BinaryDocumentDeserializer, BinaryDocumentSerializer, DeserializeError, Document, DocumentDeserialize, DocumentDeserializer, ReferenceValue, ReferenceValueLeaf
+    DeserializeError, Document, DocumentDeserialize, DocumentDeserializer,
};
use crate::schema::field_type::ValueParsingError;
use crate::schema::field_value::FieldValueIter;
-use crate::schema::{Facet, Field, FieldValue, NamedFieldDocument, OwnedValue, Schema, Value};
+use crate::schema::{Facet, Field, FieldValue, NamedFieldDocument, OwnedValue, Schema};
use crate::tokenizer::PreTokenizedString;
/// TantivyDocument provides a default implementation of the `Document` trait.
@@ -86,36 +85,6 @@ impl IntoIterator for TantivyDocument {
}
impl TantivyDocument {
-    pub fn to_bytes(&self, buffer: &mut Vec<u8>) -> std::io::Result<()> {
-        buffer.clear();
-        let num_field_values = self.field_values.len();
-        VInt(num_field_values as u64).serialize(buffer)?;
-        for (field, value_access) in self.iter_fields_and_values() {
-            field.serialize(buffer)?;
-            let mut serializer = BinaryValueSerializer::new(buffer);
-            match value_access.as_value() {
-                ReferenceValue::Leaf(ReferenceValueLeaf::PreTokStr(pre_tokenized_text)) => {
-                    serializer.serialize_value(ReferenceValue::Leaf::<&'_ OwnedValue>(
-                        ReferenceValueLeaf::Str(&pre_tokenized_text.text),
-                    ))?;
-                }
-                _ => {
-                    serializer.serialize_value(value_access.as_value())?;
-                }
-            }
-        }
-        Ok(())
-    }
-    pub fn from_bytes(mut payload: &[u8]) -> Self {
-        let deserializer = BinaryDocumentDeserializer::from_reader(&mut payload).unwrap();
-        Self::deserialize(deserializer).unwrap()
-    }
    /// Creates a new, empty document object
    pub fn new() -> TantivyDocument {
        TantivyDocument::default()
@@ -287,6 +256,7 @@ impl DocParsingError {
#[cfg(test)]
mod tests {
+    use crate::schema::document::default_document::TantivyDocument;
    use crate::schema::*;
    #[test]

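The block removed in the hunk above is the `to_bytes`/`from_bytes` convenience pair on `TantivyDocument`. On the side of the comparison that still carries these helpers, a round trip would look roughly like the sketch below; the schema, field name, and error handling are illustrative assumptions rather than anything taken from the diff, and the two helpers only exist on that side:

```rust
use tantivy::schema::{Schema, STORED, TEXT};
use tantivy::{doc, TantivyDocument};

fn main() -> std::io::Result<()> {
    // Hypothetical one-field schema, just to have something to serialize.
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT | STORED);
    let _schema = schema_builder.build();

    let original: TantivyDocument = doc!(title => "hello round trip");

    // `to_bytes` clears the buffer before writing, so the buffer can be reused across documents.
    let mut buffer = Vec::new();
    original.to_bytes(&mut buffer)?;

    // `from_bytes` decodes the same binary layout back into a document.
    let _decoded = TantivyDocument::from_bytes(&buffer);
    Ok(())
}
```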
View File

@@ -1,4 +1,4 @@
-use std::collections::BTreeMap;
+use std::collections::{btree_map, BTreeMap};
use std::fmt;
use std::net::Ipv6Addr;
@@ -45,7 +45,7 @@ pub enum OwnedValue {
    /// A set of values.
    Array(Vec<Self>),
    /// Dynamic object value.
-    Object(Vec<(String, Self)>),
+    Object(BTreeMap<String, Self>),
    /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
    IpAddr(Ipv6Addr),
}
@@ -148,10 +148,10 @@ impl ValueDeserialize for OwnedValue {
    fn visit_object<'de, A>(&self, mut access: A) -> Result<Self::Value, DeserializeError>
    where A: ObjectAccess<'de> {
-        let mut elements = Vec::new();
+        let mut elements = BTreeMap::new();
        while let Some((key, value)) = access.next_entry()? {
-            elements.push((key, value));
+            elements.insert(key, value);
        }
        Ok(OwnedValue::Object(elements))
@@ -248,13 +248,12 @@ impl<'de> serde::Deserialize<'de> for OwnedValue {
    fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
    where A: MapAccess<'de> {
-        let mut object =
-            map.size_hint()
-                .map(Vec::with_capacity)
-                .unwrap_or_default();
+        let mut object = BTreeMap::new();
        while let Some((key, value)) = map.next_entry()? {
-            object.push((key, value));
+            object.insert(key, value);
        }
        Ok(OwnedValue::Object(object))
    }
}
@@ -364,8 +363,7 @@ impl From<PreTokenizedString> for OwnedValue {
impl From<BTreeMap<String, OwnedValue>> for OwnedValue {
    fn from(object: BTreeMap<String, OwnedValue>) -> OwnedValue {
-        let key_values = object.into_iter().collect();
-        OwnedValue::Object(key_values)
+        OwnedValue::Object(object)
    }
}
@@ -419,15 +417,18 @@ impl From<serde_json::Value> for OwnedValue {
impl From<serde_json::Map<String, serde_json::Value>> for OwnedValue {
    fn from(map: serde_json::Map<String, serde_json::Value>) -> Self {
-        let object: Vec<(String, OwnedValue)> = map.into_iter()
-            .map(|(key, value)| (key, OwnedValue::from(value)))
-            .collect();
+        let mut object = BTreeMap::new();
+        for (key, value) in map {
+            object.insert(key, OwnedValue::from(value));
+        }
        OwnedValue::Object(object)
    }
}
/// A wrapper type for iterating over a serde_json object producing reference values.
-pub struct ObjectMapIter<'a>(std::slice::Iter<'a, (String, OwnedValue)>);
+pub struct ObjectMapIter<'a>(btree_map::Iter<'a, String, OwnedValue>);
impl<'a> Iterator for ObjectMapIter<'a> {
    type Item = (&'a str, &'a OwnedValue);
@@ -442,7 +443,9 @@ impl<'a> Iterator for ObjectMapIter<'a> {
mod tests {
    use super::*;
    use crate::schema::{BytesOptions, Schema};
-    use crate::{Document, TantivyDocument};
+    use crate::time::format_description::well_known::Rfc3339;
+    use crate::time::OffsetDateTime;
+    use crate::{DateTime, Document, TantivyDocument};
    #[test]
    fn test_parse_bytes_doc() {

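The `OwnedValue` hunks above all follow from one representational difference: one side of this comparison stores `Object` as `Vec<(String, OwnedValue)>`, which preserves insertion order and allows repeated keys, while the other uses `BTreeMap<String, OwnedValue>`, which sorts keys and keeps only the last value per key. A self-contained sketch of that behavioural difference (plain `i32` values stand in for `OwnedValue`; nothing below comes from tantivy itself):

```rust
use std::collections::BTreeMap;

fn main() {
    let entries = vec![("b", 1), ("a", 2), ("b", 3)];

    // Vec-of-pairs representation: keeps insertion order and both "b" entries.
    let as_vec: Vec<(String, i32)> = entries
        .iter()
        .map(|&(k, v)| (k.to_string(), v))
        .collect();
    assert_eq!(as_vec.len(), 3);
    assert_eq!(as_vec[0].0, "b");

    // BTreeMap representation: keys come back sorted and the later "b" value wins.
    let as_map: BTreeMap<String, i32> = entries
        .iter()
        .map(|&(k, v)| (k.to_string(), v))
        .collect();
    assert_eq!(as_map.len(), 2);
    assert_eq!(as_map["b"], 3);
    assert_eq!(as_map.keys().next().map(String::as_str), Some("a"));
}
```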
View File

@@ -136,6 +136,7 @@ impl FieldEntry {
#[cfg(test)]
mod tests {
+    use serde_json;
    use super::*;
    use crate::schema::{Schema, TextFieldIndexing, TEXT};

View File

@@ -6,8 +6,10 @@ use serde::de::{SeqAccess, Visitor};
use serde::ser::SerializeSeq;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
+use super::ip_options::IpAddrOptions;
use super::*;
use crate::json_utils::split_json_path;
+use crate::schema::bytes_options::BytesOptions;
use crate::TantivyError;
/// Tantivy has a very strict schema.
@@ -419,7 +421,9 @@ mod tests {
    use matches::{assert_matches, matches};
    use pretty_assertions::assert_eq;
+    use serde_json;
+    use crate::schema::document::Value;
    use crate::schema::field_type::ValueParsingError;
    use crate::schema::schema::DocParsingError::InvalidJson;
    use crate::schema::*;

View File

@@ -1,3 +1,4 @@
+use std::convert::TryInto;
use std::hash::{Hash, Hasher};
use std::net::Ipv6Addr;
use std::{fmt, str};
@@ -217,23 +218,6 @@ impl Term {
        &mut self.0[len_before..]
    }
-    /// Appends json path bytes to the Term.
-    /// If the path contains 0 bytes, they are replaced by a "0" string.
-    /// The 0 byte is used to mark the end of the path.
-    ///
-    /// This function returns the segment that has just been added.
-    #[inline]
-    pub fn append_path(&mut self, bytes: &[u8]) -> &mut [u8] {
-        let len_before = self.0.len();
-        if bytes.contains(&0u8) {
-            self.0
-                .extend(bytes.iter().map(|&b| if b == 0 { b'0' } else { b }));
-        } else {
-            self.0.extend_from_slice(bytes);
-        }
-        &mut self.0[len_before..]
-    }
    /// Appends a JSON_PATH_SEGMENT_SEP to the term.
    /// Only used for JSON type.
    #[inline]

View File

@@ -1,3 +1,4 @@
+use core::convert::TryInto;
use std::io::{self};
use std::mem;

View File

@@ -2,6 +2,12 @@ use std::io;
use serde::{Deserialize, Deserializer, Serialize};
+pub trait StoreCompressor {
+    fn compress(&self, uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()>;
+    fn decompress(&self, compressed: &[u8], decompressed: &mut Vec<u8>) -> io::Result<()>;
+    fn get_compressor_id() -> u8;
+}
/// Compressor can be used on `IndexSettings` to choose
/// the compressor used to compress the doc store.
///

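One side of this comparison introduces the `StoreCompressor` trait shown above (and again in the decompressor file below). As an illustration of the intended shape of an implementation, here is a hypothetical identity compressor; the trait is re-declared so the sketch stands alone, and `NoopCompressor` is not a type that tantivy ships:

```rust
use std::io;

// Trait as it appears in the hunk above (re-declared here so the sketch compiles on its own).
pub trait StoreCompressor {
    fn compress(&self, uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()>;
    fn decompress(&self, compressed: &[u8], decompressed: &mut Vec<u8>) -> io::Result<()>;
    fn get_compressor_id() -> u8;
}

/// Hypothetical identity "compressor": copies bytes through unchanged.
pub struct NoopCompressor;

impl StoreCompressor for NoopCompressor {
    fn compress(&self, uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()> {
        compressed.clear();
        compressed.extend_from_slice(uncompressed);
        Ok(())
    }

    fn decompress(&self, compressed: &[u8], decompressed: &mut Vec<u8>) -> io::Result<()> {
        decompressed.clear();
        decompressed.extend_from_slice(compressed);
        Ok(())
    }

    fn get_compressor_id() -> u8 {
        // Arbitrary placeholder id; a real implementation must pick a unique, stable value.
        0
    }
}
```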
View File

@@ -4,6 +4,12 @@ use serde::{Deserialize, Serialize};
use super::Compressor;
+pub trait StoreCompressor {
+    fn compress(&self, uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()>;
+    fn decompress(&self, compressed: &[u8], decompressed: &mut Vec<u8>) -> io::Result<()>;
+    fn get_compressor_id() -> u8;
+}
/// Decompressor is deserialized from the doc store footer, when opening an index.
#[derive(Clone, Debug, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Decompressor {
@@ -80,6 +86,7 @@ impl Decompressor {
#[cfg(test)]
mod tests {
    use super::*;
+    use crate::store::Compressor;
    #[test]
    fn compressor_decompressor_id_test() {

View File

@@ -41,7 +41,7 @@ mod tests {
    use std::io;
-    use proptest::prelude::*;
+    use proptest::strategy::{BoxedStrategy, Strategy};
    use super::{SkipIndex, SkipIndexBuilder};
    use crate::directory::OwnedBytes;
@@ -227,6 +227,8 @@ mod tests {
        }
    }
+    use proptest::prelude::*;
    proptest! {
        #![proptest_config(ProptestConfig::with_cases(20))]
        #[test]

View File

@@ -288,6 +288,7 @@ impl TermInfoStoreWriter {
#[cfg(test)]
mod tests {
+    use common;
    use common::BinarySerializable;
    use tantivy_bitpacker::{compute_num_bits, BitPacker};

View File

@@ -1,7 +1,7 @@
use std::borrow::Cow;
use std::mem;
-use rust_stemmers::Algorithm;
+use rust_stemmers::{self, Algorithm};
use serde::{Deserialize, Serialize};
use super::{Token, TokenFilter, TokenStream, Tokenizer};

View File

@@ -95,6 +95,7 @@ impl TokenStream for PreTokenizedStream {
mod tests {
    use super::*;
+    use crate::tokenizer::Token;
    #[test]
    fn test_tokenized_stream() {

View File

@@ -3,7 +3,7 @@ use std::sync::Arc;
use common::file_slice::FileSlice;
use common::OwnedBytes;
use criterion::{criterion_group, criterion_main, Criterion};
-use tantivy_sstable::{Dictionary, MonotonicU64SSTable};
+use tantivy_sstable::{self, Dictionary, MonotonicU64SSTable};
fn make_test_sstable(suffix: &str) -> FileSlice {
    let mut builder = Dictionary::<MonotonicU64SSTable>::builder(Vec::new()).unwrap();

View File

@@ -5,7 +5,7 @@ use common::file_slice::FileSlice;
use criterion::{criterion_group, criterion_main, Criterion};
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
-use tantivy_sstable::{Dictionary, MonotonicU64SSTable};
+use tantivy_sstable::{self, Dictionary, MonotonicU64SSTable};
const CHARSET: &'static [u8] = b"abcdefghij";

View File

@@ -10,7 +10,7 @@ description = "term hashmap used for indexing"
[dependencies]
murmurhash32 = "0.3"
common = { version = "0.6", path = "../common/", package = "tantivy-common" }
-ahash = { version = "0.8.11", default-features = false, optional = true }
+ahash = { version = "0.8.3", default-features = false, optional = true }
rand_distr = "0.4.3"
[[bench]]

Some files were not shown because too many files have changed in this diff.