Revert "add index accessor for index writer (#1159 )"

This reverts commit b256df6599.
add index accessor for index writer (#1159 )
2026-01-05 16:52:55 +00:00 · 2021-09-23 21:49:34 +09:00 · 2021-09-23 21:49:20 +09:00 · 2021-09-23 20:18:27 +09:00 · 2021-09-17 08:52:52 +09:00 · 2021-09-10 23:05:09 +09:00
139 changed files with 2659 additions and 1762 deletions
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -0,0 +1,25 @@
+name: Coverage
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  coverage:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Install Rust
+        run: rustup toolchain install nightly --component llvm-tools-preview
+      - name: Install cargo-llvm-cov
+        run: curl -LsSf https://github.com/taiki-e/cargo-llvm-cov/releases/latest/download/cargo-llvm-cov-x86_64-unknown-linux-gnu.tar.gz | tar xzf - -C ~/.cargo/bin
+      - name: Generate code coverage
+        run: cargo llvm-cov --all-features --workspace --lcov --output-path lcov.info
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v1
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos
+          files: lcov.info
+          fail_ci_if_error: true
--- a/.github/workflows/long_running.yml
+++ b/.github/workflows/long_running.yml
@@ -0,0 +1,24 @@
+name: Rust
+
+on:
+  push:
+    branches: [ main ]
+
+env:
+  CARGO_TERM_COLOR: always
+  NUM_FUNCTIONAL_TEST_ITERATIONS: 20000
+
+jobs:
+  functional_test_unsorted:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: Run indexing_unsorted
+      run: cargo test indexing_unsorted -- --ignored
+  functional_test_sorted:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: Run indexing_sorted
+      run: cargo test indexing_sorted -- --ignored
+
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -10,7 +10,7 @@ env:
  CARGO_TERM_COLOR: always

 jobs:
-  build:
+  test:

    runs-on: ubuntu-latest

--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,21 @@
+Tantivy 0.16.1
+========================
+- Major Bugfix on multivalued fastfield.  #1151
+
+Tantivy 0.16.0
+=========================
+- Bugfix in the filesum check. (@evanxg852000) #1127
+- Bugfix in positions when the index is sorted by a field. (@appaquet) #1125
+
+Tantivy 0.15.3
+=========================
+- Major bugfix. Deleting documents was broken when the index was sorted by a field. (@appaquet, @fulmicoton) #1101
+
+
+Tantivy 0.15.2
+========================
+- Major bugfix. DocStore still panics when a deleted doc is at the beginning of a block. (@appaquet) #1088
+
 Tantivy 0.15.1
 =========================
 - Major bugfix. DocStore panics when first block is deleted. (@appaquet) #1077
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.15.1"
+version = "0.16.1"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -19,7 +19,7 @@ crc32fast = "1.2.1"
 once_cell = "1.7.2"
 regex ={ version = "1.5.4", default-features = false, features = ["std"] }
 tantivy-fst = "0.3"
-memmap = {version = "0.7", optional=true}
+memmap2 = {version = "0.5", optional=true}
 lz4_flex = { version = "0.8.0", default-features = false, features = ["checked-decode"], optional = true }
 brotli = { version = "3.3", optional = true }
 snap = { version = "1.0.5", optional = true }
@@ -31,12 +31,13 @@ num_cpus = "1.13"
 fs2={ version = "0.4.3", optional = true }
 levenshtein_automata = "0.2"
 uuid = { version = "0.8.2", features = ["v4", "serde"] }
-crossbeam = "0.8"
+crossbeam = "0.8.1"
 futures = { version = "0.3.15", features = ["thread-pool"] }
 tantivy-query-grammar = { version="0.15.0", path="./query-grammar" }
 tantivy-bitpacker = { version="0.1", path="./bitpacker" }
-common = { version="0.1", path="./common" }
+common = { version = "0.1", path = "./common/", package = "tantivy-common" }
 fastfield_codecs = { version="0.1", path="./fastfield_codecs", default-features = false }
+ownedbytes = { version="0.1", path="./ownedbytes" }
 stable_deref_trait = "1.2"
 rust-stemmers = "1.2"
 downcast-rs = "1.2"
@@ -53,6 +54,7 @@ rayon = "1.5"
 lru = "0.6.5"
 fastdivide = "0.3"
 itertools = "0.10.0"
+measure_time = "0.7.0"

 [target.'cfg(windows)'.dependencies]
 winapi = "0.3.9"
@@ -62,7 +64,9 @@ rand = "0.8.3"
 maplit = "1.0.2"
 matches = "0.1.8"
 proptest = "1.0"
-criterion = "0.3.4"
+criterion = "0.3.5"
+test-env-log = "0.2.7"
+env_logger = "0.9.0"

 [dev-dependencies.fail]
 version = "0.4"
@@ -79,7 +83,7 @@ overflow-checks = true

 [features]
 default = ["mmap", "lz4-compression" ]
-mmap = ["fs2", "tempfile", "memmap"]
+mmap = ["fs2", "tempfile", "memmap2"]

 brotli-compression = ["brotli"]
 lz4-compression = ["lz4_flex"]
@@ -90,7 +94,7 @@ unstable = [] # useful for benches.
 wasm-bindgen = ["uuid/wasm-bindgen"]

 [workspace]
-members = ["query-grammar", "bitpacker", "common", "fastfield_codecs"]
+members = ["query-grammar", "bitpacker", "common", "fastfield_codecs", "ownedbytes"]

 [badges]
 travis-ci = { repository = "tantivy-search/tantivy" }
--- a/README.md
+++ b/README.md
@@ -1,9 +1,9 @@

-[![Build Status](https://travis-ci.org/tantivy-search/tantivy.svg?branch=main)](https://travis-ci.org/tantivy-search/tantivy)
+[![Docs](https://docs.rs/tantivy/badge.svg)](https://docs.rs/crate/tantivy/)
+[![Build Status](https://github.com/tantivy-search/tantivy/actions/workflows/test.yml/badge.svg)](https://github.com/tantivy-search/tantivy/actions/workflows/test.yml)
 [![codecov](https://codecov.io/gh/tantivy-search/tantivy/branch/main/graph/badge.svg)](https://codecov.io/gh/tantivy-search/tantivy)
 [![Join the chat at https://gitter.im/tantivy-search/tantivy](https://badges.gitter.im/tantivy-search/tantivy.svg)](https://gitter.im/tantivy-search/tantivy?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
-[![Build status](https://ci.appveyor.com/api/projects/status/r7nb13kj23u8m9pj/branch/main?svg=true)](https://ci.appveyor.com/project/fulmicoton/tantivy/branch/main)
 [![Crates.io](https://img.shields.io/crates/v/tantivy.svg)](https://crates.io/crates/tantivy)

 ![Tantivy](https://tantivy-search.github.io/logo/tantivy-logo.png)
--- a/benches/analyzer.rs
+++ b/benches/analyzer.rs
@@ -1,7 +1,7 @@
 use criterion::{criterion_group, criterion_main, Criterion};
 use tantivy::tokenizer::TokenizerManager;

-const ALICE_TXT: &'static str = include_str!("alice.txt");
+const ALICE_TXT: &str = include_str!("alice.txt");

 pub fn criterion_benchmark(c: &mut Criterion) {
    let tokenizer_manager = TokenizerManager::default();
--- a/bitpacker/Cargo.toml
+++ b/bitpacker/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-bitpacker"
-version = "0.1.0"
+version = "0.1.1"
 edition = "2018"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
--- a/bitpacker/src/lib.rs
+++ b/bitpacker/src/lib.rs
@@ -50,3 +50,32 @@ where
    }
    None
 }
+
+#[test]
+fn test_compute_num_bits() {
+    assert_eq!(compute_num_bits(1), 1u8);
+    assert_eq!(compute_num_bits(0), 0u8);
+    assert_eq!(compute_num_bits(2), 2u8);
+    assert_eq!(compute_num_bits(3), 2u8);
+    assert_eq!(compute_num_bits(4), 3u8);
+    assert_eq!(compute_num_bits(255), 8u8);
+    assert_eq!(compute_num_bits(256), 9u8);
+    assert_eq!(compute_num_bits(5_000_000_000), 33u8);
+}
+
+#[test]
+fn test_minmax_empty() {
+    let vals: Vec<u32> = vec![];
+    assert_eq!(minmax(vals.into_iter()), None);
+}
+
+#[test]
+fn test_minmax_one() {
+    assert_eq!(minmax(vec![1].into_iter()), Some((1, 1)));
+}
+
+#[test]
+fn test_minmax_two() {
+    assert_eq!(minmax(vec![1, 2].into_iter()), Some((1, 2)));
+    assert_eq!(minmax(vec![2, 1].into_iter()), Some((1, 2)));
+}
--- a/common/Cargo.toml
+++ b/common/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "common"
+name = "tantivy-common"
 version = "0.1.0"
 authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
 license = "MIT"
@@ -10,3 +10,7 @@ description = "common traits and utility functions used by multiple tantivy subc

 [dependencies]
 byteorder = "1.4.3"
+
+[dev-dependencies]
+proptest = "1.0.0"
+rand = "0.8.4"
--- a/common/src/bitset.rs
+++ b/common/src/bitset.rs
@@ -2,7 +2,7 @@ use std::fmt;
 use std::u64;

 #[derive(Clone, Copy, Eq, PartialEq)]
-pub(crate) struct TinySet(u64);
+pub struct TinySet(u64);

 impl fmt::Debug for TinySet {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
@@ -178,7 +178,7 @@ impl BitSet {
    ///
    /// Reminder: the tiny set with the bucket `bucket`, represents the
    /// elements from `bucket * 64` to `(bucket+1) * 64`.
-    pub(crate) fn first_non_empty_bucket(&self, bucket: u32) -> Option<u32> {
+    pub fn first_non_empty_bucket(&self, bucket: u32) -> Option<u32> {
        self.tinysets[bucket as usize..]
            .iter()
            .cloned()
@@ -193,7 +193,7 @@ impl BitSet {
    /// Returns the tiny bitset representing the
    /// the set restricted to the number range from
    /// `bucket * 64` to `(bucket + 1) * 64`.
-    pub(crate) fn tinyset(&self, bucket: u32) -> TinySet {
+    pub fn tinyset(&self, bucket: u32) -> TinySet {
        self.tinysets[bucket as usize]
    }
 }
@@ -203,11 +203,9 @@ mod tests {

    use super::BitSet;
    use super::TinySet;
-    use crate::docset::{DocSet, TERMINATED};
-    use crate::query::BitSetDocSet;
-    use crate::tests;
-    use crate::tests::generate_nonunique_unsorted;
-    use std::collections::BTreeSet;
+    use rand::distributions::Bernoulli;
+    use rand::rngs::StdRng;
+    use rand::{Rng, SeedableRng};
    use std::collections::HashSet;

    #[test]
@@ -263,29 +261,6 @@ mod tests {
        test_against_hashset(&[62u32, 63u32], 64);
    }

-    #[test]
-    fn test_bitset_large() {
-        let arr = generate_nonunique_unsorted(100_000, 5_000);
-        let mut btreeset: BTreeSet<u32> = BTreeSet::new();
-        let mut bitset = BitSet::with_max_value(100_000);
-        for el in arr {
-            btreeset.insert(el);
-            bitset.insert(el);
-        }
-        for i in 0..100_000 {
-            assert_eq!(btreeset.contains(&i), bitset.contains(i));
-        }
-        assert_eq!(btreeset.len(), bitset.len());
-        let mut bitset_docset = BitSetDocSet::from(bitset);
-        let mut remaining = true;
-        for el in btreeset.into_iter() {
-            assert!(remaining);
-            assert_eq!(bitset_docset.doc(), el);
-            remaining = bitset_docset.advance() != TERMINATED;
-        }
-        assert!(!remaining);
-    }
-
    #[test]
    fn test_bitset_num_buckets() {
        use super::num_buckets;
@@ -340,10 +315,23 @@ mod tests {
        assert_eq!(bitset.len(), 3);
    }

+    pub fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec<u32> {
+        StdRng::from_seed([seed_val; 32])
+            .sample_iter(&Bernoulli::new(ratio).unwrap())
+            .take(n as usize)
+            .enumerate()
+            .filter_map(|(val, keep)| if keep { Some(val as u32) } else { None })
+            .collect()
+    }
+
+    pub fn sample(n: u32, ratio: f64) -> Vec<u32> {
+        sample_with_seed(n, ratio, 4)
+    }
+
    #[test]
    fn test_bitset_clear() {
        let mut bitset = BitSet::with_max_value(1_000);
-        let els = tests::sample(1_000, 0.01f64);
+        let els = sample(1_000, 0.01f64);
        for &el in &els {
            bitset.insert(el);
        }
--- a/common/src/lib.rs
+++ b/common/src/lib.rs
@@ -1,9 +1,167 @@
+use std::ops::Deref;
+
 pub use byteorder::LittleEndian as Endianness;

+mod bitset;
 mod serialize;
 mod vint;
 mod writer;

+pub use bitset::*;
 pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
 pub use vint::{read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt};
 pub use writer::{AntiCallToken, CountingWriter, TerminatingWrite};
+
+/// Has length trait
+pub trait HasLen {
+    /// Return length
+    fn len(&self) -> usize;
+
+    /// Returns true iff empty.
+    fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+}
+
+impl<T: Deref<Target = [u8]>> HasLen for T {
+    fn len(&self) -> usize {
+        self.deref().len()
+    }
+}
+
+const HIGHEST_BIT: u64 = 1 << 63;
+
+/// Maps a `i64` to `u64`
+///
+/// For simplicity, tantivy internally handles `i64` as `u64`.
+/// The mapping is defined by this function.
+///
+/// Maps `i64` to `u64` so that
+/// `-2^63 .. 2^63-1` is mapped
+///     to
+/// `0 .. 2^64-1`
+/// in that order.
+///
+/// This is more suited than simply casting (`val as u64`)
+/// because of bitpacking.
+///
+/// Imagine a list of `i64` ranging from -10 to 10.
+/// When casting negative values, the negative values are projected
+/// to values over 2^63, and all values end up requiring 64 bits.
+///
+/// # See also
+/// The [reverse mapping is `u64_to_i64`](./fn.u64_to_i64.html).
+#[inline]
+pub fn i64_to_u64(val: i64) -> u64 {
+    (val as u64) ^ HIGHEST_BIT
+}
+
+/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
+#[inline]
+pub fn u64_to_i64(val: u64) -> i64 {
+    (val ^ HIGHEST_BIT) as i64
+}
+
+/// Maps a `f64` to `u64`
+///
+/// For simplicity, tantivy internally handles `f64` as `u64`.
+/// The mapping is defined by this function.
+///
+/// Maps `f64` to `u64` in a monotonic manner, so that bytes lexical order is preserved.
+///
+/// This is more suited than simply casting (`val as u64`)
+/// which would truncate the result
+///
+/// # Reference
+///
+/// Daniel Lemire's [blog post](https://lemire.me/blog/2020/12/14/converting-floating-point-numbers-to-integers-while-preserving-order/)
+/// explains the mapping in a clear manner.
+///
+/// # See also
+/// The [reverse mapping is `u64_to_f64`](./fn.u64_to_f64.html).
+#[inline]
+pub fn f64_to_u64(val: f64) -> u64 {
+    let bits = val.to_bits();
+    if val.is_sign_positive() {
+        bits ^ HIGHEST_BIT
+    } else {
+        !bits
+    }
+}
+
+/// Reverse the mapping given by [`f64_to_u64`](./fn.f64_to_u64.html).
+#[inline]
+pub fn u64_to_f64(val: u64) -> f64 {
+    f64::from_bits(if val & HIGHEST_BIT != 0 {
+        val ^ HIGHEST_BIT
+    } else {
+        !val
+    })
+}
+
+#[cfg(test)]
+pub mod test {
+
+    use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
+    use super::{BinarySerializable, FixedSize};
+    use proptest::prelude::*;
+    use std::f64;
+
+    fn test_i64_converter_helper(val: i64) {
+        assert_eq!(u64_to_i64(i64_to_u64(val)), val);
+    }
+
+    fn test_f64_converter_helper(val: f64) {
+        assert_eq!(u64_to_f64(f64_to_u64(val)), val);
+    }
+
+    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
+        let mut buffer = Vec::new();
+        O::default().serialize(&mut buffer).unwrap();
+        assert_eq!(buffer.len(), O::SIZE_IN_BYTES);
+    }
+
+    proptest! {
+        #[test]
+        fn test_f64_converter_monotonicity_proptest((left, right) in (proptest::num::f64::NORMAL, proptest::num::f64::NORMAL)) {
+            let left_u64 = f64_to_u64(left);
+            let right_u64 = f64_to_u64(right);
+            assert_eq!(left_u64 < right_u64,  left < right);
+        }
+    }
+
+    #[test]
+    fn test_i64_converter() {
+        assert_eq!(i64_to_u64(i64::min_value()), u64::min_value());
+        assert_eq!(i64_to_u64(i64::max_value()), u64::max_value());
+        test_i64_converter_helper(0i64);
+        test_i64_converter_helper(i64::min_value());
+        test_i64_converter_helper(i64::max_value());
+        for i in -1000i64..1000i64 {
+            test_i64_converter_helper(i);
+        }
+    }
+
+    #[test]
+    fn test_f64_converter() {
+        test_f64_converter_helper(f64::INFINITY);
+        test_f64_converter_helper(f64::NEG_INFINITY);
+        test_f64_converter_helper(0.0);
+        test_f64_converter_helper(-0.0);
+        test_f64_converter_helper(1.0);
+        test_f64_converter_helper(-1.0);
+    }
+
+    #[test]
+    fn test_f64_order() {
+        assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
+            .contains(&f64_to_u64(f64::NAN))); //nan is not a number
+        assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); //same exponent, different mantissa
+        assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); //same mantissa, different exponent
+        assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); //different exponent and mantissa
+        assert!(f64_to_u64(1.0) > f64_to_u64(-1.0)); // pos > neg
+        assert!(f64_to_u64(-1.5) < f64_to_u64(-1.0));
+        assert!(f64_to_u64(-2.0) < f64_to_u64(1.0));
+        assert!(f64_to_u64(-2.0) < f64_to_u64(-1.5));
+    }
+}
--- a/common/src/vint.rs
+++ b/common/src/vint.rs
@@ -106,7 +106,7 @@ pub fn read_u32_vint_no_advance(data: &[u8]) -> (u32, usize) {
 pub fn write_u32_vint<W: io::Write>(val: u32, writer: &mut W) -> io::Result<()> {
    let mut buf = [0u8; 8];
    let data = serialize_vint_u32(val, &mut buf);
-    writer.write_all(&data)
+    writer.write_all(data)
 }

 impl VInt {
@@ -181,8 +181,8 @@ mod tests {
    fn aux_test_vint(val: u64) {
        let mut v = [14u8; 10];
        let num_bytes = VInt(val).serialize_into(&mut v);
-        for i in num_bytes..10 {
-            assert_eq!(v[i], 14u8);
+        for el in &v[num_bytes..10] {
+            assert_eq!(el, &14u8);
        }
        assert!(num_bytes > 0);
        if num_bytes < 10 {
--- a/doc/src/SUMMARY.md
+++ b/doc/src/SUMMARY.md
@@ -7,6 +7,7 @@
 - [Segments](./basis.md)
 - [Defining your schema](./schema.md)
 - [Facetting](./facetting.md)
+- [Index Sorting](./index_sorting.md)
 - [Innerworkings](./innerworkings.md)
  - [Inverted index](./inverted_index.md)
 - [Best practise](./inverted_index.md)
--- a/doc/src/index_sorting.md
+++ b/doc/src/index_sorting.md
@@ -0,0 +1,61 @@
+
+- [Index Sorting](#index-sorting)
+    + [Why Sorting](#why-sorting)
+        * [Compression](#compression)
+        * [Top-N Optimization](#top-n-optimization)
+        * [Pruning](#pruning)
+        * [Other](#other)
+    + [Usage](#usage)
+
+# Index Sorting
+
+Tantivy allows you to sort the index according to a property.
+
+## Why Sorting
+
+Presorting an index has several advantages:
+
+###### Compression
+
+When data is sorted it is easier to compress the data. E.g. the numbers sequence [5, 2, 3, 1, 4] would be sorted to [1, 2, 3, 4, 5]. 
+If we apply delta encoding, the unsorted list becomes [5, -3, 1, -2, 3], whereas the sorted list becomes [1, 1, 1, 1, 1].
+The compression ratio gain mainly affects the fast field of the sorted property; everything else is likely unaffected.
+###### Top-N Optimization
+
+When data is presorted by a field and search queries request sorting by the same field, we can leverage the natural order of the documents. 
+E.g. if the data is sorted by timestamp and we want the top n newest docs containing a term, we can simply leverage the order of the docids.
+
+Note: Tantivy 0.16 does not do this optimization yet.
+
+###### Pruning
+
+Let's say we want all documents and want to apply the filter `>= 2010-08-11`. When the data is sorted, we could make a lookup in the fast field to find the docid range and use this as the filter.
+
+Note: Tantivy 0.16 does not do this optimization yet.
+
+###### Other?
+
+In principle there are many algorithms possible that exploit the monotonically increasing nature. (aggregations maybe?)
+
+## Usage
+The index sorting can be configured by setting [`sort_by_field`](https://github.com/tantivy-search/tantivy/blob/000d76b11a139a84b16b9b95060a1c93e8b9851c/src/core/index_meta.rs#L238) on `IndexSettings` and passing it to an `IndexBuilder`. As of tantivy 0.16 only fast fields are allowed to be used.
+
+```
+let settings = IndexSettings {
+    sort_by_field: Some(IndexSortByField {
+        field: "intval".to_string(),
+        order: Order::Desc,
+    }),
+    ..Default::default()
+};
+let mut index_builder = Index::builder().schema(schema);
+index_builder = index_builder.settings(settings);
+let index = index_builder.create_in_ram().unwrap();
+```
+
+## Implementation details
+
+Sorting an index is applied in the serialization step. In general there are two serialization steps: [Finishing a single segment](https://github.com/tantivy-search/tantivy/blob/000d76b11a139a84b16b9b95060a1c93e8b9851c/src/indexer/segment_writer.rs#L338) and [merging multiple segments](https://github.com/tantivy-search/tantivy/blob/000d76b11a139a84b16b9b95060a1c93e8b9851c/src/indexer/merger.rs#L1073).
+
+In both cases we generate a docid mapping reflecting the sort. This mapping is used when serializing the different components (doc store, fastfields, posting list, normfield, facets).
+
--- a/examples/custom_collector.rs
+++ b/examples/custom_collector.rs
@@ -86,12 +86,10 @@ impl Collector for StatsCollector {

    fn merge_fruits(&self, segment_stats: Vec<Option<Stats>>) -> tantivy::Result<Option<Stats>> {
        let mut stats = Stats::default();
-        for segment_stats_opt in segment_stats {
-            if let Some(segment_stats) = segment_stats_opt {
-                stats.count += segment_stats.count;
-                stats.sum += segment_stats.sum;
-                stats.squared_sum += segment_stats.squared_sum;
-            }
+        for segment_stats in segment_stats.into_iter().flatten() {
+            stats.count += segment_stats.count;
+            stats.sum += segment_stats.sum;
+            stats.squared_sum += segment_stats.squared_sum;
        }
        Ok(stats.non_zero_count())
    }
@@ -139,7 +137,7 @@ fn main() -> tantivy::Result<()> {
    //
    // Lets index a bunch of fake documents for the sake of
    // this example.
-    let index = Index::create_in_ram(schema.clone());
+    let index = Index::create_in_ram(schema);

    let mut index_writer = index.writer(50_000_000)?;
    index_writer.add_document(doc!(
--- a/examples/faceted_search_with_tweaked_score.rs
+++ b/examples/faceted_search_with_tweaked_score.rs
@@ -12,7 +12,7 @@ fn main() -> tantivy::Result<()> {
    let ingredient = schema_builder.add_facet_field("ingredient", INDEXED);

    let schema = schema_builder.build();
-    let index = Index::create_in_ram(schema.clone());
+    let index = Index::create_in_ram(schema);

    let mut index_writer = index.writer(30_000_000)?;

@@ -51,7 +51,7 @@ fn main() -> tantivy::Result<()> {
        let query = BooleanQuery::new_multiterms_query(
            facets
                .iter()
-                .map(|key| Term::from_facet(ingredient, &key))
+                .map(|key| Term::from_facet(ingredient, key))
                .collect(),
        );
        let top_docs_by_custom_score =
--- a/examples/iterating_docs_and_positions.rs
+++ b/examples/iterating_docs_and_positions.rs
@@ -22,7 +22,7 @@ fn main() -> tantivy::Result<()> {
    let title = schema_builder.add_text_field("title", TEXT | STORED);
    let schema = schema_builder.build();

-    let index = Index::create_in_ram(schema.clone());
+    let index = Index::create_in_ram(schema);

    let mut index_writer = index.writer_with_num_threads(1, 50_000_000)?;
    index_writer.add_document(doc!(title => "The Old Man and the Sea"));
--- a/examples/pre_tokenized_text.rs
+++ b/examples/pre_tokenized_text.rs
@@ -82,7 +82,7 @@ fn main() -> tantivy::Result<()> {
        }]
    }"#;

-    let short_man_doc = schema.parse_document(&short_man_json)?;
+    let short_man_doc = schema.parse_document(short_man_json)?;

    index_writer.add_document(short_man_doc);

--- a/examples/snippet.rs
+++ b/examples/snippet.rs
@@ -25,7 +25,7 @@ fn main() -> tantivy::Result<()> {
    let schema = schema_builder.build();

    // # Indexing documents
-    let index = Index::create_in_dir(&index_path, schema.clone())?;
+    let index = Index::create_in_dir(&index_path, schema)?;

    let mut index_writer = index.writer(50_000_000)?;

--- a/examples/working_with_json.rs
+++ b/examples/working_with_json.rs
@@ -1,4 +1,3 @@
-use tantivy;
 use tantivy::schema::*;

 // # Document from json
@@ -22,7 +21,7 @@ fn main() -> tantivy::Result<()> {
    }"#;

    // We can parse our document
-    let _mice_and_men_doc = schema.parse_document(&mice_and_men_doc_json)?;
+    let _mice_and_men_doc = schema.parse_document(mice_and_men_doc_json)?;

    // Multi-valued field are allowed, they are
    // expressed in JSON by an array.
@@ -31,7 +30,7 @@ fn main() -> tantivy::Result<()> {
       "title": ["Frankenstein", "The Modern Prometheus"],
       "year": 1818
    }"#;
-    let _frankenstein_doc = schema.parse_document(&frankenstein_json)?;
+    let _frankenstein_doc = schema.parse_document(frankenstein_json)?;

    // Note that the schema is saved in your index directory.
    //
--- a/fastfield_codecs/Cargo.toml
+++ b/fastfield_codecs/Cargo.toml
@@ -9,17 +9,16 @@ description = "Fast field codecs used by tantivy"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
-common = { path = "../common/" }
-tantivy-bitpacker = { path = "../bitpacker/" }
+common = { version = "0.1", path = "../common/", package = "tantivy-common" }
+tantivy-bitpacker = { version="0.1.1", path = "../bitpacker/" }
 prettytable-rs = {version="0.8.0", optional= true}
-#prettytable-rs = {version="0.8.0" }
-rand = "0.8.3"
+rand = {version="0.8.3", optional= true}

 [dev-dependencies]
 more-asserts = "0.2.1"
 rand = "0.8.3"

 [features]
-bin = ["prettytable-rs"]
+bin = ["prettytable-rs", "rand"]
 default = ["bin"]

--- a/fastfield_codecs/benches/bench.rs
+++ b/fastfield_codecs/benches/bench.rs
@@ -27,8 +27,7 @@ mod tests {
    }

    fn value_iter() -> impl Iterator<Item = u64> {
-        let data = (0..20_000).collect::<Vec<_>>();
-        data.into_iter()
+        0..20_000
    }
    fn bench_get<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
        b: &mut Bencher,
@@ -38,7 +37,7 @@ mod tests {
        S::serialize(
            &mut bytes,
            &data,
-            stats_from_vec(&data),
+            stats_from_vec(data),
            data.iter().cloned(),
            data.iter().cloned(),
        )
@@ -56,7 +55,7 @@ mod tests {
            S::serialize(
                &mut bytes,
                &data,
-                stats_from_vec(&data),
+                stats_from_vec(data),
                data.iter().cloned(),
                data.iter().cloned(),
            )
--- a/fastfield_codecs/src/bitpacked.rs
+++ b/fastfield_codecs/src/bitpacked.rs
@@ -35,7 +35,7 @@ impl<'data> FastFieldCodecReader for BitpackedFastFieldReader {
    }
    #[inline]
    fn get_u64(&self, doc: u64, data: &[u8]) -> u64 {
-        self.min_value_u64 + self.bit_unpacker.get(doc, &data)
+        self.min_value_u64 + self.bit_unpacker.get(doc, data)
    }
    #[inline]
    fn min_value(&self) -> u64 {
@@ -147,7 +147,7 @@ mod tests {

    fn create_and_validate(data: &[u64], name: &str) {
        crate::tests::create_and_validate::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(
-            &data, name,
+            data, name,
        );
    }

@@ -165,7 +165,7 @@ mod tests {
    fn bitpacked_fast_field_rand() {
        for _ in 0..500 {
            let mut data = (0..1 + rand::random::<u8>() as usize)
-                .map(|_| rand::random::<i64>() as u64 / 2 as u64)
+                .map(|_| rand::random::<i64>() as u64 / 2)
                .collect::<Vec<_>>();
            create_and_validate(&data, "rand");

--- a/fastfield_codecs/src/lib.rs
+++ b/fastfield_codecs/src/lib.rs
@@ -51,14 +51,14 @@ pub trait FastFieldCodecSerializer {

 /// FastFieldDataAccess is the trait to access fast field data during serialization and estimation.
 pub trait FastFieldDataAccess {
-    /// Return the value associated to the given document.
+    /// Return the value associated to the given position.
    ///
    /// Whenever possible use the Iterator passed to the fastfield creation instead, for performance reasons.
    ///
    /// # Panics
    ///
-    /// May panic if `doc` is greater than the segment
-    fn get(&self, doc: u32) -> u64;
+    /// May panic if `position` is greater than the index.
+    fn get_val(&self, position: u64) -> u64;
 }

 #[derive(Debug, Clone)]
@@ -69,20 +69,14 @@ pub struct FastFieldStats {
 }

 impl<'a> FastFieldDataAccess for &'a [u64] {
-    fn get(&self, doc: u32) -> u64 {
-        self[doc as usize]
-    }
-}
-
-impl<'a> FastFieldDataAccess for &'a Vec<u64> {
-    fn get(&self, doc: u32) -> u64 {
-        self[doc as usize]
+    fn get_val(&self, position: u64) -> u64 {
+        self[position as usize]
    }
 }

 impl FastFieldDataAccess for Vec<u64> {
-    fn get(&self, doc: u32) -> u64 {
-        self[doc as usize]
+    fn get_val(&self, position: u64) -> u64 {
+        self[position as usize]
    }
 }

@@ -100,15 +94,15 @@ mod tests {
        data: &[u64],
        name: &str,
    ) -> (f32, f32) {
-        if !S::is_applicable(&data, crate::tests::stats_from_vec(&data)) {
+        if !S::is_applicable(&data, crate::tests::stats_from_vec(data)) {
            return (f32::MAX, 0.0);
        }
-        let estimation = S::estimate(&data, crate::tests::stats_from_vec(&data));
+        let estimation = S::estimate(&data, crate::tests::stats_from_vec(data));
        let mut out = vec![];
        S::serialize(
            &mut out,
            &data,
-            crate::tests::stats_from_vec(&data),
+            crate::tests::stats_from_vec(data),
            data.iter().cloned(),
            data.iter().cloned(),
        )
@@ -125,7 +119,7 @@ mod tests {
            }
        }
        let actual_compression = data.len() as f32 / out.len() as f32;
-        return (estimation, actual_compression);
+        (estimation, actual_compression)
    }
    pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {
        let mut data_and_names = vec![];
--- a/fastfield_codecs/src/linearinterpol.rs
+++ b/fastfield_codecs/src/linearinterpol.rs
@@ -78,7 +78,7 @@ impl FastFieldCodecReader for LinearInterpolFastFieldReader {
    #[inline]
    fn get_u64(&self, doc: u64, data: &[u8]) -> u64 {
        let calculated_value = get_calculated_value(self.footer.first_val, doc, self.slope);
-        (calculated_value + self.bit_unpacker.get(doc, &data)) - self.footer.offset
+        (calculated_value + self.bit_unpacker.get(doc, data)) - self.footer.offset
    }

    #[inline]
@@ -123,8 +123,8 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
    ) -> io::Result<()> {
        assert!(stats.min_value <= stats.max_value);

-        let first_val = fastfield_accessor.get(0);
-        let last_val = fastfield_accessor.get(stats.num_vals as u32 - 1);
+        let first_val = fastfield_accessor.get_val(0);
+        let last_val = fastfield_accessor.get_val(stats.num_vals as u64 - 1);
        let slope = get_slope(first_val, last_val, stats.num_vals);
        // calculate offset to ensure all values are positive
        let mut offset = 0;
@@ -191,8 +191,8 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
    /// where the local maxima for the deviation of the calculated value are and
    /// the offset to shift all values to >=0 is also unknown.
    fn estimate(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32 {
-        let first_val = fastfield_accessor.get(0);
-        let last_val = fastfield_accessor.get(stats.num_vals as u32 - 1);
+        let first_val = fastfield_accessor.get_val(0);
+        let last_val = fastfield_accessor.get_val(stats.num_vals as u64 - 1);
        let slope = get_slope(first_val, last_val, stats.num_vals);

        // let's sample at 0%, 5%, 10% .. 95%, 100%
@@ -205,7 +205,7 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
            .iter()
            .map(|pos| {
                let calculated_value = get_calculated_value(first_val, *pos as u64, slope);
-                let actual_value = fastfield_accessor.get(*pos as u32);
+                let actual_value = fastfield_accessor.get_val(*pos as u64);
                distance(calculated_value, actual_value)
            })
            .max()
@@ -243,7 +243,7 @@ mod tests {
        crate::tests::create_and_validate::<
            LinearInterpolFastFieldSerializer,
            LinearInterpolFastFieldReader,
-        >(&data, name);
+        >(data, name);
    }

    #[test]
@@ -285,9 +285,7 @@ mod tests {
    #[test]
    fn linear_interpol_fast_field_rand() {
        for _ in 0..5000 {
-            let mut data = (0..50 as usize)
-                .map(|_| rand::random::<u64>())
-                .collect::<Vec<_>>();
+            let mut data = (0..50).map(|_| rand::random::<u64>()).collect::<Vec<_>>();
            create_and_validate(&data, "random");

            data.reverse();
--- a/fastfield_codecs/src/main.rs
+++ b/fastfield_codecs/src/main.rs
@@ -30,7 +30,7 @@ fn main() {
        //.unwrap();
        let best_compression_ratio_codec = results
            .iter()
-            .min_by(|res1, res2| res1.partial_cmp(&res2).unwrap())
+            .min_by(|res1, res2| res1.partial_cmp(res2).unwrap())
            .cloned()
            .unwrap();

@@ -41,6 +41,7 @@ fn main() {
            } else {
                (est.to_string(), comp.to_string())
            };
+            #[allow(clippy::all)]
            let style = if comp == best_compression_ratio_codec.1 {
                "Fb"
            } else {
@@ -96,23 +97,23 @@ pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {
 pub fn serialize_with_codec<S: FastFieldCodecSerializer>(
    data: &[u64],
 ) -> (bool, f32, f32, &'static str) {
-    let is_applicable = S::is_applicable(&data, stats_from_vec(&data));
+    let is_applicable = S::is_applicable(&data, stats_from_vec(data));
    if !is_applicable {
        return (false, 0.0, 0.0, S::NAME);
    }
-    let estimation = S::estimate(&data, stats_from_vec(&data));
+    let estimation = S::estimate(&data, stats_from_vec(data));
    let mut out = vec![];
    S::serialize(
        &mut out,
        &data,
-        stats_from_vec(&data),
+        stats_from_vec(data),
        data.iter().cloned(),
        data.iter().cloned(),
    )
    .unwrap();

    let actual_compression = out.len() as f32 / (data.len() * 8) as f32;
-    return (true, estimation, actual_compression, S::NAME);
+    (true, estimation, actual_compression, S::NAME)
 }

 pub fn stats_from_vec(data: &[u64]) -> FastFieldStats {
--- a/fastfield_codecs/src/multilinearinterpol.rs
+++ b/fastfield_codecs/src/multilinearinterpol.rs
@@ -1,3 +1,17 @@
+/*!
+
+MultiLinearInterpol compressor uses linear interpolation to guess a value and stores the offset, but in blocks of 512.
+
+With a CHUNK_SIZE of 512 and 29 byte metadata per block, we get an overhead for metadata of 232 / 512 = 0.45 bits per element.
+The additional space required per element in a block is the maximum deviation of the linear interpolation estimation function.
+
+E.g. if the maximum deviation of an element is 12, all elements cost 4bits.
+
+Size per block:
+Num Elements * Maximum Deviation from Interpolation + 29 Byte Metadata
+
+*/
+
 use crate::FastFieldCodecReader;
 use crate::FastFieldCodecSerializer;
 use crate::FastFieldDataAccess;
@@ -196,8 +210,8 @@ impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
    ) -> io::Result<()> {
        assert!(stats.min_value <= stats.max_value);

-        let first_val = fastfield_accessor.get(0);
-        let last_val = fastfield_accessor.get(stats.num_vals as u32 - 1);
+        let first_val = fastfield_accessor.get_val(0);
+        let last_val = fastfield_accessor.get_val(stats.num_vals as u64 - 1);

        let mut first_function = Function {
            end_pos: stats.num_vals,
@@ -309,9 +323,10 @@ impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
    /// where the local maxima are for the deviation of the calculated value and
    /// the offset is also unknown.
    fn estimate(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32 {
-        let first_val_in_first_block = fastfield_accessor.get(0);
+        let first_val_in_first_block = fastfield_accessor.get_val(0);
        let last_elem_in_first_chunk = CHUNK_SIZE.min(stats.num_vals);
-        let last_val_in_first_block = fastfield_accessor.get(last_elem_in_first_chunk as u32 - 1);
+        let last_val_in_first_block =
+            fastfield_accessor.get_val(last_elem_in_first_chunk as u64 - 1);
        let slope = get_slope(
            first_val_in_first_block,
            last_val_in_first_block,
@@ -328,7 +343,7 @@ impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
            .map(|pos| {
                let calculated_value =
                    get_calculated_value(first_val_in_first_block, *pos as u64, slope);
-                let actual_value = fastfield_accessor.get(*pos as u32);
+                let actual_value = fastfield_accessor.get_val(*pos as u64);
                distance(calculated_value, actual_value)
            })
            .max()
@@ -367,7 +382,7 @@ mod tests {
        crate::tests::create_and_validate::<
            MultiLinearInterpolFastFieldSerializer,
            MultiLinearInterpolFastFieldReader,
-        >(&data, name);
+        >(data, name);
    }

    #[test]
--- a/ownedbytes/Cargo.toml
+++ b/ownedbytes/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
+name = "ownedbytes"
+version = "0.1.0"
+edition = "2018"
+description = "Expose data as static slice"
+license = "MIT"
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+stable_deref_trait = "1.2.0"
--- a/ownedbytes/src/lib.rs
+++ b/ownedbytes/src/lib.rs
@@ -0,0 +1,290 @@
+use stable_deref_trait::StableDeref;
+use std::convert::TryInto;
+use std::mem;
+use std::ops::{Deref, Range};
+use std::sync::Arc;
+use std::{fmt, io};
+
+/// An OwnedBytes simply wraps an object that owns a slice of data and exposes
+/// this data as a static slice.
+///
+/// The backing object is required to be `StableDeref`.
+#[derive(Clone)]
+pub struct OwnedBytes {
+    data: &'static [u8],
+    box_stable_deref: Arc<dyn Deref<Target = [u8]> + Sync + Send>,
+}
+
+impl OwnedBytes {
+    /// Creates an empty `OwnedBytes`.
+    pub fn empty() -> OwnedBytes {
+        OwnedBytes::new(&[][..])
+    }
+
+    /// Creates an `OwnedBytes` instance given a `StableDeref` object.
+    pub fn new<T: StableDeref + Deref<Target = [u8]> + 'static + Send + Sync>(
+        data_holder: T,
+    ) -> OwnedBytes {
+        let box_stable_deref = Arc::new(data_holder);
+        let bytes: &[u8] = box_stable_deref.as_ref();
+        let data = unsafe { mem::transmute::<_, &'static [u8]>(bytes.deref()) };
+        OwnedBytes {
+            data,
+            box_stable_deref,
+        }
+    }
+
+    /// creates a fileslice that is just a view over a slice of the data.
+    pub fn slice(&self, range: Range<usize>) -> Self {
+        OwnedBytes {
+            data: &self.data[range],
+            box_stable_deref: self.box_stable_deref.clone(),
+        }
+    }
+
+    /// Returns the underlying slice of data.
+    /// `Deref` and `AsRef` are also available.
+    #[inline]
+    pub fn as_slice(&self) -> &[u8] {
+        self.data
+    }
+
+    /// Returns the len of the slice.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.data.len()
+    }
+
+    /// Splits the OwnedBytes into two OwnedBytes `(left, right)`.
+    ///
+    /// Left will hold `split_len` bytes.
+    ///
+    /// This operation is cheap and does not require to copy any memory.
+    /// On the other hand, both `left` and `right` retain a handle over
+    /// the entire slice of memory. In other words, the memory will only
+    /// be released when both left and right are dropped.
+    pub fn split(self, split_len: usize) -> (OwnedBytes, OwnedBytes) {
+        let right_box_stable_deref = self.box_stable_deref.clone();
+        let left = OwnedBytes {
+            data: &self.data[..split_len],
+            box_stable_deref: self.box_stable_deref,
+        };
+        let right = OwnedBytes {
+            data: &self.data[split_len..],
+            box_stable_deref: right_box_stable_deref,
+        };
+        (left, right)
+    }
+
+    /// Returns true iff this `OwnedBytes` is empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.as_slice().is_empty()
+    }
+
+    /// Drops the left most `advance_len` bytes.
+    ///
+    /// See also [.clip(clip_len: usize))](#method.clip).
+    #[inline]
+    pub fn advance(&mut self, advance_len: usize) {
+        self.data = &self.data[advance_len..]
+    }
+
+    /// Reads an `u8` from the `OwnedBytes` and advance by one byte.
+    #[inline]
+    pub fn read_u8(&mut self) -> u8 {
+        assert!(!self.is_empty());
+
+        let byte = self.as_slice()[0];
+        self.advance(1);
+        byte
+    }
+
+    /// Reads an `u64` encoded as little-endian from the `OwnedBytes` and advance by 8 bytes.
+    #[inline]
+    pub fn read_u64(&mut self) -> u64 {
+        assert!(self.len() > 7);
+
+        let octlet: [u8; 8] = self.as_slice()[..8].try_into().unwrap();
+        self.advance(8);
+        u64::from_le_bytes(octlet)
+    }
+}
+
+impl fmt::Debug for OwnedBytes {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // Print at most the first 10 bytes so the debug string stays short.
+        // The guard must match the `..10` bound, else 9-byte inputs panic.
+        let bytes_truncated: &[u8] = if self.len() > 10 {
+            &self.as_slice()[..10]
+        } else {
+            self.as_slice()
+        };
+        write!(f, "OwnedBytes({:?}, len={})", bytes_truncated, self.len())
+    }
+}
+
+impl Deref for OwnedBytes {
+    type Target = [u8];
+
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        self.as_slice()
+    }
+}
+
+impl io::Read for OwnedBytes {
+    #[inline]
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        let read_len = {
+            let data = self.as_slice();
+            if data.len() >= buf.len() {
+                let buf_len = buf.len();
+                buf.copy_from_slice(&data[..buf_len]);
+                buf.len()
+            } else {
+                let data_len = data.len();
+                buf[..data_len].copy_from_slice(data);
+                data_len
+            }
+        };
+        self.advance(read_len);
+        Ok(read_len)
+    }
+    #[inline]
+    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
+        let read_len = {
+            let data = self.as_slice();
+            buf.extend(data);
+            data.len()
+        };
+        self.advance(read_len);
+        Ok(read_len)
+    }
+    #[inline]
+    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
+        let read_len = self.read(buf)?;
+        if read_len != buf.len() {
+            return Err(io::Error::new(
+                io::ErrorKind::UnexpectedEof,
+                "failed to fill whole buffer",
+            ));
+        }
+        Ok(())
+    }
+}
+
+impl AsRef<[u8]> for OwnedBytes {
+    #[inline]
+    fn as_ref(&self) -> &[u8] {
+        self.as_slice()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::io::{self, Read};
+
+    use super::OwnedBytes;
+
+    #[test]
+    fn test_owned_bytes_debug() {
+        let short_bytes = OwnedBytes::new(b"abcd".as_ref());
+        assert_eq!(
+            format!("{:?}", short_bytes),
+            "OwnedBytes([97, 98, 99, 100], len=4)"
+        );
+        let long_bytes = OwnedBytes::new(b"abcdefghijklmnopq".as_ref());
+        assert_eq!(
+            format!("{:?}", long_bytes),
+            "OwnedBytes([97, 98, 99, 100, 101, 102, 103, 104, 105, 106], len=17)"
+        );
+    }
+
+    #[test]
+    fn test_owned_bytes_read() -> io::Result<()> {
+        let mut bytes = OwnedBytes::new(b"abcdefghiklmnopqrstuvwxyz".as_ref());
+        {
+            let mut buf = [0u8; 5];
+            bytes.read_exact(&mut buf[..]).unwrap();
+            assert_eq!(&buf, b"abcde");
+            assert_eq!(bytes.as_slice(), b"fghiklmnopqrstuvwxyz")
+        }
+        {
+            let mut buf = [0u8; 2];
+            bytes.read_exact(&mut buf[..]).unwrap();
+            assert_eq!(&buf, b"fg");
+            assert_eq!(bytes.as_slice(), b"hiklmnopqrstuvwxyz")
+        }
+        Ok(())
+    }
+
+    #[test]
+    fn test_owned_bytes_read_right_at_the_end() -> io::Result<()> {
+        let mut bytes = OwnedBytes::new(b"abcde".as_ref());
+        let mut buf = [0u8; 5];
+        assert_eq!(bytes.read(&mut buf[..]).unwrap(), 5);
+        assert_eq!(&buf, b"abcde");
+        assert_eq!(bytes.as_slice(), b"");
+        assert_eq!(bytes.read(&mut buf[..]).unwrap(), 0);
+        assert_eq!(&buf, b"abcde");
+        Ok(())
+    }
+    #[test]
+    fn test_owned_bytes_read_incomplete() -> io::Result<()> {
+        let mut bytes = OwnedBytes::new(b"abcde".as_ref());
+        let mut buf = [0u8; 7];
+        assert_eq!(bytes.read(&mut buf[..]).unwrap(), 5);
+        assert_eq!(&buf[..5], b"abcde");
+        assert_eq!(bytes.read(&mut buf[..]).unwrap(), 0);
+        Ok(())
+    }
+
+    #[test]
+    fn test_owned_bytes_read_to_end() -> io::Result<()> {
+        let mut bytes = OwnedBytes::new(b"abcde".as_ref());
+        let mut buf = Vec::new();
+        bytes.read_to_end(&mut buf)?;
+        assert_eq!(buf.as_slice(), b"abcde".as_ref());
+        Ok(())
+    }
+
+    #[test]
+    fn test_owned_bytes_read_u8() -> io::Result<()> {
+        let mut bytes = OwnedBytes::new(b"\xFF".as_ref());
+        assert_eq!(bytes.read_u8(), 255);
+        assert_eq!(bytes.len(), 0);
+        Ok(())
+    }
+
+    #[test]
+    fn test_owned_bytes_read_u64() -> io::Result<()> {
+        let mut bytes = OwnedBytes::new(b"\0\xFF\xFF\xFF\xFF\xFF\xFF\xFF".as_ref());
+        assert_eq!(bytes.read_u64(), u64::MAX - 255);
+        assert_eq!(bytes.len(), 0);
+        Ok(())
+    }
+
+    #[test]
+    fn test_owned_bytes_split() {
+        let bytes = OwnedBytes::new(b"abcdefghi".as_ref());
+        let (left, right) = bytes.split(3);
+        assert_eq!(left.as_slice(), b"abc");
+        assert_eq!(right.as_slice(), b"defghi");
+    }
+
+    #[test]
+    fn test_owned_bytes_split_boundary() {
+        let bytes = OwnedBytes::new(b"abcdefghi".as_ref());
+        {
+            let (left, right) = bytes.clone().split(0);
+            assert_eq!(left.as_slice(), b"");
+            assert_eq!(right.as_slice(), b"abcdefghi");
+        }
+        {
+            let (left, right) = bytes.split(9);
+            assert_eq!(left.as_slice(), b"abcdefghi");
+            assert_eq!(right.as_slice(), b"");
+        }
+    }
+}
--- a/query-grammar/Cargo.toml
+++ b/query-grammar/Cargo.toml
@@ -14,3 +14,5 @@ edition = "2018"

 [dependencies]
 combine = {version="4", default-features=false, features=[] }
+once_cell = "1.7.2"
+regex ={ version = "1.5.4", default-features = false, features = ["std"] }
--- a/query-grammar/src/query_grammar.rs
+++ b/query-grammar/src/query_grammar.rs
@@ -1,21 +1,44 @@
 use super::user_input_ast::{UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral};
 use crate::Occur;
-use combine::parser::char::{char, digit, letter, space, spaces, string};
+use combine::parser::char::{char, digit, space, spaces, string};
+use combine::parser::range::{take_while, take_while1};
+use combine::parser::repeat::escaped;
 use combine::parser::Parser;
 use combine::{
    attempt, choice, eof, many, many1, one_of, optional, parser, satisfy, skip_many1, value,
 };
 use combine::{error::StringStreamError, parser::combinator::recognize};
+use once_cell::sync::Lazy;
+use regex::Regex;

-fn field<'a>() -> impl Parser<&'a str, Output = String> {
-    (
-        (letter().or(char('_'))),
-        many(satisfy(|c: char| {
-            c.is_alphanumeric() || c == '_' || c == '-'
-        })),
-    )
-        .skip(char(':'))
-        .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
+// Note: '-' char is only forbidden at the beginning of a field name, would be clearer to add it to special characters.
+const SPECIAL_CHARS: &[char] = &[
+    '+', '^', '`', ':', '{', '}', '"', '[', ']', '(', ')', '~', '!', '\\', '*', ' ',
+];
+const ESCAPED_SPECIAL_CHARS_PATTERN: &str = r#"\\(\+|\^|`|:|\{|\}|"|\[|\]|\(|\)|\~|!|\\|\*| )"#;
+
+/// Parses a field_name
+/// A field name must have at least one character and be followed by a colon.
+/// All characters are allowed including special characters `SPECIAL_CHARS`, but these
+/// need to be escaped with a backslash character '\'.
+fn field_name<'a>() -> impl Parser<&'a str, Output = String> {
+    static ESCAPED_SPECIAL_CHARS_RE: Lazy<Regex> =
+        Lazy::new(|| Regex::new(ESCAPED_SPECIAL_CHARS_PATTERN).unwrap());
+
+    recognize::<String, _, _>(escaped(
+        (
+            take_while1(|c| !SPECIAL_CHARS.contains(&c) && c != '-'),
+            take_while(|c| !SPECIAL_CHARS.contains(&c)),
+        ),
+        '\\',
+        satisfy(|c| SPECIAL_CHARS.contains(&c)),
+    ))
+    .skip(char(':'))
+    .map(|s| ESCAPED_SPECIAL_CHARS_RE.replace_all(&s, "$1").to_string())
+    .and_then(|s: String| match s.is_empty() {
+        true => Err(StringStreamError::UnexpectedParse),
+        _ => Ok(s),
+    })
 }

 fn word<'a>() -> impl Parser<&'a str, Output = String> {
@@ -98,7 +121,7 @@ fn term_val<'a>() -> impl Parser<&'a str, Output = String> {

 fn term_query<'a>() -> impl Parser<&'a str, Output = UserInputLiteral> {
    let term_val_with_field = negative_number().or(term_val());
-    (field(), term_val_with_field).map(|(field_name, phrase)| UserInputLiteral {
+    (field_name(), term_val_with_field).map(|(field_name, phrase)| UserInputLiteral {
        field_name: Some(field_name),
        phrase,
    })
@@ -195,7 +218,7 @@ fn range<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
    );

    (
-        optional(field()).skip(spaces()),
+        optional(field_name()).skip(spaces()),
        // try elastic first, if it matches, the range is unbounded
        attempt(elastic_unbounded_range).or(lower_to_upper),
    )
@@ -464,21 +487,21 @@ mod test {

    #[test]
    fn test_parse_elastic_query_ranges() {
-        test_parse_query_to_ast_helper("title: >a", "title:{\"a\" TO \"*\"}");
-        test_parse_query_to_ast_helper("title:>=a", "title:[\"a\" TO \"*\"}");
-        test_parse_query_to_ast_helper("title: <a", "title:{\"*\" TO \"a\"}");
-        test_parse_query_to_ast_helper("title:<=a", "title:{\"*\" TO \"a\"]");
-        test_parse_query_to_ast_helper("title:<=bsd", "title:{\"*\" TO \"bsd\"]");
+        test_parse_query_to_ast_helper("title: >a", "\"title\":{\"a\" TO \"*\"}");
+        test_parse_query_to_ast_helper("title:>=a", "\"title\":[\"a\" TO \"*\"}");
+        test_parse_query_to_ast_helper("title: <a", "\"title\":{\"*\" TO \"a\"}");
+        test_parse_query_to_ast_helper("title:<=a", "\"title\":{\"*\" TO \"a\"]");
+        test_parse_query_to_ast_helper("title:<=bsd", "\"title\":{\"*\" TO \"bsd\"]");

-        test_parse_query_to_ast_helper("weight: >70", "weight:{\"70\" TO \"*\"}");
-        test_parse_query_to_ast_helper("weight:>=70", "weight:[\"70\" TO \"*\"}");
-        test_parse_query_to_ast_helper("weight: <70", "weight:{\"*\" TO \"70\"}");
-        test_parse_query_to_ast_helper("weight:<=70", "weight:{\"*\" TO \"70\"]");
-        test_parse_query_to_ast_helper("weight: >60.7", "weight:{\"60.7\" TO \"*\"}");
+        test_parse_query_to_ast_helper("weight: >70", "\"weight\":{\"70\" TO \"*\"}");
+        test_parse_query_to_ast_helper("weight:>=70", "\"weight\":[\"70\" TO \"*\"}");
+        test_parse_query_to_ast_helper("weight: <70", "\"weight\":{\"*\" TO \"70\"}");
+        test_parse_query_to_ast_helper("weight:<=70", "\"weight\":{\"*\" TO \"70\"]");
+        test_parse_query_to_ast_helper("weight: >60.7", "\"weight\":{\"60.7\" TO \"*\"}");

-        test_parse_query_to_ast_helper("weight: <= 70", "weight:{\"*\" TO \"70\"]");
+        test_parse_query_to_ast_helper("weight: <= 70", "\"weight\":{\"*\" TO \"70\"]");

-        test_parse_query_to_ast_helper("weight: <= 70.5", "weight:{\"*\" TO \"70.5\"]");
+        test_parse_query_to_ast_helper("weight: <= 70.5", "\"weight\":{\"*\" TO \"70.5\"]");
    }

    #[test]
@@ -491,22 +514,43 @@ mod test {
    #[test]
    fn test_field_name() -> TestParseResult {
        assert_eq!(
-            super::field().parse("my-field-name:a")?,
-            ("my-field-name".to_string(), "a")
+            super::field_name().parse(".my.field.name:a"),
+            Ok((".my.field.name".to_string(), "a"))
        );
        assert_eq!(
-            super::field().parse("my_field_name:a")?,
-            ("my_field_name".to_string(), "a")
+            super::field_name().parse("my\\ field\\ name:a"),
+            Ok(("my field name".to_string(), "a"))
        );
-        assert!(super::field().parse(":a").is_err());
-        assert!(super::field().parse("-my_field:a").is_err());
+        assert!(super::field_name().parse("my field:a").is_err());
        assert_eq!(
-            super::field().parse("_my_field:a")?,
+            super::field_name().parse("\\(1\\+1\\):2"),
+            Ok(("(1+1)".to_string(), "2"))
+        );
+        assert_eq!(
+            super::field_name().parse("my_field_name:a"),
+            Ok(("my_field_name".to_string(), "a"))
+        );
+        assert!(super::field_name().parse("my_field_name").is_err());
+        assert!(super::field_name().parse(":a").is_err());
+        assert!(super::field_name().parse("-my_field:a").is_err());
+        assert_eq!(
+            super::field_name().parse("_my_field:a")?,
            ("_my_field".to_string(), "a")
        );
        Ok(())
    }

+    #[test]
+    fn test_field_name_re() {
+        let escaped_special_chars_re = Regex::new(ESCAPED_SPECIAL_CHARS_PATTERN).unwrap();
+        for special_char in SPECIAL_CHARS.iter() {
+            assert_eq!(
+                escaped_special_chars_re.replace_all(&format!("\\{}", special_char), "$1"),
+                special_char.to_string()
+            );
+        }
+    }
+
    #[test]
    fn test_range_parser() {
        // testing the range() parser separately
@@ -600,12 +644,14 @@ mod test {

    #[test]
    fn test_single_term_with_field() {
-        test_parse_query_to_ast_helper("abc:toto", "abc:\"toto\"");
+        test_parse_query_to_ast_helper("abc:toto", "\"abc\":\"toto\"");
    }

    #[test]
    fn test_single_term_with_float() {
-        test_parse_query_to_ast_helper("abc:1.1", "abc:\"1.1\"");
+        test_parse_query_to_ast_helper("abc:1.1", "\"abc\":\"1.1\"");
+        test_parse_query_to_ast_helper("a.b.c:1.1", "\"a.b.c\":\"1.1\"");
+        test_parse_query_to_ast_helper("a\\ b\\ c:1.1", "\"a b c\":\"1.1\"");
    }

    #[test]
@@ -621,22 +667,27 @@ mod test {
    #[test]
    fn test_parse_test_query_other() {
        test_parse_query_to_ast_helper("(+a +b) d", "(*(+\"a\" +\"b\") *\"d\")");
-        test_parse_query_to_ast_helper("+abc:toto", "abc:\"toto\"");
-        test_parse_query_to_ast_helper("(+abc:toto -titi)", "(+abc:\"toto\" -\"titi\")");
-        test_parse_query_to_ast_helper("-abc:toto", "(-abc:\"toto\")");
-        test_parse_query_to_ast_helper("abc:a b", "(*abc:\"a\" *\"b\")");
-        test_parse_query_to_ast_helper("abc:\"a b\"", "abc:\"a b\"");
-        test_parse_query_to_ast_helper("foo:[1 TO 5]", "foo:[\"1\" TO \"5\"]");
+        test_parse_query_to_ast_helper("+abc:toto", "\"abc\":\"toto\"");
+        test_parse_query_to_ast_helper("+a\\+b\\+c:toto", "\"a+b+c\":\"toto\"");
+        test_parse_query_to_ast_helper("(+abc:toto -titi)", "(+\"abc\":\"toto\" -\"titi\")");
+        test_parse_query_to_ast_helper("-abc:toto", "(-\"abc\":\"toto\")");
+        test_is_parse_err("--abc:toto");
+        test_parse_query_to_ast_helper("abc:a b", "(*\"abc\":\"a\" *\"b\")");
+        test_parse_query_to_ast_helper("abc:\"a b\"", "\"abc\":\"a b\"");
+        test_parse_query_to_ast_helper("foo:[1 TO 5]", "\"foo\":[\"1\" TO \"5\"]");
    }

    #[test]
    fn test_parse_query_with_range() {
        test_parse_query_to_ast_helper("[1 TO 5]", "[\"1\" TO \"5\"]");
-        test_parse_query_to_ast_helper("foo:{a TO z}", "foo:{\"a\" TO \"z\"}");
-        test_parse_query_to_ast_helper("foo:[1 TO toto}", "foo:[\"1\" TO \"toto\"}");
-        test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:{\"*\" TO \"toto\"}");
-        test_parse_query_to_ast_helper("foo:[1 TO *}", "foo:[\"1\" TO \"*\"}");
-        test_parse_query_to_ast_helper("foo:[1.1 TO *}", "foo:[\"1.1\" TO \"*\"}");
+        test_parse_query_to_ast_helper("foo:{a TO z}", "\"foo\":{\"a\" TO \"z\"}");
+        test_parse_query_to_ast_helper("foo:[1 TO toto}", "\"foo\":[\"1\" TO \"toto\"}");
+        test_parse_query_to_ast_helper("foo:[* TO toto}", "\"foo\":{\"*\" TO \"toto\"}");
+        test_parse_query_to_ast_helper("foo:[1 TO *}", "\"foo\":[\"1\" TO \"*\"}");
+        test_parse_query_to_ast_helper(
+            "1.2.foo.bar:[1.1 TO *}",
+            "\"1.2.foo.bar\":[\"1.1\" TO \"*\"}",
+        );
        test_is_parse_err("abc +    ");
    }
 }
--- a/query-grammar/src/user_input_ast.rs
+++ b/query-grammar/src/user_input_ast.rs
@@ -24,7 +24,7 @@ impl Debug for UserInputLeaf {
                ref upper,
            } => {
                if let Some(ref field) = field {
-                    write!(formatter, "{}:", field)?;
+                    write!(formatter, "\"{}\":", field)?;
                }
                lower.display_lower(formatter)?;
                write!(formatter, " TO ")?;
@@ -45,7 +45,7 @@ pub struct UserInputLiteral {
 impl fmt::Debug for UserInputLiteral {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
        match self.field_name {
-            Some(ref field_name) => write!(formatter, "{}:\"{}\"", field_name, self.phrase),
+            Some(ref field_name) => write!(formatter, "\"{}\":\"{}\"", field_name, self.phrase),
            None => write!(formatter, "\"{}\"", self.phrase),
        }
    }
@@ -79,7 +79,7 @@ impl UserInputBound {
        match *self {
            UserInputBound::Inclusive(ref contents) => contents,
            UserInputBound::Exclusive(ref contents) => contents,
-            UserInputBound::Unbounded => &"*",
+            UserInputBound::Unbounded => "*",
        }
    }
 }
--- a/src/collector/facet_collector.rs
+++ b/src/collector/facet_collector.rs
@@ -297,10 +297,8 @@ impl Collector for FacetCollector {
                                if depth == collapse_depth + 1 {
                                    collapsed_id = collapse_facet_ords.len();
                                    collapse_facet_ords.push(facet_streamer.term_ord());
-                                    collapse_mapping.push(collapsed_id);
-                                } else {
-                                    collapse_mapping.push(collapsed_id);
                                }
+                                collapse_mapping.push(collapsed_id);
                            }
                            break;
                        }
--- a/src/collector/filter_collector_wrapper.rs
+++ b/src/collector/filter_collector_wrapper.rs
@@ -16,7 +16,7 @@ use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue};
 use crate::schema::Field;
 use crate::{Score, SegmentReader, TantivyError};

-/// The `FilterCollector` collector filters docs using a u64 fast field value and a predicate.
+/// The `FilterCollector` collector filters docs using a fast field value and a predicate.
 /// Only the documents for which the predicate returned "true" will be passed on to the next collector.
 ///
 /// ```rust
--- a/src/collector/multi_collector.rs
+++ b/src/collector/multi_collector.rs
@@ -276,7 +276,7 @@ mod tests {
        let mut collectors = MultiCollector::new();
        let topdocs_handler = collectors.add_collector(TopDocs::with_limit(2));
        let count_handler = collectors.add_collector(Count);
-        let mut multifruits = searcher.search(&query, &mut collectors).unwrap();
+        let mut multifruits = searcher.search(&query, &collectors).unwrap();

        assert_eq!(count_handler.extract(&mut multifruits), 5);
        assert_eq!(topdocs_handler.extract(&mut multifruits).len(), 2);
--- a/src/collector/top_score_collector.rs
+++ b/src/collector/top_score_collector.rs
@@ -1080,7 +1080,7 @@ mod tests {
        query: &str,
        query_field: Field,
        schema: Schema,
-        mut doc_adder: impl FnMut(&mut IndexWriter) -> (),
+        mut doc_adder: impl FnMut(&mut IndexWriter),
    ) -> (Index, Box<dyn Query>) {
        let index = Index::create_in_ram(schema);
        let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
--- a/src/common/mod.rs
+++ b/src/common/mod.rs
@@ -1,203 +0,0 @@
-mod bitset;
-mod composite_file;
-
-pub use self::bitset::BitSet;
-pub(crate) use self::bitset::TinySet;
-pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
-pub use byteorder::LittleEndian as Endianness;
-pub use common::CountingWriter;
-pub use common::{
-    read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt,
-};
-pub use common::{BinarySerializable, DeserializeFrom, FixedSize};
-
-/// Segment's max doc must be `< MAX_DOC_LIMIT`.
-///
-/// We do not allow segments with more than
-pub const MAX_DOC_LIMIT: u32 = 1 << 31;
-
-/// Has length trait
-pub trait HasLen {
-    /// Return length
-    fn len(&self) -> usize;
-
-    /// Returns true iff empty.
-    fn is_empty(&self) -> bool {
-        self.len() == 0
-    }
-}
-
-const HIGHEST_BIT: u64 = 1 << 63;
-
-/// Maps a `i64` to `u64`
-///
-/// For simplicity, tantivy internally handles `i64` as `u64`.
-/// The mapping is defined by this function.
-///
-/// Maps `i64` to `u64` so that
-/// `-2^63 .. 2^63-1` is mapped
-///     to
-/// `0 .. 2^64-1`
-/// in that order.
-///
-/// This is more suited than simply casting (`val as u64`)
-/// because of bitpacking.
-///
-/// Imagine a list of `i64` ranging from -10 to 10.
-/// When casting negative values, the negative values are projected
-/// to values over 2^63, and all values end up requiring 64 bits.
-///
-/// # See also
-/// The [reverse mapping is `u64_to_i64`](./fn.u64_to_i64.html).
-#[inline]
-pub fn i64_to_u64(val: i64) -> u64 {
-    (val as u64) ^ HIGHEST_BIT
-}
-
-/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
-#[inline]
-pub fn u64_to_i64(val: u64) -> i64 {
-    (val ^ HIGHEST_BIT) as i64
-}
-
-/// Maps a `f64` to `u64`
-///
-/// For simplicity, tantivy internally handles `f64` as `u64`.
-/// The mapping is defined by this function.
-///
-/// Maps `f64` to `u64` in a monotonic manner, so that bytes lexical order is preserved.
-///
-/// This is more suited than simply casting (`val as u64`)
-/// which would truncate the result
-///
-/// # Reference
-///
-/// Daniel Lemire's [blog post](https://lemire.me/blog/2020/12/14/converting-floating-point-numbers-to-integers-while-preserving-order/)
-/// explains the mapping in a clear manner.
-///
-/// # See also
-/// The [reverse mapping is `u64_to_f64`](./fn.u64_to_f64.html).
-#[inline]
-pub fn f64_to_u64(val: f64) -> u64 {
-    let bits = val.to_bits();
-    if val.is_sign_positive() {
-        bits ^ HIGHEST_BIT
-    } else {
-        !bits
-    }
-}
-
-/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
-#[inline]
-pub fn u64_to_f64(val: u64) -> f64 {
-    f64::from_bits(if val & HIGHEST_BIT != 0 {
-        val ^ HIGHEST_BIT
-    } else {
-        !val
-    })
-}
-
-#[cfg(test)]
-pub(crate) mod test {
-
-    use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
-    use common::{BinarySerializable, FixedSize};
-    use proptest::prelude::*;
-    use std::f64;
-    use tantivy_bitpacker::compute_num_bits;
-    pub use tantivy_bitpacker::minmax;
-
-    fn test_i64_converter_helper(val: i64) {
-        assert_eq!(u64_to_i64(i64_to_u64(val)), val);
-    }
-
-    fn test_f64_converter_helper(val: f64) {
-        assert_eq!(u64_to_f64(f64_to_u64(val)), val);
-    }
-
-    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
-        let mut buffer = Vec::new();
-        O::default().serialize(&mut buffer).unwrap();
-        assert_eq!(buffer.len(), O::SIZE_IN_BYTES);
-    }
-
-    proptest! {
-        #[test]
-        fn test_f64_converter_monotonicity_proptest((left, right) in (proptest::num::f64::NORMAL, proptest::num::f64::NORMAL)) {
-            let left_u64 = f64_to_u64(left);
-            let right_u64 = f64_to_u64(right);
-            assert_eq!(left_u64 < right_u64,  left < right);
-        }
-    }
-
-    #[test]
-    fn test_i64_converter() {
-        assert_eq!(i64_to_u64(i64::min_value()), u64::min_value());
-        assert_eq!(i64_to_u64(i64::max_value()), u64::max_value());
-        test_i64_converter_helper(0i64);
-        test_i64_converter_helper(i64::min_value());
-        test_i64_converter_helper(i64::max_value());
-        for i in -1000i64..1000i64 {
-            test_i64_converter_helper(i);
-        }
-    }
-
-    #[test]
-    fn test_f64_converter() {
-        test_f64_converter_helper(f64::INFINITY);
-        test_f64_converter_helper(f64::NEG_INFINITY);
-        test_f64_converter_helper(0.0);
-        test_f64_converter_helper(-0.0);
-        test_f64_converter_helper(1.0);
-        test_f64_converter_helper(-1.0);
-    }
-
-    #[test]
-    fn test_f64_order() {
-        assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
-            .contains(&f64_to_u64(f64::NAN))); //nan is not a number
-        assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); //same exponent, different mantissa
-        assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); //same mantissa, different exponent
-        assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); //different exponent and mantissa
-        assert!(f64_to_u64(1.0) > f64_to_u64(-1.0)); // pos > neg
-        assert!(f64_to_u64(-1.5) < f64_to_u64(-1.0));
-        assert!(f64_to_u64(-2.0) < f64_to_u64(1.0));
-        assert!(f64_to_u64(-2.0) < f64_to_u64(-1.5));
-    }
-
-    #[test]
-    fn test_compute_num_bits() {
-        assert_eq!(compute_num_bits(1), 1u8);
-        assert_eq!(compute_num_bits(0), 0u8);
-        assert_eq!(compute_num_bits(2), 2u8);
-        assert_eq!(compute_num_bits(3), 2u8);
-        assert_eq!(compute_num_bits(4), 3u8);
-        assert_eq!(compute_num_bits(255), 8u8);
-        assert_eq!(compute_num_bits(256), 9u8);
-        assert_eq!(compute_num_bits(5_000_000_000), 33u8);
-    }
-
-    #[test]
-    fn test_max_doc() {
-        // this is the first time I write a unit test for a constant.
-        assert!(((super::MAX_DOC_LIMIT - 1) as i32) >= 0);
-        assert!((super::MAX_DOC_LIMIT as i32) < 0);
-    }
-
-    #[test]
-    fn test_minmax_empty() {
-        let vals: Vec<u32> = vec![];
-        assert_eq!(minmax(vals.into_iter()), None);
-    }
-
-    #[test]
-    fn test_minmax_one() {
-        assert_eq!(minmax(vec![1].into_iter()), Some((1, 1)));
-    }
-
-    #[test]
-    fn test_minmax_two() {
-        assert_eq!(minmax(vec![1, 2].into_iter()), Some((1, 2)));
-        assert_eq!(minmax(vec![2, 1].into_iter()), Some((1, 2)));
-    }
-}
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -42,7 +42,7 @@ fn load_metas(
            "Meta file does not contain valid utf8 file.".to_string(),
        )
    })?;
-    IndexMeta::deserialize(&meta_string, &inventory)
+    IndexMeta::deserialize(&meta_string, inventory)
        .map_err(|e| {
            DataCorruption::new(
                META_FILEPATH.to_path_buf(),
@@ -120,7 +120,7 @@ impl IndexBuilder {
    /// Creates a new index in a given filepath.
    /// The index will use the `MMapDirectory`.
    ///
-    /// If a previous index was in this directory, then its meta file will be destroyed.
+    /// If a previous index was in this directory, it returns an `IndexAlreadyExists` error.
    #[cfg(feature = "mmap")]
    pub fn create_in_dir<P: AsRef<Path>>(self, directory_path: P) -> crate::Result<Index> {
        let mmap_directory = MmapDirectory::open(directory_path)?;
@@ -229,7 +229,8 @@ impl Index {
    /// Creates a new index using the `RamDirectory`.
    ///
    /// The index will be allocated in anonymous memory.
-    /// This should only be used for unit tests.
+    /// This is useful for indexing a small set of documents,
+    /// for instance in unit tests or for a temporary in-memory index.
    pub fn create_in_ram(schema: Schema) -> Index {
        IndexBuilder::new().schema(schema).create_in_ram().unwrap()
    }
@@ -237,7 +238,7 @@ impl Index {
    /// Creates a new index in a given filepath.
    /// The index will use the `MMapDirectory`.
    ///
-    /// If a previous index was in this directory, then its meta file will be destroyed.
+    /// If a previous index was in this directory, then it returns an `IndexAlreadyExists` error.
    #[cfg(feature = "mmap")]
    pub fn create_in_dir<P: AsRef<Path>>(
        directory_path: P,
@@ -523,7 +524,22 @@ impl Index {

    /// Returns the set of corrupted files
    pub fn validate_checksum(&self) -> crate::Result<HashSet<PathBuf>> {
-        self.directory.list_damaged().map_err(Into::into)
+        let managed_files = self.directory.list_managed_files();
+        let active_segments_files: HashSet<PathBuf> = self
+            .searchable_segment_metas()?
+            .iter()
+            .flat_map(|segment_meta| segment_meta.list_files())
+            .collect();
+        let active_existing_files: HashSet<&PathBuf> =
+            active_segments_files.intersection(&managed_files).collect();
+
+        let mut damaged_files = HashSet::new();
+        for path in active_existing_files {
+            if !self.directory.validate_checksum(path)? {
+                damaged_files.insert((*path).clone());
+            }
+        }
+        Ok(damaged_files)
    }
 }

@@ -604,7 +620,7 @@ mod tests {
        .is_ok());
        assert!(Index::exists(&directory).unwrap());
        assert!(Index::create(
-            directory.clone(),
+            directory,
            Schema::builder().build(),
            IndexSettings::default()
        )
--- a/src/core/index_meta.rs
+++ b/src/core/index_meta.rs
@@ -101,6 +101,7 @@ impl SegmentMeta {

    /// Returns the list of files that
    /// are required for the segment meta.
+    /// Note: Some of the returned files may not exist depending on the state of the segment.
    ///
    /// This is useful as the way tantivy removes files
    /// is by removing all files that have been created by tantivy
@@ -369,7 +370,6 @@ mod tests {
        schema::{Schema, TEXT},
        IndexSettings, IndexSortByField, Order,
    };
-    use serde_json;

    #[test]
    fn test_serialize_metas() {
--- a/src/core/inverted_index_reader.rs
+++ b/src/core/inverted_index_reader.rs
@@ -1,6 +1,5 @@
 use std::io;

-use crate::common::BinarySerializable;
 use crate::directory::FileSlice;
 use crate::positions::PositionReader;
 use crate::postings::TermInfo;
@@ -8,6 +7,7 @@ use crate::postings::{BlockSegmentPostings, SegmentPostings};
 use crate::schema::IndexRecordOption;
 use crate::schema::Term;
 use crate::termdict::TermDictionary;
+use common::BinarySerializable;

 /// The inverted index reader is in charge of accessing
 /// the inverted index associated to a specific field.
--- a/src/core/mod.rs
+++ b/src/core/mod.rs
@@ -16,7 +16,6 @@ pub use self::index_meta::{
 pub use self::inverted_index_reader::InvertedIndexReader;
 pub use self::searcher::Searcher;
 pub use self::segment::Segment;
-pub use self::segment::SerializableSegment;
 pub use self::segment_component::SegmentComponent;
 pub use self::segment_id::SegmentId;
 pub use self::segment_reader::SegmentReader;
--- a/src/core/segment.rs
+++ b/src/core/segment.rs
@@ -1,13 +1,12 @@
 use super::SegmentComponent;
+use crate::core::Index;
 use crate::core::SegmentId;
 use crate::core::SegmentMeta;
 use crate::directory::error::{OpenReadError, OpenWriteError};
 use crate::directory::Directory;
 use crate::directory::{FileSlice, WritePtr};
-use crate::indexer::segment_serializer::SegmentSerializer;
 use crate::schema::Schema;
 use crate::Opstamp;
-use crate::{core::Index, indexer::doc_id_mapping::DocIdMapping};
 use std::fmt;
 use std::path::PathBuf;

@@ -90,20 +89,3 @@ impl Segment {
        Ok(write)
    }
 }
-
-pub trait SerializableSegment {
-    /// Writes a view of a segment by pushing information
-    /// to the `SegmentSerializer`.
-    ///
-    /// # Returns
-    /// The number of documents in the segment.
-    ///
-    /// doc_id_map is used when index is created and sorted, to map to the new doc_id order.
-    /// It is not used by the `IndexMerger`, since the doc_id_mapping on cross-segments works
-    /// differently
-    fn write(
-        &self,
-        serializer: SegmentSerializer,
-        doc_id_map: Option<&DocIdMapping>,
-    ) -> crate::Result<u32>;
-}
--- a/src/core/segment_id.rs
+++ b/src/core/segment_id.rs
@@ -22,7 +22,7 @@ use std::sync::atomic;
 pub struct SegmentId(Uuid);

 #[cfg(test)]
-static AUTO_INC_COUNTER: Lazy<atomic::AtomicUsize> = Lazy::new(|| atomic::AtomicUsize::default());
+static AUTO_INC_COUNTER: Lazy<atomic::AtomicUsize> = Lazy::new(atomic::AtomicUsize::default);

 #[cfg(test)]
 const ZERO_ARRAY: [u8; 8] = [0u8; 8];
@@ -108,6 +108,12 @@ impl fmt::Debug for SegmentId {
    }
 }

+impl fmt::Display for SegmentId {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "Seg({:?})", self.short_uuid_string())
+    }
+}
+
 impl PartialOrd for SegmentId {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
--- a/src/core/segment_reader.rs
+++ b/src/core/segment_reader.rs
@@ -1,9 +1,10 @@
-use crate::common::HasLen;
 use crate::core::InvertedIndexReader;
 use crate::core::Segment;
 use crate::core::SegmentComponent;
 use crate::core::SegmentId;
+use crate::directory::CompositeFile;
 use crate::directory::FileSlice;
+use crate::error::DataCorruption;
 use crate::fastfield::DeleteBitSet;
 use crate::fastfield::FacetReader;
 use crate::fastfield::FastFieldReaders;
@@ -15,7 +16,6 @@ use crate::space_usage::SegmentSpaceUsage;
 use crate::store::StoreReader;
 use crate::termdict::TermDictionary;
 use crate::DocId;
-use crate::{common::CompositeFile, error::DataCorruption};
 use fail::fail_point;
 use std::fmt;
 use std::sync::Arc;
@@ -32,9 +32,6 @@ use std::{collections::HashMap, io};
 ///
 /// The segment reader has a very low memory footprint,
 /// as close to all of the memory data is mmapped.
-///
-///
-/// TODO fix not decoding docfreq
 #[derive(Clone)]
 pub struct SegmentReader {
    inv_idx_reader_cache: Arc<RwLock<HashMap<Field, Arc<InvertedIndexReader>>>>,
@@ -57,17 +54,12 @@ pub struct SegmentReader {
 impl SegmentReader {
    /// Returns the highest document id ever attributed in
    /// this segment + 1.
-    /// Today, `tantivy` does not handle deletes, so it happens
-    /// to also be the number of documents in the index.
    pub fn max_doc(&self) -> DocId {
        self.max_doc
    }

-    /// Returns the number of documents.
+    /// Returns the number of alive documents.
    /// Deleted documents are not counted.
-    ///
-    /// Today, `tantivy` does not handle deletes so max doc and
-    /// num_docs are the same.
    pub fn num_docs(&self) -> DocId {
        self.num_docs
    }
@@ -81,7 +73,7 @@ impl SegmentReader {
    /// deleted in the segment.
    pub fn num_deleted_docs(&self) -> DocId {
        self.delete_bitset()
-            .map(|delete_set| delete_set.len() as DocId)
+            .map(|delete_set| delete_set.num_deleted() as DocId)
            .unwrap_or(0u32)
    }

@@ -329,6 +321,32 @@ mod test {
    use crate::schema::{Schema, Term, STORED, TEXT};
    use crate::DocId;

+    #[test]
+    fn test_num_alive() -> crate::Result<()> {
+        let mut schema_builder = Schema::builder();
+        schema_builder.add_text_field("name", TEXT | STORED);
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema.clone());
+        let name = schema.get_field("name").unwrap();
+
+        {
+            let mut index_writer = index.writer_for_tests()?;
+            index_writer.add_document(doc!(name => "tantivy"));
+            index_writer.add_document(doc!(name => "horse"));
+            index_writer.add_document(doc!(name => "jockey"));
+            index_writer.add_document(doc!(name => "cap"));
+            // four documents have been added so far; two of them are deleted next
+            index_writer.delete_term(Term::from_field_text(name, "horse"));
+            index_writer.delete_term(Term::from_field_text(name, "cap"));
+
+            // once committed, two of the four docs should be counted as deleted
+            index_writer.commit()?;
+        }
+        let searcher = index.reader()?.searcher();
+        assert_eq!(2, searcher.segment_reader(0).num_docs());
+        assert_eq!(4, searcher.segment_reader(0).max_doc());
+        Ok(())
+    }
    #[test]
    fn test_alive_docs_iterator() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
--- a/src/directory/composite_file.rs
+++ b/src/directory/composite_file.rs
@@ -1,18 +1,17 @@
-use crate::common::BinarySerializable;
-use crate::common::CountingWriter;
-use crate::common::VInt;
 use crate::directory::FileSlice;
 use crate::directory::{TerminatingWrite, WritePtr};
 use crate::schema::Field;
 use crate::space_usage::FieldUsage;
 use crate::space_usage::PerFieldSpaceUsage;
+use common::BinarySerializable;
+use common::CountingWriter;
+use common::HasLen;
+use common::VInt;
 use std::collections::HashMap;
 use std::io::{self, Read, Write};
 use std::iter::ExactSizeIterator;
 use std::ops::Range;

-use super::HasLen;
-
 #[derive(Eq, PartialEq, Hash, Copy, Ord, PartialOrd, Clone, Debug)]
 pub struct FileAddr {
    field: Field,
@@ -188,10 +187,10 @@ impl CompositeFile {
 mod test {

    use super::{CompositeFile, CompositeWrite};
-    use crate::common::BinarySerializable;
-    use crate::common::VInt;
    use crate::directory::{Directory, RamDirectory};
    use crate::schema::Field;
+    use common::BinarySerializable;
+    use common::VInt;
    use std::io::Write;
    use std::path::Path;

--- a/src/directory/file_slice.rs
+++ b/src/directory/file_slice.rs
@@ -1,7 +1,7 @@
 use stable_deref_trait::StableDeref;

-use crate::common::HasLen;
 use crate::directory::OwnedBytes;
+use common::HasLen;
 use std::fmt;
 use std::ops::Range;
 use std::sync::{Arc, Weak};
@@ -32,12 +32,6 @@ impl FileHandle for &'static [u8] {
    }
 }

-impl<T: Deref<Target = [u8]>> HasLen for T {
-    fn len(&self) -> usize {
-        self.deref().len()
-    }
-}
-
 impl<B> From<B> for FileSlice
 where
    B: StableDeref + Deref<Target = [u8]> + 'static + Send + Sync,
@@ -178,7 +172,7 @@ impl HasLen for FileSlice {
 #[cfg(test)]
 mod tests {
    use super::{FileHandle, FileSlice};
-    use crate::common::HasLen;
+    use common::HasLen;
    use std::io;

    #[test]
@@ -211,7 +205,7 @@ mod tests {
            assert_eq!(right.read_bytes()?.as_slice(), b"");
        }
        {
-            let (left, right) = file_slice.clone().split_from_end(2);
+            let (left, right) = file_slice.split_from_end(2);
            assert_eq!(left.read_bytes()?.as_slice(), b"abcd");
            assert_eq!(right.read_bytes()?.as_slice(), b"ef");
        }
--- a/src/directory/footer.rs
+++ b/src/directory/footer.rs
@@ -1,10 +1,10 @@
 use crate::directory::error::Incompatibility;
 use crate::directory::FileSlice;
 use crate::{
-    common::{BinarySerializable, CountingWriter, DeserializeFrom, FixedSize, HasLen},
    directory::{AntiCallToken, TerminatingWrite},
    Version, INDEX_FORMAT_VERSION,
 };
+use common::{BinarySerializable, CountingWriter, DeserializeFrom, FixedSize, HasLen};
 use crc32fast::Hasher;
 use serde::{Deserialize, Serialize};
 use std::io;
@@ -156,10 +156,8 @@ mod tests {

    use crate::directory::footer::Footer;
    use crate::directory::OwnedBytes;
-    use crate::{
-        common::BinarySerializable,
-        directory::{footer::FOOTER_MAGIC_NUMBER, FileSlice},
-    };
+    use crate::directory::{footer::FOOTER_MAGIC_NUMBER, FileSlice};
+    use common::BinarySerializable;
    use std::io;

    #[test]
--- a/src/directory/managed_directory.rs
+++ b/src/directory/managed_directory.rs
@@ -1,4 +1,4 @@
-use crate::core::{MANAGED_FILEPATH, META_FILEPATH};
+use crate::core::MANAGED_FILEPATH;
 use crate::directory::error::{DeleteError, LockError, OpenReadError, OpenWriteError};
 use crate::directory::footer::{Footer, FooterProxy};
 use crate::directory::GarbageCollectionResult;
@@ -248,24 +248,15 @@ impl ManagedDirectory {
        Ok(footer.crc() == crc)
    }

-    /// List files for which checksum does not match content
-    pub fn list_damaged(&self) -> result::Result<HashSet<PathBuf>, OpenReadError> {
-        let mut managed_paths = self
+    /// List all managed files
+    pub fn list_managed_files(&self) -> HashSet<PathBuf> {
+        let managed_paths = self
            .meta_informations
            .read()
            .expect("Managed directory rlock poisoned in list damaged.")
            .managed_paths
            .clone();
-
-        managed_paths.remove(*META_FILEPATH);
-
-        let mut damaged_files = HashSet::new();
-        for path in managed_paths {
-            if !self.validate_checksum(&path)? {
-                damaged_files.insert(path);
-            }
-        }
-        Ok(damaged_files)
+        managed_paths
    }
 }

@@ -336,7 +327,6 @@ mod tests_mmap_specific {

    use crate::directory::{Directory, ManagedDirectory, MmapDirectory, TerminatingWrite};
    use std::collections::HashSet;
-    use std::fs::OpenOptions;
    use std::io::Write;
    use std::path::{Path, PathBuf};
    use tempfile::TempDir;
@@ -402,44 +392,7 @@ mod tests_mmap_specific {
            // The file should still be in the list of managed file and
            // eventually be deleted once mmap is released.
            assert!(managed_directory.garbage_collect(|| living_files).is_ok());
-            assert!(!managed_directory.exists(test_path1).unwrap());
-        } else {
-            assert!(!managed_directory.exists(test_path1).unwrap());
        }
-    }
-
-    #[test]
-    fn test_checksum() -> crate::Result<()> {
-        let test_path1: &'static Path = Path::new("some_path_for_test");
-        let test_path2: &'static Path = Path::new("other_test_path");
-
-        let tempdir = TempDir::new().unwrap();
-        let tempdir_path = PathBuf::from(tempdir.path());
-
-        let mmap_directory = MmapDirectory::open(&tempdir_path)?;
-        let managed_directory = ManagedDirectory::wrap(mmap_directory)?;
-        let mut write = managed_directory.open_write(test_path1)?;
-        write.write_all(&[0u8, 1u8])?;
-        write.terminate()?;
-
-        let mut write = managed_directory.open_write(test_path2)?;
-        write.write_all(&[3u8, 4u8, 5u8])?;
-        write.terminate()?;
-
-        let read_file = managed_directory.open_read(test_path2)?.read_bytes()?;
-        assert_eq!(read_file.as_slice(), &[3u8, 4u8, 5u8]);
-        assert!(managed_directory.list_damaged().unwrap().is_empty());
-
-        let mut corrupted_path = tempdir_path.clone();
-        corrupted_path.push(test_path2);
-        let mut file = OpenOptions::new().write(true).open(&corrupted_path)?;
-        file.write_all(&[255u8])?;
-        file.flush()?;
-        drop(file);
-
-        let damaged = managed_directory.list_damaged()?;
-        assert_eq!(damaged.len(), 1);
-        assert!(damaged.contains(test_path2));
-        Ok(())
+        assert!(!managed_directory.exists(test_path1).unwrap());
    }
 }
--- a/src/directory/mmap_directory.rs
+++ b/src/directory/mmap_directory.rs
@@ -11,7 +11,7 @@ use crate::directory::{AntiCallToken, FileHandle, OwnedBytes};
 use crate::directory::{ArcBytes, WeakArcBytes};
 use crate::directory::{TerminatingWrite, WritePtr};
 use fs2::FileExt;
-use memmap::Mmap;
+use memmap2::Mmap;
 use serde::{Deserialize, Serialize};
 use stable_deref_trait::StableDeref;
 use std::convert::From;
@@ -53,7 +53,7 @@ fn open_mmap(full_path: &Path) -> result::Result<Option<Mmap>, OpenReadError> {
        return Ok(None);
    }
    unsafe {
-        memmap::Mmap::map(&file)
+        memmap2::Mmap::map(&file)
            .map(Some)
            .map_err(|io_err| OpenReadError::wrap_io_error(io_err, full_path.to_path_buf()))
    }
@@ -485,13 +485,14 @@ mod tests {
    // The following tests are specific to the MmapDirectory

    use super::*;
+    use crate::indexer::LogMergePolicy;
    use crate::Index;
    use crate::ReloadPolicy;
-    use crate::{common::HasLen, indexer::LogMergePolicy};
    use crate::{
        schema::{Schema, SchemaBuilder, TEXT},
        IndexSettings,
    };
+    use common::HasLen;

    #[test]
    fn test_open_non_existent_path() {
--- a/src/directory/mod.rs
+++ b/src/directory/mod.rs
@@ -20,6 +20,9 @@ mod watch_event_router;
 /// Errors specific to the directory module.
 pub mod error;

+mod composite_file;
+
+pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
 pub use self::directory::DirectoryLock;
 pub use self::directory::{Directory, DirectoryClone};
 pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK};
--- a/src/directory/owned_bytes.rs
+++ b/src/directory/owned_bytes.rs
@@ -1,290 +1,11 @@
 use crate::directory::FileHandle;
-use stable_deref_trait::StableDeref;
-use std::convert::TryInto;
-use std::mem;
-use std::ops::{Deref, Range};
-use std::sync::Arc;
-use std::{fmt, io};
+use std::io;
+use std::ops::Range;

-/// An OwnedBytes simply wraps an object that owns a slice of data and exposes
-/// this data as a static slice.
-///
-/// The backing object is required to be `StableDeref`.
-#[derive(Clone)]
-pub struct OwnedBytes {
-    data: &'static [u8],
-    box_stable_deref: Arc<dyn Deref<Target = [u8]> + Sync + Send>,
-}
+pub use ownedbytes::OwnedBytes;

 impl FileHandle for OwnedBytes {
    fn read_bytes(&self, range: Range<usize>) -> io::Result<OwnedBytes> {
        Ok(self.slice(range))
    }
 }
-
-impl OwnedBytes {
-    /// Creates an empty `OwnedBytes`.
-    pub fn empty() -> OwnedBytes {
-        OwnedBytes::new(&[][..])
-    }
-
-    /// Creates an `OwnedBytes` intance given a `StableDeref` object.
-    pub fn new<T: StableDeref + Deref<Target = [u8]> + 'static + Send + Sync>(
-        data_holder: T,
-    ) -> OwnedBytes {
-        let box_stable_deref = Arc::new(data_holder);
-        let bytes: &[u8] = box_stable_deref.as_ref();
-        let data = unsafe { mem::transmute::<_, &'static [u8]>(bytes.deref()) };
-        OwnedBytes {
-            data,
-            box_stable_deref,
-        }
-    }
-
-    /// creates a fileslice that is just a view over a slice of the data.
-    pub fn slice(&self, range: Range<usize>) -> Self {
-        OwnedBytes {
-            data: &self.data[range],
-            box_stable_deref: self.box_stable_deref.clone(),
-        }
-    }
-
-    /// Returns the underlying slice of data.
-    /// `Deref` and `AsRef` are also available.
-    #[inline]
-    pub fn as_slice(&self) -> &[u8] {
-        self.data
-    }
-
-    /// Returns the len of the slice.
-    #[inline]
-    pub fn len(&self) -> usize {
-        self.data.len()
-    }
-
-    /// Splits the OwnedBytes into two OwnedBytes `(left, right)`.
-    ///
-    /// Left will hold `split_len` bytes.
-    ///
-    /// This operation is cheap and does not require to copy any memory.
-    /// On the other hand, both `left` and `right` retain a handle over
-    /// the entire slice of memory. In other words, the memory will only
-    /// be released when both left and right are dropped.
-    pub fn split(self, split_len: usize) -> (OwnedBytes, OwnedBytes) {
-        let right_box_stable_deref = self.box_stable_deref.clone();
-        let left = OwnedBytes {
-            data: &self.data[..split_len],
-            box_stable_deref: self.box_stable_deref,
-        };
-        let right = OwnedBytes {
-            data: &self.data[split_len..],
-            box_stable_deref: right_box_stable_deref,
-        };
-        (left, right)
-    }
-
-    /// Returns true iff this `OwnedBytes` is empty.
-    #[inline]
-    pub fn is_empty(&self) -> bool {
-        self.as_slice().is_empty()
-    }
-
-    /// Drops the left most `advance_len` bytes.
-    ///
-    /// See also [.clip(clip_len: usize))](#method.clip).
-    #[inline]
-    pub fn advance(&mut self, advance_len: usize) {
-        self.data = &self.data[advance_len..]
-    }
-
-    /// Reads an `u8` from the `OwnedBytes` and advance by one byte.
-    pub fn read_u8(&mut self) -> u8 {
-        assert!(!self.is_empty());
-
-        let byte = self.as_slice()[0];
-        self.advance(1);
-        byte
-    }
-
-    /// Reads an `u64` encoded as little-endian from the `OwnedBytes` and advance by 8 bytes.
-    pub fn read_u64(&mut self) -> u64 {
-        assert!(self.len() > 7);
-
-        let octlet: [u8; 8] = self.as_slice()[..8].try_into().unwrap();
-        self.advance(8);
-        u64::from_le_bytes(octlet)
-    }
-}
-
-impl fmt::Debug for OwnedBytes {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        // We truncate the bytes in order to make sure the debug string
-        // is not too long.
-        let bytes_truncated: &[u8] = if self.len() > 8 {
-            &self.as_slice()[..10]
-        } else {
-            self.as_slice()
-        };
-        write!(f, "OwnedBytes({:?}, len={})", bytes_truncated, self.len())
-    }
-}
-
-impl Deref for OwnedBytes {
-    type Target = [u8];
-
-    fn deref(&self) -> &Self::Target {
-        self.as_slice()
-    }
-}
-
-impl io::Read for OwnedBytes {
-    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
-        let read_len = {
-            let data = self.as_slice();
-            if data.len() >= buf.len() {
-                let buf_len = buf.len();
-                buf.copy_from_slice(&data[..buf_len]);
-                buf.len()
-            } else {
-                let data_len = data.len();
-                buf[..data_len].copy_from_slice(data);
-                data_len
-            }
-        };
-        self.advance(read_len);
-        Ok(read_len)
-    }
-    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
-        let read_len = {
-            let data = self.as_slice();
-            buf.extend(data);
-            data.len()
-        };
-        self.advance(read_len);
-        Ok(read_len)
-    }
-    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
-        let read_len = self.read(buf)?;
-        if read_len != buf.len() {
-            return Err(io::Error::new(
-                io::ErrorKind::UnexpectedEof,
-                "failed to fill whole buffer",
-            ));
-        }
-        Ok(())
-    }
-}
-
-impl AsRef<[u8]> for OwnedBytes {
-    fn as_ref(&self) -> &[u8] {
-        self.as_slice()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use std::io::{self, Read};
-
-    use super::OwnedBytes;
-
-    #[test]
-    fn test_owned_bytes_debug() {
-        let short_bytes = OwnedBytes::new(b"abcd".as_ref());
-        assert_eq!(
-            format!("{:?}", short_bytes),
-            "OwnedBytes([97, 98, 99, 100], len=4)"
-        );
-        let long_bytes = OwnedBytes::new(b"abcdefghijklmnopq".as_ref());
-        assert_eq!(
-            format!("{:?}", long_bytes),
-            "OwnedBytes([97, 98, 99, 100, 101, 102, 103, 104, 105, 106], len=17)"
-        );
-    }
-
-    #[test]
-    fn test_owned_bytes_read() -> io::Result<()> {
-        let mut bytes = OwnedBytes::new(b"abcdefghiklmnopqrstuvwxyz".as_ref());
-        {
-            let mut buf = [0u8; 5];
-            bytes.read_exact(&mut buf[..]).unwrap();
-            assert_eq!(&buf, b"abcde");
-            assert_eq!(bytes.as_slice(), b"fghiklmnopqrstuvwxyz")
-        }
-        {
-            let mut buf = [0u8; 2];
-            bytes.read_exact(&mut buf[..]).unwrap();
-            assert_eq!(&buf, b"fg");
-            assert_eq!(bytes.as_slice(), b"hiklmnopqrstuvwxyz")
-        }
-        Ok(())
-    }
-
-    #[test]
-    fn test_owned_bytes_read_right_at_the_end() -> io::Result<()> {
-        let mut bytes = OwnedBytes::new(b"abcde".as_ref());
-        let mut buf = [0u8; 5];
-        assert_eq!(bytes.read(&mut buf[..]).unwrap(), 5);
-        assert_eq!(&buf, b"abcde");
-        assert_eq!(bytes.as_slice(), b"");
-        assert_eq!(bytes.read(&mut buf[..]).unwrap(), 0);
-        assert_eq!(&buf, b"abcde");
-        Ok(())
-    }
-    #[test]
-    fn test_owned_bytes_read_incomplete() -> io::Result<()> {
-        let mut bytes = OwnedBytes::new(b"abcde".as_ref());
-        let mut buf = [0u8; 7];
-        assert_eq!(bytes.read(&mut buf[..]).unwrap(), 5);
-        assert_eq!(&buf[..5], b"abcde");
-        assert_eq!(bytes.read(&mut buf[..]).unwrap(), 0);
-        Ok(())
-    }
-
-    #[test]
-    fn test_owned_bytes_read_to_end() -> io::Result<()> {
-        let mut bytes = OwnedBytes::new(b"abcde".as_ref());
-        let mut buf = Vec::new();
-        bytes.read_to_end(&mut buf)?;
-        assert_eq!(buf.as_slice(), b"abcde".as_ref());
-        Ok(())
-    }
-
-    #[test]
-    fn test_owned_bytes_read_u8() -> io::Result<()> {
-        let mut bytes = OwnedBytes::new(b"\xFF".as_ref());
-        assert_eq!(bytes.read_u8(), 255);
-        assert_eq!(bytes.len(), 0);
-        Ok(())
-    }
-
-    #[test]
-    fn test_owned_bytes_read_u64() -> io::Result<()> {
-        let mut bytes = OwnedBytes::new(b"\0\xFF\xFF\xFF\xFF\xFF\xFF\xFF".as_ref());
-        assert_eq!(bytes.read_u64(), u64::MAX - 255);
-        assert_eq!(bytes.len(), 0);
-        Ok(())
-    }
-
-    #[test]
-    fn test_owned_bytes_split() {
-        let bytes = OwnedBytes::new(b"abcdefghi".as_ref());
-        let (left, right) = bytes.split(3);
-        assert_eq!(left.as_slice(), b"abc");
-        assert_eq!(right.as_slice(), b"defghi");
-    }
-
-    #[test]
-    fn test_owned_bytes_split_boundary() {
-        let bytes = OwnedBytes::new(b"abcdefghi".as_ref());
-        {
-            let (left, right) = bytes.clone().split(0);
-            assert_eq!(left.as_slice(), b"");
-            assert_eq!(right.as_slice(), b"abcdefghi");
-        }
-        {
-            let (left, right) = bytes.split(9);
-            assert_eq!(left.as_slice(), b"abcdefghi");
-            assert_eq!(right.as_slice(), b"");
-        }
-    }
-}
--- a/src/directory/ram_directory.rs
+++ b/src/directory/ram_directory.rs
@@ -1,9 +1,10 @@
+use crate::core::META_FILEPATH;
 use crate::directory::error::{DeleteError, OpenReadError, OpenWriteError};
 use crate::directory::AntiCallToken;
 use crate::directory::WatchCallbackList;
 use crate::directory::{Directory, FileSlice, WatchCallback, WatchHandle};
 use crate::directory::{TerminatingWrite, WritePtr};
-use crate::{common::HasLen, core::META_FILEPATH};
+use common::HasLen;
 use fail::fail_point;
 use std::collections::HashMap;
 use std::fmt;
--- a/src/directory/tests.rs
+++ b/src/directory/tests.rs
@@ -166,26 +166,26 @@ fn test_write_create_the_file(directory: &dyn Directory) {
 fn test_directory_delete(directory: &dyn Directory) -> crate::Result<()> {
    let test_path: &'static Path = Path::new("some_path_for_test");
    assert!(directory.open_read(test_path).is_err());
-    let mut write_file = directory.open_write(&test_path)?;
+    let mut write_file = directory.open_write(test_path)?;
    write_file.write_all(&[1, 2, 3, 4])?;
    write_file.flush()?;
    {
-        let read_handle = directory.open_read(&test_path)?.read_bytes()?;
+        let read_handle = directory.open_read(test_path)?.read_bytes()?;
        assert_eq!(read_handle.as_slice(), &[1u8, 2u8, 3u8, 4u8]);
        // Mapped files can't be deleted on Windows
        if !cfg!(windows) {
-            assert!(directory.delete(&test_path).is_ok());
+            assert!(directory.delete(test_path).is_ok());
            assert_eq!(read_handle.as_slice(), &[1u8, 2u8, 3u8, 4u8]);
        }
        assert!(directory.delete(Path::new("SomeOtherPath")).is_err());
    }

    if cfg!(windows) {
-        assert!(directory.delete(&test_path).is_ok());
+        assert!(directory.delete(test_path).is_ok());
    }

-    assert!(directory.open_read(&test_path).is_err());
-    assert!(directory.delete(&test_path).is_err());
+    assert!(directory.open_read(test_path).is_err());
+    assert!(directory.delete(test_path).is_err());
    Ok(())
 }

--- a/src/fastfield/bytes/writer.rs
+++ b/src/fastfield/bytes/writer.rs
@@ -83,11 +83,11 @@ impl BytesFastFieldWriter {
        &'a self,
        doc_id_map: Option<&'b DocIdMapping>,
    ) -> impl Iterator<Item = &'b [u8]> {
-        let doc_id_iter = if let Some(doc_id_map) = doc_id_map {
-            Box::new(doc_id_map.iter_old_doc_ids().cloned()) as Box<dyn Iterator<Item = u32>>
+        let doc_id_iter: Box<dyn Iterator<Item = u32>> = if let Some(doc_id_map) = doc_id_map {
+            Box::new(doc_id_map.iter_old_doc_ids())
        } else {
-            Box::new(self.doc_index.iter().enumerate().map(|el| el.0 as u32))
-                as Box<dyn Iterator<Item = u32>>
+            let max_doc = self.doc_index.len() as u32;
+            Box::new(0..max_doc)
        };
        doc_id_iter.map(move |doc_id| self.get_values_for_doc_id(doc_id))
    }
--- a/src/fastfield/delete.rs
+++ b/src/fastfield/delete.rs
@@ -1,9 +1,10 @@
-use crate::common::{BitSet, HasLen};
 use crate::directory::FileSlice;
 use crate::directory::OwnedBytes;
 use crate::directory::WritePtr;
 use crate::space_usage::ByteCount;
 use crate::DocId;
+use common::BitSet;
+use common::HasLen;
 use std::io;
 use std::io::Write;

@@ -91,6 +92,10 @@ impl DeleteBitSet {
        b & (1u8 << shift) != 0
    }

+    /// The number of deleted docs
+    pub fn num_deleted(&self) -> usize {
+        self.num_deleted
+    }
    /// Summarize total space usage of this bitset.
    pub fn space_usage(&self) -> ByteCount {
        self.data.len()
@@ -106,7 +111,7 @@ impl HasLen for DeleteBitSet {
 #[cfg(test)]
 mod tests {
    use super::DeleteBitSet;
-    use crate::common::HasLen;
+    use common::HasLen;

    #[test]
    fn test_delete_bitset_empty() {
--- a/src/fastfield/mod.rs
+++ b/src/fastfield/mod.rs
@@ -40,11 +40,11 @@ pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
 use crate::schema::Cardinality;
 use crate::schema::FieldType;
 use crate::schema::Value;
+use crate::DocId;
 use crate::{
    chrono::{NaiveDateTime, Utc},
    schema::Type,
 };
-use crate::{common, DocId};

 mod bytes;
 mod delete;
@@ -213,8 +213,7 @@ fn value_to_u64(value: &Value) -> u64 {
 mod tests {

    use super::*;
-    use crate::common::CompositeFile;
-    use crate::common::HasLen;
+    use crate::directory::CompositeFile;
    use crate::directory::{Directory, RamDirectory, WritePtr};
    use crate::merge_policy::NoMergePolicy;
    use crate::schema::Field;
@@ -222,6 +221,7 @@ mod tests {
    use crate::schema::FAST;
    use crate::schema::{Document, IntOptions};
    use crate::{Index, SegmentId, SegmentReader};
+    use common::HasLen;
    use once_cell::sync::Lazy;
    use rand::prelude::SliceRandom;
    use rand::rngs::StdRng;
@@ -267,8 +267,8 @@ mod tests {
                .unwrap();
            serializer.close().unwrap();
        }
-        let file = directory.open_read(&path).unwrap();
-        assert_eq!(file.len(), 37 as usize);
+        let file = directory.open_read(path).unwrap();
+        assert_eq!(file.len(), 37);
        let composite_file = CompositeFile::open(&file)?;
        let file = composite_file.open_read(*FIELD).unwrap();
        let fast_field_reader = DynamicFastFieldReader::<u64>::open(file)?;
@@ -298,8 +298,8 @@ mod tests {
            fast_field_writers.serialize(&mut serializer, &HashMap::new(), None)?;
            serializer.close()?;
        }
-        let file = directory.open_read(&path)?;
-        assert_eq!(file.len(), 62 as usize);
+        let file = directory.open_read(path)?;
+        assert_eq!(file.len(), 62);
        {
            let fast_fields_composite = CompositeFile::open(&file)?;
            let data = fast_fields_composite.open_read(*FIELD).unwrap();
@@ -334,8 +334,8 @@ mod tests {
                .unwrap();
            serializer.close().unwrap();
        }
-        let file = directory.open_read(&path).unwrap();
-        assert_eq!(file.len(), 35 as usize);
+        let file = directory.open_read(path).unwrap();
+        assert_eq!(file.len(), 35);
        {
            let fast_fields_composite = CompositeFile::open(&file).unwrap();
            let data = fast_fields_composite.open_read(*FIELD).unwrap();
@@ -366,8 +366,8 @@ mod tests {
                .unwrap();
            serializer.close().unwrap();
        }
-        let file = directory.open_read(&path).unwrap();
-        assert_eq!(file.len(), 80043 as usize);
+        let file = directory.open_read(path).unwrap();
+        assert_eq!(file.len(), 80043);
        {
            let fast_fields_composite = CompositeFile::open(&file)?;
            let data = fast_fields_composite.open_read(*FIELD).unwrap();
@@ -405,9 +405,9 @@ mod tests {
                .unwrap();
            serializer.close().unwrap();
        }
-        let file = directory.open_read(&path).unwrap();
+        let file = directory.open_read(path).unwrap();
        //assert_eq!(file.len(), 17710 as usize); //bitpacked size
-        assert_eq!(file.len(), 10175 as usize); // linear interpol size
+        assert_eq!(file.len(), 10175_usize); // linear interpol size
        {
            let fast_fields_composite = CompositeFile::open(&file)?;
            let data = fast_fields_composite.open_read(i64_field).unwrap();
@@ -447,7 +447,7 @@ mod tests {
            serializer.close().unwrap();
        }

-        let file = directory.open_read(&path).unwrap();
+        let file = directory.open_read(path).unwrap();
        {
            let fast_fields_composite = CompositeFile::open(&file).unwrap();
            let data = fast_fields_composite.open_read(i64_field).unwrap();
@@ -480,7 +480,7 @@ mod tests {
            fast_field_writers.serialize(&mut serializer, &HashMap::new(), None)?;
            serializer.close()?;
        }
-        let file = directory.open_read(&path)?;
+        let file = directory.open_read(path)?;
        {
            let fast_fields_composite = CompositeFile::open(&file)?;
            let data = fast_fields_composite.open_read(*FIELD).unwrap();
@@ -588,7 +588,7 @@ mod bench {
    use super::tests::FIELD;
    use super::tests::{generate_permutation, SCHEMA};
    use super::*;
-    use crate::common::CompositeFile;
+    use crate::directory::CompositeFile;
    use crate::directory::{Directory, RamDirectory, WritePtr};
    use crate::fastfield::FastFieldReader;
    use std::collections::HashMap;
--- a/src/fastfield/multivalued/mod.rs
+++ b/src/fastfield/multivalued/mod.rs
@@ -8,14 +8,22 @@ pub use self::writer::MultiValuedFastFieldWriter;
 mod tests {

    use crate::collector::TopDocs;
+    use crate::indexer::NoMergePolicy;
    use crate::query::QueryParser;
    use crate::schema::Cardinality;
    use crate::schema::Facet;
    use crate::schema::IntOptions;
    use crate::schema::Schema;
    use crate::schema::INDEXED;
+    use crate::Document;
    use crate::Index;
+    use crate::Term;
    use chrono::Duration;
+    use futures::executor::block_on;
+    use proptest::prop_oneof;
+    use proptest::proptest;
+    use proptest::strategy::Strategy;
+    use test_env_log::test;

    #[test]
    fn test_multivalued_u64() {
@@ -90,7 +98,7 @@ mod tests {
        {
            let parser = QueryParser::for_index(&index, vec![date_field]);
            let query = parser
-                .parse_query(&format!("\"{}\"", first_time_stamp.to_rfc3339()).to_string())
+                .parse_query(&format!("\"{}\"", first_time_stamp.to_rfc3339()))
                .expect("could not parse query");
            let results = searcher
                .search(&query, &TopDocs::with_limit(5))
@@ -121,7 +129,7 @@ mod tests {
        {
            let parser = QueryParser::for_index(&index, vec![date_field]);
            let query = parser
-                .parse_query(&format!("\"{}\"", two_secs_ahead.to_rfc3339()).to_string())
+                .parse_query(&format!("\"{}\"", two_secs_ahead.to_rfc3339()))
                .expect("could not parse query");
            let results = searcher
                .search(&query, &TopDocs::with_limit(5))
@@ -225,6 +233,111 @@ mod tests {
        multi_value_reader.get_vals(3, &mut vals);
        assert_eq!(&vals, &[-5i64, -20i64, 1i64]);
    }
+
+    fn test_multivalued_no_panic(ops: &[IndexingOp]) {
+        let mut schema_builder = Schema::builder();
+        let field = schema_builder.add_u64_field(
+            "multifield",
+            IntOptions::default()
+                .set_fast(Cardinality::MultiValues)
+                .set_indexed(),
+        );
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+        let mut index_writer = index.writer_for_tests().unwrap();
+        index_writer.set_merge_policy(Box::new(NoMergePolicy));
+
+        for &op in ops {
+            match op {
+                IndexingOp::AddDoc { id } => {
+                    match id % 3 {
+                        0 => {
+                            index_writer.add_document(doc!());
+                        }
+                        1 => {
+                            let mut doc = Document::new();
+                            for _ in 0..5001 {
+                                doc.add_u64(field, id as u64);
+                            }
+                            index_writer.add_document(doc);
+                        }
+                        _ => {
+                            let mut doc = Document::new();
+                            doc.add_u64(field, id as u64);
+                            index_writer.add_document(doc);
+                        }
+                    };
+                }
+                IndexingOp::DeleteDoc { id } => {
+                    index_writer.delete_term(Term::from_field_u64(field, id as u64));
+                }
+                IndexingOp::Commit => {
+                    index_writer.commit().unwrap();
+                }
+                IndexingOp::Merge => {
+                    let segment_ids = index
+                        .searchable_segment_ids()
+                        .expect("Searchable segments failed.");
+                    if segment_ids.len() >= 2 {
+                        block_on(index_writer.merge(&segment_ids)).unwrap();
+                        assert!(index_writer.segment_updater().wait_merging_thread().is_ok());
+                    }
+                }
+            }
+        }
+
+        assert!(index_writer.commit().is_ok());
+
+        // Merging the segments
+        {
+            let segment_ids = index
+                .searchable_segment_ids()
+                .expect("Searchable segments failed.");
+            if !segment_ids.is_empty() {
+                block_on(index_writer.merge(&segment_ids)).unwrap();
+                assert!(index_writer.wait_merging_threads().is_ok());
+            }
+        }
+    }
+
+    #[derive(Debug, Clone, Copy)]
+    enum IndexingOp {
+        AddDoc { id: u32 },
+        DeleteDoc { id: u32 },
+        Commit,
+        Merge,
+    }
+
+    fn operation_strategy() -> impl Strategy<Value = IndexingOp> {
+        prop_oneof![
+            (0u32..10u32).prop_map(|id| IndexingOp::DeleteDoc { id }),
+            (0u32..10u32).prop_map(|id| IndexingOp::AddDoc { id }),
+            (0u32..2u32).prop_map(|_| IndexingOp::Commit),
+            (0u32..1u32).prop_map(|_| IndexingOp::Merge),
+        ]
+    }
+
+    proptest! {
+        #[test]
+        fn test_multivalued_proptest(ops in proptest::collection::vec(operation_strategy(), 1..10)) {
+            test_multivalued_no_panic(&ops[..]);
+        }
+    }
+
+    #[test]
+    fn test_multivalued_proptest_off_by_one_bug_1151() {
+        use IndexingOp::*;
+        let ops = [
+            AddDoc { id: 3 },
+            AddDoc { id: 1 },
+            AddDoc { id: 3 },
+            Commit,
+            Merge,
+        ];
+
+        test_multivalued_no_panic(&ops[..]);
+    }
+
    #[test]
    #[ignore]
    fn test_many_facets() {
--- a/src/fastfield/multivalued/reader.rs
+++ b/src/fastfield/multivalued/reader.rs
@@ -1,8 +1,6 @@
 use std::ops::Range;

-use crate::fastfield::{
-    BitpackedFastFieldReader, DynamicFastFieldReader, FastFieldReader, FastValue, MultiValueLength,
-};
+use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue, MultiValueLength};
 use crate::DocId;

 /// Reader for a multivalued `u64` fast field.
@@ -16,13 +14,13 @@ use crate::DocId;
 #[derive(Clone)]
 pub struct MultiValuedFastFieldReader<Item: FastValue> {
    idx_reader: DynamicFastFieldReader<u64>,
-    vals_reader: BitpackedFastFieldReader<Item>,
+    vals_reader: DynamicFastFieldReader<Item>,
 }

 impl<Item: FastValue> MultiValuedFastFieldReader<Item> {
    pub(crate) fn open(
        idx_reader: DynamicFastFieldReader<u64>,
-        vals_reader: BitpackedFastFieldReader<Item>,
+        vals_reader: DynamicFastFieldReader<Item>,
    ) -> MultiValuedFastFieldReader<Item> {
        MultiValuedFastFieldReader {
            idx_reader,
@@ -32,6 +30,7 @@ impl<Item: FastValue> MultiValuedFastFieldReader<Item> {

    /// Returns `(start, stop)`, such that the values associated
    /// to the given document are `start..stop`.
+    #[inline]
    fn range(&self, doc: DocId) -> Range<u64> {
        let start = self.idx_reader.get(doc);
        let stop = self.idx_reader.get(doc + 1);
@@ -39,20 +38,41 @@ impl<Item: FastValue> MultiValuedFastFieldReader<Item> {
    }

    /// Returns the array of values associated to the given `doc`.
+    #[inline]
    pub fn get_vals(&self, doc: DocId, vals: &mut Vec<Item>) {
        let range = self.range(doc);
        let len = (range.end - range.start) as usize;
        vals.resize(len, Item::make_zero());
-        self.vals_reader.get_range_u64(range.start, &mut vals[..]);
+        self.vals_reader.get_range(range.start, &mut vals[..]);
+    }
+
+    /// Returns the minimum value for this fast field.
+    ///
+    /// The min value does not take possibly deleted
+    /// documents into account, and should be considered as a lower bound
+    /// of the actual minimum value.
+    pub fn min_value(&self) -> Item {
+        self.vals_reader.min_value()
+    }
+
+    /// Returns the maximum value for this fast field.
+    ///
+    /// The max value does not take possibly deleted
+    /// documents into account, and should be considered as an upper bound
+    /// of the actual maximum value.
+    pub fn max_value(&self) -> Item {
+        self.vals_reader.max_value()
    }

    /// Returns the number of values associated with the document `DocId`.
+    #[inline]
    pub fn num_vals(&self, doc: DocId) -> usize {
        let range = self.range(doc);
        (range.end - range.start) as usize
    }

    /// Returns the overall number of values in this field  .
+    #[inline]
    pub fn total_num_vals(&self) -> u64 {
        self.idx_reader.max_value()
    }
@@ -71,7 +91,7 @@ impl<Item: FastValue> MultiValueLength for MultiValuedFastFieldReader<Item> {
 mod tests {

    use crate::core::Index;
-    use crate::schema::{Facet, Schema, INDEXED};
+    use crate::schema::{Cardinality, Facet, IntOptions, Schema, INDEXED};

    #[test]
    fn test_multifastfield_reader() {
@@ -126,4 +146,32 @@ mod tests {
            assert_eq!(&vals[..], &[4]);
        }
    }
+
+    #[test]
+    fn test_multifastfield_reader_min_max() {
+        let mut schema_builder = Schema::builder();
+        let field_options = IntOptions::default()
+            .set_indexed()
+            .set_fast(Cardinality::MultiValues);
+        let item_field = schema_builder.add_i64_field("items", field_options);
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+        let mut index_writer = index
+            .writer_for_tests()
+            .expect("Failed to create index writer.");
+        index_writer.add_document(doc!(
+            item_field => 2i64,
+            item_field => 3i64,
+            item_field => -2i64,
+        ));
+        index_writer.add_document(doc!(item_field => 6i64, item_field => 3i64));
+        index_writer.add_document(doc!(item_field => 4i64));
+        index_writer.commit().expect("Commit failed");
+        let searcher = index.reader().unwrap().searcher();
+        let segment_reader = searcher.segment_reader(0);
+        let field_reader = segment_reader.fast_fields().i64s(item_field).unwrap();
+
+        assert_eq!(field_reader.min_value(), -2);
+        assert_eq!(field_reader.max_value(), 6);
+    }
 }
--- a/src/fastfield/multivalued/writer.rs
+++ b/src/fastfield/multivalued/writer.rs
@@ -102,11 +102,11 @@ impl MultiValuedFastFieldWriter {
        &'a self,
        doc_id_map: Option<&'b DocIdMapping>,
    ) -> impl Iterator<Item = &'b [u64]> {
-        let doc_id_iter = if let Some(doc_id_map) = doc_id_map {
-            Box::new(doc_id_map.iter_old_doc_ids().cloned()) as Box<dyn Iterator<Item = u32>>
+        let doc_id_iter: Box<dyn Iterator<Item = u32>> = if let Some(doc_id_map) = doc_id_map {
+            Box::new(doc_id_map.iter_old_doc_ids())
        } else {
-            Box::new(self.doc_index.iter().enumerate().map(|el| el.0 as u32))
-                as Box<dyn Iterator<Item = u32>>
+            let max_doc = self.doc_index.len() as DocId;
+            Box::new(0..max_doc)
        };
        doc_id_iter.map(move |doc_id| self.get_values_for_doc_id(doc_id))
    }
--- a/src/fastfield/reader.rs
+++ b/src/fastfield/reader.rs
@@ -1,6 +1,5 @@
 use super::FastValue;
-use crate::common::BinarySerializable;
-use crate::common::CompositeFile;
+use crate::directory::CompositeFile;
 use crate::directory::FileSlice;
 use crate::directory::OwnedBytes;
 use crate::directory::{Directory, RamDirectory, WritePtr};
@@ -8,6 +7,7 @@ use crate::fastfield::{CompositeFastFieldSerializer, FastFieldsWriter};
 use crate::schema::Schema;
 use crate::schema::FAST;
 use crate::DocId;
+use common::BinarySerializable;
 use fastfield_codecs::bitpacked::BitpackedFastFieldReader as BitpackedReader;
 use fastfield_codecs::bitpacked::BitpackedFastFieldSerializer;
 use fastfield_codecs::linearinterpol::LinearInterpolFastFieldReader;
@@ -44,13 +44,13 @@ pub trait FastFieldReader<Item: FastValue>: Clone {
    ///
    /// May panic if `start + output.len()` is greater than
    /// the segment's `maxdoc`.
-    fn get_range(&self, start: DocId, output: &mut [Item]);
+    fn get_range(&self, start: u64, output: &mut [Item]);

    /// Returns the minimum value for this fast field.
    ///
-    /// The max value does not take in account of possible
-    /// deleted document, and should be considered as an upper bound
-    /// of the actual maximum value.
+    /// The min value does not take possibly deleted
+    /// documents into account, and should be considered as a lower bound
+    /// of the actual minimum value.
    fn min_value(&self) -> Item;

    /// Returns the maximum value for this fast field.
@@ -120,7 +120,7 @@ impl<Item: FastValue> FastFieldReader<Item> for DynamicFastFieldReader<Item> {
            Self::MultiLinearInterpol(reader) => reader.get(doc),
        }
    }
-    fn get_range(&self, start: DocId, output: &mut [Item]) {
+    fn get_range(&self, start: u64, output: &mut [Item]) {
        match self {
            Self::Bitpacked(reader) => reader.get_range(start, output),
            Self::LinearInterpol(reader) => reader.get_range(start, output),
@@ -226,8 +226,8 @@ impl<Item: FastValue, C: FastFieldCodecReader + Clone> FastFieldReader<Item>
    ///
    /// May panic if `start + output.len()` is greater than
    /// the segment's `maxdoc`.
-    fn get_range(&self, start: DocId, output: &mut [Item]) {
-        self.get_range_u64(u64::from(start), output);
+    fn get_range(&self, start: u64, output: &mut [Item]) {
+        self.get_range_u64(start, output);
    }

    /// Returns the minimum value for this fast field.
--- a/src/fastfield/readers.rs
+++ b/src/fastfield/readers.rs
@@ -1,4 +1,4 @@
-use crate::common::CompositeFile;
+use crate::directory::CompositeFile;
 use crate::directory::FileSlice;
 use crate::fastfield::MultiValuedFastFieldReader;
 use crate::fastfield::{BitpackedFastFieldReader, FastFieldNotAvailableError};
@@ -99,12 +99,19 @@ impl FastFieldReaders {
        Ok(())
    }

+    pub(crate) fn typed_fast_field_reader_with_idx<TFastValue: FastValue>(
+        &self,
+        field: Field,
+        index: usize,
+    ) -> crate::Result<DynamicFastFieldReader<TFastValue>> {
+        let fast_field_slice = self.fast_field_data(field, index)?;
+        DynamicFastFieldReader::open(fast_field_slice)
+    }
    pub(crate) fn typed_fast_field_reader<TFastValue: FastValue>(
        &self,
        field: Field,
    ) -> crate::Result<DynamicFastFieldReader<TFastValue>> {
-        let fast_field_slice = self.fast_field_data(field, 0)?;
-        DynamicFastFieldReader::open(fast_field_slice)
+        self.typed_fast_field_reader_with_idx(field, 0)
    }

    pub(crate) fn typed_fast_field_multi_reader<TFastValue: FastValue>(
@@ -112,9 +119,7 @@ impl FastFieldReaders {
        field: Field,
    ) -> crate::Result<MultiValuedFastFieldReader<TFastValue>> {
        let idx_reader = self.typed_fast_field_reader(field)?;
-        let fast_field_slice_vals = self.fast_field_data(field, 1)?;
-        let vals_reader: BitpackedFastFieldReader<TFastValue> =
-            BitpackedFastFieldReader::open(fast_field_slice_vals)?;
+        let vals_reader = self.typed_fast_field_reader_with_idx(field, 1)?;
        Ok(MultiValuedFastFieldReader::open(idx_reader, vals_reader))
    }

--- a/src/fastfield/serializer/mod.rs
+++ b/src/fastfield/serializer/mod.rs
@@ -1,8 +1,8 @@
-use crate::common::BinarySerializable;
-use crate::common::CompositeWrite;
-use crate::common::CountingWriter;
+use crate::directory::CompositeWrite;
 use crate::directory::WritePtr;
 use crate::schema::Field;
+use common::BinarySerializable;
+use common::CountingWriter;
 pub use fastfield_codecs::bitpacked::BitpackedFastFieldSerializer;
 pub use fastfield_codecs::bitpacked::BitpackedFastFieldSerializerLegacy;
 use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
@@ -46,11 +46,7 @@ fn codec_estimation<T: FastFieldCodecSerializer, A: FastFieldDataAccess>(
    if !T::is_applicable(fastfield_accessor, stats.clone()) {
        return;
    }
-    let (ratio, name, id) = (
-        T::estimate(fastfield_accessor, stats.clone()),
-        T::NAME,
-        T::ID,
-    );
+    let (ratio, name, id) = (T::estimate(fastfield_accessor, stats), T::NAME, T::ID);
    estimations.push((ratio, name, id));
 }

@@ -71,7 +67,26 @@ impl CompositeFastFieldSerializer {
        data_iter_1: impl Iterator<Item = u64>,
        data_iter_2: impl Iterator<Item = u64>,
    ) -> io::Result<()> {
-        let field_write = self.composite_write.for_field_with_idx(field, 0);
+        self.create_auto_detect_u64_fast_field_with_idx(
+            field,
+            stats,
+            fastfield_accessor,
+            data_iter_1,
+            data_iter_2,
+            0,
+        )
+    }
+    /// Serialize data into a new u64 fast field written under `(field, idx)`. The best compression codec will be chosen automatically.
+    pub fn create_auto_detect_u64_fast_field_with_idx(
+        &mut self,
+        field: Field,
+        stats: FastFieldStats,
+        fastfield_accessor: impl FastFieldDataAccess,
+        data_iter_1: impl Iterator<Item = u64>,
+        data_iter_2: impl Iterator<Item = u64>,
+        idx: usize,
+    ) -> io::Result<()> {
+        let field_write = self.composite_write.for_field_with_idx(field, idx);

        let mut estimations = vec![];

@@ -90,9 +105,7 @@ impl CompositeFastFieldSerializer {
            &fastfield_accessor,
            &mut estimations,
        );
-        if let Some(broken_estimation) = estimations
-            .iter()
-            .find(|estimation| estimation.0 == f32::NAN)
+        if let Some(broken_estimation) = estimations.iter().find(|estimation| estimation.0.is_nan())
        {
            warn!(
                "broken estimation for fast field codec {}",
--- a/src/fastfield/writer.rs
+++ b/src/fastfield/writer.rs
@@ -1,13 +1,12 @@
 use super::multivalued::MultiValuedFastFieldWriter;
 use super::serializer::FastFieldStats;
 use super::FastFieldDataAccess;
-use crate::common;
 use crate::fastfield::{BytesFastFieldWriter, CompositeFastFieldSerializer};
 use crate::indexer::doc_id_mapping::DocIdMapping;
 use crate::postings::UnorderedTermId;
 use crate::schema::{Cardinality, Document, Field, FieldEntry, FieldType, Schema};
 use crate::termdict::TermOrdinal;
-use crate::DocId;
+use common;
 use fnv::FnvHashMap;
 use std::collections::HashMap;
 use std::io;
@@ -296,7 +295,7 @@ impl IntFastFieldWriter {
        if let Some(doc_id_map) = doc_id_map {
            let iter = doc_id_map
                .iter_old_doc_ids()
-                .map(|doc_id| self.vals.get(*doc_id as usize));
+                .map(|doc_id| self.vals.get(doc_id as usize));
            serializer.create_auto_detect_u64_fast_field(
                self.field,
                stats,
@@ -323,16 +322,17 @@ struct WriterFastFieldAccessProvider<'map, 'bitp> {
    vals: &'bitp BlockedBitpacker,
 }
 impl<'map, 'bitp> FastFieldDataAccess for WriterFastFieldAccessProvider<'map, 'bitp> {
-    /// Return the value associated to the given document.
+    /// Return the value associated to the given doc.
    ///
-    /// This accessor should return as fast as possible.
+    /// Whenever possible use the Iterator passed to the fastfield creation instead, for performance reasons.
    ///
    /// # Panics
    ///
-    /// May panic if `doc` is greater than the segment
-    fn get(&self, doc: DocId) -> u64 {
+    /// May panic if `doc` is greater than the index.
+    fn get_val(&self, doc: u64) -> u64 {
        if let Some(doc_id_map) = self.doc_id_map {
-            self.vals.get(doc_id_map.get_old_doc_id(doc) as usize) // consider extra FastFieldReader wrapper for non doc_id_map
+            self.vals
+                .get(doc_id_map.get_old_doc_id(doc as u32) as usize) // consider extra FastFieldReader wrapper for non doc_id_map
        } else {
            self.vals.get(doc as usize)
        }
--- a/src/fieldnorm/reader.rs
+++ b/src/fieldnorm/reader.rs
@@ -1,5 +1,5 @@
 use super::{fieldnorm_to_id, id_to_fieldnorm};
-use crate::common::CompositeFile;
+use crate::directory::CompositeFile;
 use crate::directory::FileSlice;
 use crate::directory::OwnedBytes;
 use crate::schema::Field;
--- a/src/fieldnorm/serializer.rs
+++ b/src/fieldnorm/serializer.rs
@@ -1,4 +1,4 @@
-use crate::common::CompositeWrite;
+use crate::directory::CompositeWrite;
 use crate::directory::WritePtr;
 use crate::schema::Field;
 use std::io;
--- a/src/fieldnorm/writer.rs
+++ b/src/fieldnorm/writer.rs
@@ -98,7 +98,7 @@ impl FieldNormsWriter {
                let mut mapped_fieldnorm_values = vec![];
                mapped_fieldnorm_values.resize(fieldnorm_values.len(), 0u8);
                for (new_doc_id, old_doc_id) in doc_id_map.iter_old_doc_ids().enumerate() {
-                    mapped_fieldnorm_values[new_doc_id] = fieldnorm_values[*old_doc_id as usize];
+                    mapped_fieldnorm_values[new_doc_id] = fieldnorm_values[old_doc_id as usize];
                }
                fieldnorms_serializer.serialize_field(field, &mapped_fieldnorm_values)?;
            } else {
--- a/src/functional_test.rs
+++ b/src/functional_test.rs
@@ -1,4 +1,8 @@
+use crate::schema;
 use crate::Index;
+use crate::IndexSettings;
+use crate::IndexSortByField;
+use crate::Order;
 use crate::Searcher;
 use crate::{doc, schema::*};
 use rand::thread_rng;
@@ -35,10 +39,10 @@ fn test_functional_store() -> crate::Result<()> {
    let mut doc_set: Vec<u64> = Vec::new();

    let mut doc_id = 0u64;
-    for iteration in 0..500 {
+    for iteration in 0..get_num_iterations() {
        dbg!(iteration);
        let num_docs: usize = rng.gen_range(0..4);
-        if doc_set.len() >= 1 {
+        if !doc_set.is_empty() {
            let doc_to_remove_id = rng.gen_range(0..doc_set.len());
            let removed_doc_id = doc_set.swap_remove(doc_to_remove_id);
            index_writer.delete_term(Term::from_field_u64(id_field, removed_doc_id));
@@ -56,16 +60,37 @@ fn test_functional_store() -> crate::Result<()> {
    Ok(())
 }

+fn get_num_iterations() -> usize {
+    std::env::var("NUM_FUNCTIONAL_TEST_ITERATIONS")
+        .map(|str| str.parse().unwrap())
+        .unwrap_or(2000)
+}
 #[test]
 #[ignore]
-fn test_functional_indexing() -> crate::Result<()> {
+fn test_functional_indexing_sorted() -> crate::Result<()> {
    let mut schema_builder = Schema::builder();

-    let id_field = schema_builder.add_u64_field("id", INDEXED);
+    let id_field = schema_builder.add_u64_field("id", INDEXED | FAST);
    let multiples_field = schema_builder.add_u64_field("multiples", INDEXED);
+    let text_field_options = TextOptions::default()
+        .set_indexing_options(
+            TextFieldIndexing::default()
+                .set_index_option(schema::IndexRecordOption::WithFreqsAndPositions),
+        )
+        .set_stored();
+    let text_field = schema_builder.add_text_field("text_field", text_field_options);
    let schema = schema_builder.build();

-    let index = Index::create_from_tempdir(schema)?;
+    let mut index_builder = Index::builder().schema(schema);
+    index_builder = index_builder.settings(IndexSettings {
+        sort_by_field: Some(IndexSortByField {
+            field: "id".to_string(),
+            order: Order::Desc,
+        }),
+        ..Default::default()
+    });
+    let index = index_builder.create_from_tempdir().unwrap();
+
    let reader = index.reader()?;

    let mut rng = thread_rng();
@@ -75,7 +100,7 @@ fn test_functional_indexing() -> crate::Result<()> {
    let mut committed_docs: HashSet<u64> = HashSet::new();
    let mut uncommitted_docs: HashSet<u64> = HashSet::new();

-    for _ in 0..200 {
+    for _ in 0..get_num_iterations() {
        let random_val = rng.gen_range(0..20);
        if random_val == 0 {
            index_writer.commit()?;
@@ -88,19 +113,95 @@ fn test_functional_indexing() -> crate::Result<()> {
                &searcher,
                &committed_docs.iter().cloned().collect::<Vec<u64>>(),
            )?;
+        } else if committed_docs.remove(&random_val) || uncommitted_docs.remove(&random_val) {
+            let doc_id_term = Term::from_field_u64(id_field, random_val);
+            index_writer.delete_term(doc_id_term);
        } else {
-            if committed_docs.remove(&random_val) || uncommitted_docs.remove(&random_val) {
-                let doc_id_term = Term::from_field_u64(id_field, random_val);
-                index_writer.delete_term(doc_id_term);
-            } else {
-                uncommitted_docs.insert(random_val);
-                let mut doc = Document::new();
-                doc.add_u64(id_field, random_val);
-                for i in 1u64..10u64 {
-                    doc.add_u64(multiples_field, random_val * i);
-                }
-                index_writer.add_document(doc);
+            uncommitted_docs.insert(random_val);
+            let mut doc = Document::new();
+            doc.add_u64(id_field, random_val);
+            for i in 1u64..10u64 {
+                doc.add_u64(multiples_field, random_val * i);
            }
+            doc.add_text(text_field, get_text());
+            index_writer.add_document(doc);
+        }
+    }
+    Ok(())
+}
+
+const LOREM: &str = "Doc Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed \
+             do eiusmod tempor incididunt ut labore et dolore magna aliqua. \
+             Ut enim ad minim veniam, quis nostrud exercitation ullamco \
+             laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure \
+             dolor in reprehenderit in voluptate velit esse cillum dolore eu \
+             fugiat nulla pariatur. Excepteur sint occaecat cupidatat non \
+             proident, sunt in culpa qui officia deserunt mollit anim id est \
+             laborum.";
+/// Returns between 0 and 19 tokens picked at random (with repetition) from
+/// `LOREM` split on single spaces, joined back together with single spaces.
+fn get_text() -> String {
+    use rand::seq::SliceRandom;
+    let mut rng = thread_rng();
+    let vocabulary: Vec<_> = LOREM.split(' ').collect();
+    let num_tokens = rng.gen_range(0..20);
+
+    let mut picked = Vec::new();
+    for _ in 0..num_tokens {
+        picked.push(*vocabulary.choose(&mut rng).unwrap());
+    }
+    picked.join(" ")
+}
+
+#[test]
+#[ignore]
+fn test_functional_indexing_unsorted() -> crate::Result<()> {
+    let mut schema_builder = Schema::builder();
+
+    let id_field = schema_builder.add_u64_field("id", INDEXED);
+    let multiples_field = schema_builder.add_u64_field("multiples", INDEXED);
+    let text_field_options = TextOptions::default()
+        .set_indexing_options(
+            TextFieldIndexing::default()
+                .set_index_option(schema::IndexRecordOption::WithFreqsAndPositions),
+        )
+        .set_stored();
+    let text_field = schema_builder.add_text_field("text_field", text_field_options);
+    let schema = schema_builder.build();
+
+    let index = Index::create_from_tempdir(schema)?;
+    let reader = index.reader()?;
+
+    let mut rng = thread_rng();
+
+    let mut index_writer = index.writer_with_num_threads(3, 120_000_000)?;
+
+    let mut committed_docs: HashSet<u64> = HashSet::new();
+    let mut uncommitted_docs: HashSet<u64> = HashSet::new();
+
+    for _ in 0..get_num_iterations() {
+        let random_val = rng.gen_range(0..20);
+        if random_val == 0 {
+            index_writer.commit()?;
+            committed_docs.extend(&uncommitted_docs);
+            uncommitted_docs.clear();
+            reader.reload()?;
+            let searcher = reader.searcher();
+            // check that everything is correct.
+            check_index_content(
+                &searcher,
+                &committed_docs.iter().cloned().collect::<Vec<u64>>(),
+            )?;
+        } else if committed_docs.remove(&random_val) || uncommitted_docs.remove(&random_val) {
+            let doc_id_term = Term::from_field_u64(id_field, random_val);
+            index_writer.delete_term(doc_id_term);
+        } else {
+            uncommitted_docs.insert(random_val);
+            let mut doc = Document::new();
+            doc.add_u64(id_field, random_val);
+            for i in 1u64..10u64 {
+                doc.add_u64(multiples_field, random_val * i);
+            }
+            doc.add_text(text_field, get_text());
+            index_writer.add_document(doc);
        }
    }
    Ok(())
--- a/src/indexer/delete_queue.rs
+++ b/src/indexer/delete_queue.rs
@@ -1,6 +1,6 @@
 use super::operation::DeleteOperation;
 use crate::Opstamp;
-use std::mem;
+
 use std::ops::DerefMut;
 use std::sync::{Arc, RwLock, Weak};

@@ -105,7 +105,7 @@ impl DeleteQueue {
            return None;
        }

-        let delete_operations = mem::replace(&mut self_wlock.writer, vec![]);
+        let delete_operations = std::mem::take(&mut self_wlock.writer);

        let new_block = Arc::new(Block {
            operations: Arc::from(delete_operations.into_boxed_slice()),
@@ -286,7 +286,7 @@ mod tests {
            operations_it.advance();
        }
        {
-            let mut operations_it = snapshot.clone();
+            let mut operations_it = snapshot;
            assert_eq!(operations_it.get().unwrap().opstamp, 1);
            operations_it.advance();
            assert_eq!(operations_it.get().unwrap().opstamp, 2);
--- a/src/indexer/doc_id_mapping.rs
+++ b/src/indexer/doc_id_mapping.rs
@@ -2,13 +2,61 @@
 //! to get mappings from old doc_id to new doc_id and vice versa, after sorting
 //!

-use super::SegmentWriter;
+use super::{merger::SegmentReaderWithOrdinal, SegmentWriter};
 use crate::{
    schema::{Field, Schema},
    DocId, IndexSortByField, Order, TantivyError,
 };
-use std::cmp::Reverse;
-/// Struct to provide mapping from old doc_id to new doc_id and vice versa
+use std::{cmp::Reverse, ops::Index};
+
+/// Struct to provide mapping from new doc_id to old doc_id and segment.
+///
+/// The position in the underlying vector is the new doc_id; the entry stored
+/// there is the old doc_id paired with its `SegmentReaderWithOrdinal`.
+#[derive(Clone)]
+pub(crate) struct SegmentDocidMapping<'a> {
+    new_doc_id_to_old_and_segment: Vec<(DocId, SegmentReaderWithOrdinal<'a>)>,
+    is_trivial: bool,
+}
+
+impl<'a> SegmentDocidMapping<'a> {
+    /// Wraps an already computed mapping. `is_trivial` is stored as given and
+    /// later returned by `is_trivial()`.
+    pub(crate) fn new(
+        new_doc_id_to_old_and_segment: Vec<(DocId, SegmentReaderWithOrdinal<'a>)>,
+        is_trivial: bool,
+    ) -> Self {
+        Self {
+            new_doc_id_to_old_and_segment,
+            is_trivial,
+        }
+    }
+    /// Iterates over the `(old doc_id, segment)` entries in new doc_id order.
+    pub(crate) fn iter(&self) -> impl Iterator<Item = &(DocId, SegmentReaderWithOrdinal)> {
+        self.new_doc_id_to_old_and_segment.iter()
+    }
+    /// Number of entries in the mapping.
+    pub(crate) fn len(&self) -> usize {
+        self.new_doc_id_to_old_and_segment.len()
+    }
+    /// This flag means the segments are simply stacked in the order of their ordinal.
+    /// e.g. [(0, 1), .. (n, 1), (0, 2)..., (m, 2)]
+    ///
+    /// This allows for some optimization.
+    pub(crate) fn is_trivial(&self) -> bool {
+        self.is_trivial
+    }
+}
+/// Direct access to the entry of a given new doc_id (panics when out of bounds).
+impl<'a> Index<usize> for SegmentDocidMapping<'a> {
+    type Output = (DocId, SegmentReaderWithOrdinal<'a>);
+
+    fn index(&self, idx: usize) -> &Self::Output {
+        &self.new_doc_id_to_old_and_segment[idx]
+    }
+}
+/// Consumes the mapping and yields its entries by value, in new doc_id order.
+impl<'a> IntoIterator for SegmentDocidMapping<'a> {
+    type Item = (DocId, SegmentReaderWithOrdinal<'a>);
+    type IntoIter = std::vec::IntoIter<Self::Item>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.new_doc_id_to_old_and_segment.into_iter()
+    }
+}
+
+/// Struct to provide mapping from old doc_id to new doc_id and vice versa within a segment.
 pub struct DocIdMapping {
    new_doc_id_to_old: Vec<DocId>,
    old_doc_id_to_new: Vec<DocId>,
@@ -24,8 +72,8 @@ impl DocIdMapping {
        self.new_doc_id_to_old[doc_id as usize]
    }
    /// iterate over old doc_ids in order of the new doc_ids
-    pub fn iter_old_doc_ids(&self) -> std::slice::Iter<'_, DocId> {
-        self.new_doc_id_to_old.iter()
+    pub fn iter_old_doc_ids(&self) -> impl Iterator<Item = DocId> + Clone + '_ {
+        self.new_doc_id_to_old.iter().cloned()
    }
 }

--- a/src/indexer/doc_opstamp_mapping.rs
+++ b/src/indexer/doc_opstamp_mapping.rs
@@ -14,35 +14,27 @@ use crate::Opstamp;
 // The doc to opstamp mapping stores precisely an array
 // indexed by doc id and storing the opstamp of the document.
 //
-// This mapping is (for the moment) stricly increasing
-// because of the way document id are allocated.
+// This mapping is NOT necessarily increasing, because
+// we might be sorting documents according to a fast field.
 #[derive(Clone)]
 pub enum DocToOpstampMapping<'a> {
    WithMap(&'a [Opstamp]),
    None,
 }

-impl<'a> From<&'a [u64]> for DocToOpstampMapping<'a> {
-    fn from(opstamps: &[Opstamp]) -> DocToOpstampMapping {
-        DocToOpstampMapping::WithMap(opstamps)
-    }
-}
-
 impl<'a> DocToOpstampMapping<'a> {
-    /// Given an opstamp return the limit doc id L
-    /// such that all doc id D such that
-    // D >= L iff opstamp(D) >= than `target_opstamp`.
-    //
-    // The edge case opstamp = some doc opstamp is in practise
-    // never called.
-    pub fn compute_doc_limit(&self, target_opstamp: Opstamp) -> DocId {
-        match *self {
-            DocToOpstampMapping::WithMap(ref doc_opstamps) => {
-                match doc_opstamps.binary_search(&target_opstamp) {
-                    Ok(doc_id) | Err(doc_id) => doc_id as DocId,
-                }
+    /// Assess whether a document should be considered deleted given that it contains
+    /// a deleted term that was deleted at the opstamp: `delete_opstamp`.
+    ///
+    /// This function returns true if the `DocToOpstamp` mapping is none or if
+    /// the `doc_opstamp` is anterior to the delete opstamp.
+    pub fn is_deleted(&self, doc_id: DocId, delete_opstamp: Opstamp) -> bool {
+        match self {
+            Self::WithMap(doc_opstamps) => {
+                let doc_opstamp = doc_opstamps[doc_id as usize];
+                doc_opstamp < delete_opstamp
            }
-            DocToOpstampMapping::None => DocId::max_value(),
+            Self::None => true,
        }
    }
 }
@@ -55,40 +47,17 @@ mod tests {
    #[test]
    fn test_doc_to_opstamp_mapping_none() {
        let doc_to_opstamp_mapping = DocToOpstampMapping::None;
-        assert_eq!(
-            doc_to_opstamp_mapping.compute_doc_limit(1),
-            u32::max_value()
-        );
+        assert!(doc_to_opstamp_mapping.is_deleted(1u32, 0u64));
+        assert!(doc_to_opstamp_mapping.is_deleted(1u32, 2u64));
    }

    #[test]
-    fn test_doc_to_opstamp_mapping_complex() {
-        {
-            let doc_to_opstamp_mapping = DocToOpstampMapping::from(&[][..]);
-            assert_eq!(doc_to_opstamp_mapping.compute_doc_limit(0u64), 0);
-            assert_eq!(doc_to_opstamp_mapping.compute_doc_limit(2u64), 0);
-        }
-        {
-            let doc_to_opstamp_mapping = DocToOpstampMapping::from(&[1u64][..]);
-            assert_eq!(doc_to_opstamp_mapping.compute_doc_limit(0u64), 0);
-            assert_eq!(doc_to_opstamp_mapping.compute_doc_limit(2u64), 1);
-        }
-        {
-            let doc_to_opstamp_mapping =
-                DocToOpstampMapping::from(&[1u64, 12u64, 17u64, 23u64][..]);
-            assert_eq!(doc_to_opstamp_mapping.compute_doc_limit(0u64), 0);
-            for i in 2u64..13u64 {
-                assert_eq!(doc_to_opstamp_mapping.compute_doc_limit(i), 1);
-            }
-            for i in 13u64..18u64 {
-                assert_eq!(doc_to_opstamp_mapping.compute_doc_limit(i), 2);
-            }
-            for i in 18u64..24u64 {
-                assert_eq!(doc_to_opstamp_mapping.compute_doc_limit(i), 3);
-            }
-            for i in 24u64..30u64 {
-                assert_eq!(doc_to_opstamp_mapping.compute_doc_limit(i), 4);
-            }
-        }
+    fn test_doc_to_opstamp_mapping_with_map() {
+        let doc_to_opstamp_mapping = DocToOpstampMapping::WithMap(&[5u64, 1u64, 0u64, 4u64, 3u64]);
+        assert_eq!(doc_to_opstamp_mapping.is_deleted(0u32, 2u64), false);
+        assert_eq!(doc_to_opstamp_mapping.is_deleted(1u32, 2u64), true);
+        assert_eq!(doc_to_opstamp_mapping.is_deleted(2u32, 2u64), true);
+        assert_eq!(doc_to_opstamp_mapping.is_deleted(3u32, 2u64), false);
+        assert_eq!(doc_to_opstamp_mapping.is_deleted(4u32, 2u64), false);
    }
 }
--- a/src/indexer/index_writer.rs
+++ b/src/indexer/index_writer.rs
@@ -1,7 +1,6 @@
 use super::operation::{AddOperation, UserOperation};
 use super::segment_updater::SegmentUpdater;
 use super::PreparedCommit;
-use crate::common::BitSet;
 use crate::core::Index;
 use crate::core::Segment;
 use crate::core::SegmentComponent;
@@ -24,6 +23,7 @@ use crate::schema::Document;
 use crate::schema::IndexRecordOption;
 use crate::schema::Term;
 use crate::Opstamp;
+use common::BitSet;
 use crossbeam::channel;
 use futures::executor::block_on;
 use futures::future::Future;
@@ -106,22 +106,18 @@ fn compute_deleted_bitset(
        }

        // A delete operation should only affect
+        // documents that were inserted before it.
-        //
-        // Limit doc helps identify the first document
-        // that may be affected by the delete operation.
-        let limit_doc = doc_opstamps.compute_doc_limit(delete_op.opstamp);
+        // document that were inserted before it.
        let inverted_index = segment_reader.inverted_index(delete_op.term.field())?;
        if let Some(mut docset) =
            inverted_index.read_postings(&delete_op.term, IndexRecordOption::Basic)?
        {
-            let mut deleted_doc = docset.doc();
-            while deleted_doc != TERMINATED {
-                if deleted_doc < limit_doc {
-                    delete_bitset.insert(deleted_doc);
+            let mut doc_matching_deleted_term = docset.doc();
+            while doc_matching_deleted_term != TERMINATED {
+                if doc_opstamps.is_deleted(doc_matching_deleted_term, delete_op.opstamp) {
+                    delete_bitset.insert(doc_matching_deleted_term);
                    might_have_changed = true;
                }
-                deleted_doc = docset.advance();
+                doc_matching_deleted_term = docset.advance();
            }
        }
        delete_cursor.advance();
@@ -230,14 +226,8 @@ fn index_documents(

    let segment_with_max_doc = segment.with_max_doc(max_doc);

-    let last_docstamp: Opstamp = *(doc_opstamps.last().unwrap());
-
-    let delete_bitset_opt = apply_deletes(
-        &segment_with_max_doc,
-        &mut delete_cursor,
-        &doc_opstamps,
-        last_docstamp,
-    )?;
+    let delete_bitset_opt =
+        apply_deletes(&segment_with_max_doc, &mut delete_cursor, &doc_opstamps)?;

    let meta = segment_with_max_doc.meta().clone();
    meta.untrack_temp_docstore();
@@ -247,19 +237,26 @@ fn index_documents(
    Ok(true)
 }

+/// `doc_opstamps` is required to be non-empty.
 fn apply_deletes(
    segment: &Segment,
    mut delete_cursor: &mut DeleteCursor,
    doc_opstamps: &[Opstamp],
-    last_docstamp: Opstamp,
 ) -> crate::Result<Option<BitSet>> {
    if delete_cursor.get().is_none() {
        // if there are no delete operation in the queue, no need
        // to even open the segment.
        return Ok(None);
    }
+
+    let max_doc_opstamp: Opstamp = doc_opstamps
+        .iter()
+        .cloned()
+        .max()
+        .expect("Empty DocOpstamp is forbidden");
+
    let segment_reader = SegmentReader::open(segment)?;
-    let doc_to_opstamps = DocToOpstampMapping::from(doc_opstamps);
+    let doc_to_opstamps = DocToOpstampMapping::WithMap(doc_opstamps);

    let max_doc = segment.meta().max_doc();
    let mut deleted_bitset = BitSet::with_max_value(max_doc);
@@ -268,7 +265,7 @@ fn apply_deletes(
        &segment_reader,
        &mut delete_cursor,
        &doc_to_opstamps,
-        last_docstamp,
+        max_doc_opstamp,
    )?;
    Ok(if may_have_deletes {
        Some(deleted_bitset)
@@ -358,7 +355,7 @@ impl IndexWriter {
        // dropping the last reference to the segment_updater.
        self.drop_sender();

-        let former_workers_handles = mem::replace(&mut self.workers_join_handle, vec![]);
+        let former_workers_handles = std::mem::take(&mut self.workers_join_handle);
        for join_handle in former_workers_handles {
            join_handle
                .join()
@@ -628,7 +625,7 @@ impl IndexWriter {
        // and recreate a new one.
        self.recreate_document_channel();

-        let former_workers_join_handle = mem::replace(&mut self.workers_join_handle, Vec::new());
+        let former_workers_join_handle = std::mem::take(&mut self.workers_join_handle);

        for worker_handle in former_workers_join_handle {
            let indexing_worker_result = worker_handle
@@ -784,17 +781,44 @@ impl Drop for IndexWriter {

 #[cfg(test)]
 mod tests {
+    use std::collections::HashMap;
+    use std::collections::HashSet;
+
+    use futures::executor::block_on;
+    use proptest::prelude::*;
+    use proptest::prop_oneof;
+    use proptest::strategy::Strategy;

    use super::super::operation::UserOperation;
    use crate::collector::TopDocs;
    use crate::directory::error::LockError;
    use crate::error::*;
+    use crate::fastfield::FastFieldReader;
    use crate::indexer::NoMergePolicy;
+    use crate::query::QueryParser;
    use crate::query::TermQuery;
-    use crate::schema::{self, IndexRecordOption, STRING};
+    use crate::schema::Cardinality;
+    use crate::schema::Facet;
+    use crate::schema::IntOptions;
+    use crate::schema::TextFieldIndexing;
+    use crate::schema::TextOptions;
+    use crate::schema::STORED;
+    use crate::schema::TEXT;
+    use crate::schema::{self, IndexRecordOption, FAST, INDEXED, STRING};
+    use crate::DocAddress;
    use crate::Index;
    use crate::ReloadPolicy;
    use crate::Term;
+    use crate::{IndexSettings, IndexSortByField, Order};
+
+    const LOREM: &str = "Doc Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed \
+             do eiusmod tempor incididunt ut labore et dolore magna aliqua. \
+             Ut enim ad minim veniam, quis nostrud exercitation ullamco \
+             laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure \
+             dolor in reprehenderit in voluptate velit esse cillum dolore eu \
+             fugiat nulla pariatur. Excepteur sint occaecat cupidatat non \
+             proident, sunt in culpa qui officia deserunt mollit anim id est \
+             laborum.";

    #[test]
    fn test_operations_group() {
@@ -1282,6 +1306,343 @@ mod tests {
        assert!(commit_again.is_ok());
    }

+    /// Documents added and deleted within one commit on an index sorted by `id`
+    /// (descending) must leave the surviving ids readable in sort order.
+    #[test]
+    fn test_delete_with_sort_by_field() -> crate::Result<()> {
+        let mut builder = schema::Schema::builder();
+        let id_options = schema::INDEXED | schema::STORED | schema::FAST;
+        let id_field = builder.add_u64_field("id", id_options);
+
+        let sort_by_id_desc = IndexSortByField {
+            field: "id".to_string(),
+            order: Order::Desc,
+        };
+        let index = Index::builder()
+            .schema(builder.build())
+            .settings(IndexSettings {
+                sort_by_field: Some(sort_by_id_desc),
+                ..Default::default()
+            })
+            .create_in_ram()?;
+        let reader = index.reader()?;
+        let mut writer = index.writer_for_tests()?;
+
+        // All of the following happens within a single commit:
+        // add ids 0 to 4, delete ids 2 and 3, then add ids 5 to 9.
+        for id in 0u64..5u64 {
+            writer.add_document(doc!(id_field => id));
+        }
+        for id in 2u64..4u64 {
+            writer.delete_term(Term::from_field_u64(id_field, id));
+        }
+        for id in 5u64..10u64 {
+            writer.add_document(doc!(id_field => id));
+        }
+        writer.commit()?;
+        reader.reload()?;
+
+        let searcher = reader.searcher();
+        assert_eq!(searcher.segment_readers().len(), 1);
+
+        // Ten docs were added and two deleted: max_doc still reports 10,
+        // num_docs only the 8 survivors.
+        let segment = searcher.segment_reader(0);
+        assert_eq!(segment.num_docs(), 8);
+        assert_eq!(segment.max_doc(), 10);
+
+        let id_reader = segment.fast_fields().u64(id_field)?;
+        let mut alive_ids: Vec<u64> = Vec::new();
+        for doc in segment.doc_ids_alive() {
+            alive_ids.push(id_reader.get(doc));
+        }
+        assert_eq!(&alive_ids[..], &[9, 8, 7, 6, 5, 4, 1, 0]);
+        Ok(())
+    }
+
+    /// One step of a randomly generated indexing scenario, as replayed by
+    /// `test_operation_strategy`.
+    #[derive(Debug, Clone, Copy)]
+    enum IndexingOp {
+        /// Add a document carrying this id.
+        AddDoc { id: u64 },
+        /// Delete by the term matching this id.
+        DeleteDoc { id: u64 },
+        /// Commit the index writer.
+        Commit,
+        /// Merge the searchable segments (only done when there are at least two).
+        Merge,
+    }
+
+    /// Proptest strategy producing a single `IndexingOp`.
+    ///
+    /// Ids for `AddDoc` / `DeleteDoc` are drawn from `0..10`. The integer drawn
+    /// for `Commit` and `Merge` is discarded by the mapping closure.
+    fn operation_strategy() -> impl Strategy<Value = IndexingOp> {
+        prop_oneof![
+            (0u64..10u64).prop_map(|id| IndexingOp::DeleteDoc { id }),
+            (0u64..10u64).prop_map(|id| IndexingOp::AddDoc { id }),
+            (0u64..2u64).prop_map(|_| IndexingOp::Commit),
+            (0u64..1u64).prop_map(|_| IndexingOp::Merge),
+        ]
+    }
+
+    /// Replays `ops` and returns the expected final state as a pair:
+    /// - a map from each id still present to the number of `AddDoc` operations
+    ///   seen for it since its last `DeleteDoc`,
+    /// - the set of ids whose most recent add/delete operation was a `DeleteDoc`.
+    ///
+    /// `Commit` and `Merge` operations are ignored.
+    fn expected_ids(ops: &[IndexingOp]) -> (HashMap<u64, u64>, HashSet<u64>) {
+        let mut occurrences_by_id: HashMap<u64, u64> = HashMap::new();
+        let mut removed_ids: HashSet<u64> = HashSet::new();
+        for op in ops.iter().copied() {
+            if let IndexingOp::AddDoc { id } = op {
+                let occurrences = occurrences_by_id.entry(id).or_insert(0);
+                *occurrences += 1;
+                removed_ids.remove(&id);
+            } else if let IndexingOp::DeleteDoc { id } = op {
+                occurrences_by_id.remove(&id);
+                removed_ids.insert(id);
+            }
+        }
+        (occurrences_by_id, removed_ids)
+    }
+
+    /// Applies `ops` to a fresh in-RAM index and checks that what can be read
+    /// back (fast fields, doc store, search, facets) matches the outcome
+    /// computed by `expected_ids`.
+    ///
+    /// * `sort_index` - sort the index by the `id` fast field (ascending).
+    /// * `force_end_merge` - after the final commit, merge all searchable
+    ///   segments (when there are at least two) before running the checks.
+    fn test_operation_strategy(
+        ops: &[IndexingOp],
+        sort_index: bool,
+        force_end_merge: bool,
+    ) -> crate::Result<()> {
+        let mut schema_builder = schema::Schema::builder();
+        let id_field = schema_builder.add_u64_field("id", FAST | INDEXED | STORED);
+        let text_field = schema_builder.add_text_field(
+            "text_field",
+            TextOptions::default()
+                .set_indexing_options(
+                    TextFieldIndexing::default()
+                        .set_index_option(schema::IndexRecordOption::WithFreqsAndPositions),
+                )
+                .set_stored(),
+        );
+
+        let large_text_field = schema_builder.add_text_field("large_text_field", TEXT | STORED);
+
+        let multi_numbers = schema_builder.add_u64_field(
+            "multi_numbers",
+            IntOptions::default()
+                .set_fast(Cardinality::MultiValues)
+                .set_stored(),
+        );
+        let facet_field = schema_builder.add_facet_field("facet", INDEXED);
+        let schema = schema_builder.build();
+        let settings = if sort_index {
+            IndexSettings {
+                sort_by_field: Some(IndexSortByField {
+                    field: "id".to_string(),
+                    order: Order::Asc,
+                }),
+                ..Default::default()
+            }
+        } else {
+            IndexSettings {
+                ..Default::default()
+            }
+        };
+        let index = Index::builder()
+            .schema(schema)
+            .settings(settings)
+            .create_in_ram()?;
+        let mut index_writer = index.writer_for_tests()?;
+        // Merges must only happen when the scenario explicitly asks for one.
+        index_writer.set_merge_policy(Box::new(NoMergePolicy));
+
+        for &op in ops {
+            match op {
+                IndexingOp::AddDoc { id } => {
+                    // Every field of the document is derived from `id`, so each
+                    // check below can be tied back to the expected ids.
+                    let facet = Facet::from(&("/cola/".to_string() + &id.to_string()));
+                    index_writer
+                        .add_document(doc!(id_field=>id, multi_numbers=> id, multi_numbers => id, text_field => id.to_string(), facet_field => facet, large_text_field=> LOREM));
+                }
+                IndexingOp::DeleteDoc { id } => {
+                    index_writer.delete_term(Term::from_field_u64(id_field, id));
+                }
+                IndexingOp::Commit => {
+                    index_writer.commit()?;
+                }
+                IndexingOp::Merge => {
+                    let segment_ids = index
+                        .searchable_segment_ids()
+                        .expect("Searchable segments failed.");
+                    if segment_ids.len() >= 2 {
+                        block_on(index_writer.merge(&segment_ids)).unwrap();
+                        assert!(index_writer.segment_updater().wait_merging_thread().is_ok());
+                    }
+                }
+            }
+        }
+        index_writer.commit()?;
+
+        if force_end_merge {
+            index_writer.wait_merging_threads()?;
+            let mut index_writer = index.writer_for_tests()?;
+            let segment_ids = index
+                .searchable_segment_ids()
+                .expect("Searchable segments failed.");
+            if segment_ids.len() >= 2 {
+                block_on(index_writer.merge(&segment_ids)).unwrap();
+                assert!(index_writer.wait_merging_threads().is_ok());
+            }
+        }
+        // Acquire the searcher only now: when `force_end_merge` is set, the
+        // checks below must run against the merged segment, not against the
+        // segments as they were before the merge.
+        let searcher = index.reader()?.searcher();
+        let ids: HashSet<u64> = searcher
+            .segment_readers()
+            .iter()
+            .flat_map(|segment_reader| {
+                let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap();
+                segment_reader
+                    .doc_ids_alive()
+                    .map(move |doc| ff_reader.get(doc))
+            })
+            .collect();
+
+        let (expected_ids_and_num_occurences, deleted_ids) = expected_ids(ops);
+        assert_eq!(
+            ids,
+            expected_ids_and_num_occurences
+                .keys()
+                .cloned()
+                .collect::<HashSet<_>>()
+        );
+
+        // multivalue fast field tests
+        for segment_reader in searcher.segment_readers().iter() {
+            let ff_reader = segment_reader.fast_fields().u64s(multi_numbers).unwrap();
+            for doc in segment_reader.doc_ids_alive() {
+                let mut vals = vec![];
+                ff_reader.get_vals(doc, &mut vals);
+                // `multi_numbers` was written twice with the doc id.
+                assert_eq!(vals.len(), 2);
+                assert_eq!(vals[0], vals[1]);
+                assert!(expected_ids_and_num_occurences.contains_key(&vals[0]));
+            }
+        }
+
+        // doc store tests
+        for segment_reader in searcher.segment_readers().iter() {
+            let store_reader = segment_reader.get_store_reader().unwrap();
+            // test store iterator
+            for doc in store_reader.iter(segment_reader.delete_bitset()) {
+                let id = doc
+                    .unwrap()
+                    .get_first(id_field)
+                    .unwrap()
+                    .u64_value()
+                    .unwrap();
+                assert!(expected_ids_and_num_occurences.contains_key(&id));
+            }
+            // test store random access
+            for doc_id in segment_reader.doc_ids_alive() {
+                let id = store_reader
+                    .get(doc_id)
+                    .unwrap()
+                    .get_first(id_field)
+                    .unwrap()
+                    .u64_value()
+                    .unwrap();
+                assert!(expected_ids_and_num_occurences.contains_key(&id));
+                let id2 = store_reader
+                    .get(doc_id)
+                    .unwrap()
+                    .get_first(multi_numbers)
+                    .unwrap()
+                    .u64_value()
+                    .unwrap();
+                assert_eq!(id, id2);
+            }
+        }
+        // test search
+        let my_text_field = index.schema().get_field("text_field").unwrap();
+
+        let do_search = |term: &str| {
+            let query = QueryParser::for_index(&index, vec![my_text_field])
+                .parse_query(term)
+                .unwrap();
+            let top_docs: Vec<(f32, DocAddress)> =
+                searcher.search(&query, &TopDocs::with_limit(1000)).unwrap();
+
+            top_docs.iter().map(|el| el.1).collect::<Vec<_>>()
+        };
+
+        // `text_field` holds the id as text: each live id must match exactly as
+        // many docs as it was added, and deleted ids must match nothing.
+        for (existing_id, count) in expected_ids_and_num_occurences {
+            assert_eq!(do_search(&existing_id.to_string()).len() as u64, count);
+        }
+        for existing_id in deleted_ids {
+            assert_eq!(do_search(&existing_id.to_string()).len(), 0);
+        }
+        // test facets
+        for segment_reader in searcher.segment_readers().iter() {
+            let mut facet_reader = segment_reader.facet_reader(facet_field).unwrap();
+            let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap();
+            for doc_id in segment_reader.doc_ids_alive() {
+                let mut facet_ords = Vec::new();
+                facet_reader.facet_ords(doc_id, &mut facet_ords);
+                assert_eq!(facet_ords.len(), 1);
+                let mut facet = Facet::default();
+                facet_reader
+                    .facet_from_ord(facet_ords[0], &mut facet)
+                    .unwrap();
+                let id = ff_reader.get(doc_id);
+                let facet_expected = Facet::from(&("/cola/".to_string() + &id.to_string()));
+
+                assert_eq!(facet, facet_expected);
+            }
+        }
+        Ok(())
+    }
+
+    // Property tests: run random sequences of 1 to 9 operations through
+    // `test_operation_strategy`, once for each combination of
+    // (sorted / unsorted index) x (with / without a final forced merge).
+    proptest! {
+        #[test]
+        fn test_delete_with_sort_proptest(ops in proptest::collection::vec(operation_strategy(), 1..10)) {
+            assert!(test_operation_strategy(&ops[..], true, false).is_ok());
+        }
+        #[test]
+        fn test_delete_without_sort_proptest(ops in proptest::collection::vec(operation_strategy(), 1..10)) {
+            assert!(test_operation_strategy(&ops[..], false, false).is_ok());
+        }
+        #[test]
+        fn test_delete_with_sort_proptest_with_merge(ops in proptest::collection::vec(operation_strategy(), 1..10)) {
+            assert!(test_operation_strategy(&ops[..], true, true).is_ok());
+        }
+        #[test]
+        fn test_delete_without_sort_proptest_with_merge(ops in proptest::collection::vec(operation_strategy(), 1..10)) {
+            assert!(test_operation_strategy(&ops[..], false, true).is_ok());
+        }
+    }
+
+    /// Checks deletes on a sorted index where the document added last (and
+    /// therefore holding the highest opstamp) does not sort last in the segment.
+    #[test]
+    fn test_delete_with_sort_by_field_last_opstamp_is_not_max() -> crate::Result<()> {
+        let mut builder = schema::Schema::builder();
+        let sort_by_field = builder.add_u64_field("sort_by", FAST);
+        let id_field = builder.add_u64_field("id", INDEXED);
+
+        let ascending_sort = IndexSortByField {
+            field: "sort_by".to_string(),
+            order: Order::Asc,
+        };
+        let index = Index::builder()
+            .schema(builder.build())
+            .settings(IndexSettings {
+                sort_by_field: Some(ascending_sort),
+                ..Default::default()
+            })
+            .create_in_ram()?;
+        let mut writer = index.writer_for_tests()?;
+
+        // First doc, with sort key 2.
+        writer.add_document(doc!(sort_by_field => 2u64, id_field => 0u64));
+        // Delete it through its id term.
+        writer.delete_term(Term::from_field_u64(id_field, 0u64));
+        // Second doc reuses the same id but has the smaller sort key (1),
+        // even though it was added after the delete.
+        writer.add_document(doc!(sort_by_field=>1u64, id_field => 0u64));
+
+        // Expect a single segment with max_doc = 2 and exactly one deleted doc.
+        writer.commit()?;
+
+        let searcher = index.reader()?.searcher();
+        assert_eq!(searcher.segment_readers().len(), 1);
+
+        let segment = searcher.segment_reader(0);
+        assert_eq!(segment.max_doc(), 2);
+        assert_eq!(segment.num_deleted_docs(), 1);
+        Ok(())
+    }
+
    #[test]
    fn test_index_doc_missing_field() {
        let mut schema_builder = schema::Schema::builder();
--- a/src/indexer/merger.rs
+++ b/src/indexer/merger.rs
@@ -1,4 +1,3 @@
-use super::doc_id_mapping::DocIdMapping;
 use crate::error::DataCorruption;
 use crate::fastfield::CompositeFastFieldSerializer;
 use crate::fastfield::DeleteBitSet;
@@ -6,10 +5,12 @@ use crate::fastfield::DynamicFastFieldReader;
 use crate::fastfield::FastFieldDataAccess;
 use crate::fastfield::FastFieldReader;
 use crate::fastfield::FastFieldStats;
+use crate::fastfield::MultiValueLength;
 use crate::fastfield::MultiValuedFastFieldReader;
 use crate::fieldnorm::FieldNormsSerializer;
 use crate::fieldnorm::FieldNormsWriter;
 use crate::fieldnorm::{FieldNormReader, FieldNormReaders};
+use crate::indexer::doc_id_mapping::SegmentDocidMapping;
 use crate::indexer::SegmentSerializer;
 use crate::postings::Postings;
 use crate::postings::{InvertedIndexSerializer, SegmentPostings};
@@ -19,22 +20,28 @@ use crate::schema::{Field, Schema};
 use crate::store::StoreWriter;
 use crate::termdict::TermMerger;
 use crate::termdict::TermOrdinal;
-use crate::{common::HasLen, fastfield::MultiValueLength};
-use crate::{common::MAX_DOC_LIMIT, IndexSettings};
+use crate::IndexSettings;
+use crate::IndexSortByField;
 use crate::{core::Segment, indexer::doc_id_mapping::expect_field_id_for_sort_field};
 use crate::{core::SegmentReader, Order};
-use crate::{core::SerializableSegment, IndexSortByField};
 use crate::{
    docset::{DocSet, TERMINATED},
    SegmentOrdinal,
 };
 use crate::{DocId, InvertedIndexReader, SegmentComponent};
+use common::HasLen;
 use itertools::Itertools;
+use measure_time::debug_time;
 use std::cmp;
 use std::collections::HashMap;
 use std::sync::Arc;
 use tantivy_bitpacker::minmax;

+/// Segment's max doc must be `< MAX_DOC_LIMIT`.
+///
+/// We do not allow segments with `MAX_DOC_LIMIT` (2^31) documents or more.
+pub const MAX_DOC_LIMIT: u32 = 1 << 31;
+
 fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> crate::Result<u64> {
    let mut total_tokens = 0u64;
    let mut count: [usize; 256] = [0; 256];
@@ -143,7 +150,7 @@ impl TermOrdinalMapping {
            .iter()
            .flat_map(|term_ordinals| term_ordinals.iter().cloned().max())
            .max()
-            .unwrap_or_else(TermOrdinal::default)
+            .unwrap_or_default()
    }
 }

@@ -215,7 +222,7 @@ impl IndexMerger {
        let mut readers_with_min_sort_values = readers
            .into_iter()
            .map(|reader| {
-                let accessor = Self::get_sort_field_accessor(&reader, &sort_by_field)?;
+                let accessor = Self::get_sort_field_accessor(&reader, sort_by_field)?;
                Ok((reader, accessor.min_value()))
            })
            .collect::<crate::Result<Vec<_>>>()?;
@@ -233,33 +240,24 @@ impl IndexMerger {
    fn write_fieldnorms(
        &self,
        mut fieldnorms_serializer: FieldNormsSerializer,
-        doc_id_mapping: &Option<Vec<(DocId, SegmentReaderWithOrdinal)>>,
+        doc_id_mapping: &SegmentDocidMapping,
    ) -> crate::Result<()> {
        let fields = FieldNormsWriter::fields_with_fieldnorm(&self.schema);
        let mut fieldnorms_data = Vec::with_capacity(self.max_doc as usize);
        for field in fields {
            fieldnorms_data.clear();
-            if let Some(doc_id_mapping) = doc_id_mapping {
-                let fieldnorms_readers: Vec<FieldNormReader> = self
-                    .readers
-                    .iter()
-                    .map(|reader| reader.get_fieldnorms_reader(field))
-                    .collect::<Result<_, _>>()?;
-                for (doc_id, reader_with_ordinal) in doc_id_mapping {
-                    let fieldnorms_reader =
-                        &fieldnorms_readers[reader_with_ordinal.ordinal as usize];
-                    let fieldnorm_id = fieldnorms_reader.fieldnorm_id(*doc_id);
-                    fieldnorms_data.push(fieldnorm_id);
-                }
-            } else {
-                for reader in &self.readers {
-                    let fieldnorms_reader = reader.get_fieldnorms_reader(field)?;
-                    for doc_id in reader.doc_ids_alive() {
-                        let fieldnorm_id = fieldnorms_reader.fieldnorm_id(doc_id);
-                        fieldnorms_data.push(fieldnorm_id);
-                    }
-                }
+
+            let fieldnorms_readers: Vec<FieldNormReader> = self
+                .readers
+                .iter()
+                .map(|reader| reader.get_fieldnorms_reader(field))
+                .collect::<Result<_, _>>()?;
+            for (doc_id, reader_with_ordinal) in doc_id_mapping.iter() {
+                let fieldnorms_reader = &fieldnorms_readers[reader_with_ordinal.ordinal as usize];
+                let fieldnorm_id = fieldnorms_reader.fieldnorm_id(*doc_id);
+                fieldnorms_data.push(fieldnorm_id);
            }
+
            fieldnorms_serializer.serialize_field(field, &fieldnorms_data[..])?;
        }
        fieldnorms_serializer.close()?;
@@ -270,8 +268,10 @@ impl IndexMerger {
        &self,
        fast_field_serializer: &mut CompositeFastFieldSerializer,
        mut term_ord_mappings: HashMap<Field, TermOrdinalMapping>,
-        doc_id_mapping: &Option<Vec<(DocId, SegmentReaderWithOrdinal)>>,
+        doc_id_mapping: &SegmentDocidMapping,
    ) -> crate::Result<()> {
+        debug_time!("write_fast_fields");
+
        for (field, field_entry) in self.schema.fields() {
            let field_type = field_entry.field_type();
            match field_type {
@@ -319,7 +319,7 @@ impl IndexMerger {
        &self,
        field: Field,
        fast_field_serializer: &mut CompositeFastFieldSerializer,
-        doc_id_mapping: &Option<Vec<(DocId, SegmentReaderWithOrdinal)>>,
+        doc_id_mapping: &SegmentDocidMapping,
    ) -> crate::Result<()> {
        let (min_value, max_value) = self.readers.iter().map(|reader|{
                let u64_reader: DynamicFastFieldReader<u64> = reader
@@ -328,7 +328,7 @@ impl IndexMerger {
                .expect("Failed to find a reader for single fast field. This is a tantivy bug and it should never happen.");
                compute_min_max_val(&u64_reader, reader.max_doc(), reader.delete_bitset())
            })
-            .filter_map(|x| x)
+            .flatten()
            .reduce(|a, b| {
                (a.0.min(b.0), a.1.max(b.1))
            }).expect("Unexpected error, empty readers in IndexMerger");
@@ -344,68 +344,44 @@ impl IndexMerger {
                u64_reader
            })
            .collect::<Vec<_>>();
-        if let Some(doc_id_mapping) = doc_id_mapping {
-            #[derive(Clone)]
-            struct SortedDocidFieldAccessProvider<'a> {
-                doc_id_mapping: &'a Vec<(DocId, SegmentReaderWithOrdinal<'a>)>,
-                fast_field_readers: &'a Vec<DynamicFastFieldReader<u64>>,
-            }
-            impl<'a> FastFieldDataAccess for SortedDocidFieldAccessProvider<'a> {
-                fn get(&self, doc: DocId) -> u64 {
-                    let (doc_id, reader_with_ordinal) = self.doc_id_mapping[doc as usize];
-                    self.fast_field_readers[reader_with_ordinal.ordinal as usize].get(doc_id)
-                }
-            }
-            let stats = FastFieldStats {
-                min_value,
-                max_value,
-                num_vals: doc_id_mapping.len() as u64,
-            };
-            let fastfield_accessor = SortedDocidFieldAccessProvider {
-                doc_id_mapping,
-                fast_field_readers: &fast_field_readers,
-            };
-            let iter = doc_id_mapping.iter().map(|(doc_id, reader_with_ordinal)| {
-                let fast_field_reader = &fast_field_readers[reader_with_ordinal.ordinal as usize];
-                fast_field_reader.get(*doc_id)
-            });
-            fast_field_serializer.create_auto_detect_u64_fast_field(
-                field,
-                stats,
-                fastfield_accessor,
-                iter.clone(),
-                iter,
-            )?;

-            Ok(())
-        } else {
-            let u64_readers = self.readers.iter()
-                .filter(|reader|reader.max_doc() != reader.delete_bitset().map(|bit_set|bit_set.len() as u32).unwrap_or(0))
-                .map(|reader|{
-                let u64_reader: DynamicFastFieldReader<u64> = reader
-                .fast_fields()
-                .typed_fast_field_reader(field)
-                .expect("Failed to find a reader for single fast field. This is a tantivy bug and it should never happen.");
-                (reader.max_doc(), u64_reader, reader.delete_bitset())
-            }).collect::<Vec<_>>();
-
-            let mut fast_single_field_serializer =
-                fast_field_serializer.new_u64_fast_field(field, min_value, max_value)?;
-            for (max_doc, u64_reader, delete_bitset_opt) in u64_readers {
-                for doc_id in 0u32..max_doc {
-                    let is_deleted = delete_bitset_opt
-                        .map(|delete_bitset| delete_bitset.is_deleted(doc_id))
-                        .unwrap_or(false);
-                    if !is_deleted {
-                        let val = u64_reader.get(doc_id);
-                        fast_single_field_serializer.add_val(val)?;
-                    }
-                }
-            }
-
-            fast_single_field_serializer.close_field()?;
-            Ok(())
+        let stats = FastFieldStats {
+            min_value,
+            max_value,
+            num_vals: doc_id_mapping.len() as u64,
+        };
+        #[derive(Clone)]
+        struct SortedDocidFieldAccessProvider<'a> {
+            doc_id_mapping: &'a SegmentDocidMapping<'a>,
+            fast_field_readers: &'a Vec<DynamicFastFieldReader<u64>>,
        }
+        impl<'a> FastFieldDataAccess for SortedDocidFieldAccessProvider<'a> {
+            fn get_val(&self, doc: u64) -> u64 {
+                let (doc_id, reader_with_ordinal) = self.doc_id_mapping[doc as usize];
+                self.fast_field_readers[reader_with_ordinal.ordinal as usize].get(doc_id)
+            }
+        }
+        let fastfield_accessor = SortedDocidFieldAccessProvider {
+            doc_id_mapping,
+            fast_field_readers: &fast_field_readers,
+        };
+        let iter1 = doc_id_mapping.iter().map(|(doc_id, reader_with_ordinal)| {
+            let fast_field_reader = &fast_field_readers[reader_with_ordinal.ordinal as usize];
+            fast_field_reader.get(*doc_id)
+        });
+        let iter2 = doc_id_mapping.iter().map(|(doc_id, reader_with_ordinal)| {
+            let fast_field_reader = &fast_field_readers[reader_with_ordinal.ordinal as usize];
+            fast_field_reader.get(*doc_id)
+        });
+        fast_field_serializer.create_auto_detect_u64_fast_field(
+            field,
+            stats,
+            fastfield_accessor,
+            iter1,
+            iter2,
+        )?;
+
+        Ok(())
    }

    /// Checks if the readers are disjunct for their sort property and in the correct order to be
@@ -434,7 +410,7 @@ impl IndexMerger {
        reader: &SegmentReader,
        sort_by_field: &IndexSortByField,
    ) -> crate::Result<impl FastFieldReader<u64>> {
-        let field_id = expect_field_id_for_sort_field(&reader.schema(), &sort_by_field)?; // for now expect fastfield, but not strictly required
+        let field_id = expect_field_id_for_sort_field(reader.schema(), sort_by_field)?; // for now expect fastfield, but not strictly required
        let value_accessor = reader.fast_fields().u64_lenient(field_id)?;
        Ok(value_accessor)
    }
@@ -469,7 +445,7 @@ impl IndexMerger {
    pub(crate) fn generate_doc_id_mapping(
        &self,
        sort_by_field: &IndexSortByField,
-    ) -> crate::Result<Vec<(DocId, SegmentReaderWithOrdinal)>> {
+    ) -> crate::Result<SegmentDocidMapping> {
        let reader_and_field_accessors = self.get_reader_with_sort_field_accessor(sort_by_field)?;
        // Loading the field accessor on demand causes a 15x regression

@@ -505,7 +481,7 @@ impl IndexMerger {
            })
            .map(|(doc_id, reader_with_id, _)| (doc_id, reader_with_id))
            .collect::<Vec<_>>();
-        Ok(sorted_doc_ids)
+        Ok(SegmentDocidMapping::new(sorted_doc_ids, false))
    }

    // Creating the index file to point into the data, generic over `BytesFastFieldReader` and
@@ -517,18 +493,18 @@ impl IndexMerger {
    fn write_1_n_fast_field_idx_generic<T: MultiValueLength>(
        field: Field,
        fast_field_serializer: &mut CompositeFastFieldSerializer,
-        doc_id_mapping: &Option<Vec<(DocId, SegmentReaderWithOrdinal)>>,
+        doc_id_mapping: &SegmentDocidMapping,
        reader_and_field_accessors: &[(&SegmentReader, T)],
-    ) -> crate::Result<()> {
+    ) -> crate::Result<Vec<u64>> {
        let mut total_num_vals = 0u64;
        // In the first pass, we compute the total number of vals.
        //
        // This is required by the bitpacker, as it needs to know
        // what should be the bit length use for bitpacking.
-        let mut idx_num_vals = 0;
+        let mut num_docs = 0;
        for (reader, u64s_reader) in reader_and_field_accessors.iter() {
            if let Some(delete_bitset) = reader.delete_bitset() {
-                idx_num_vals += reader.max_doc() as u64 - delete_bitset.len() as u64;
+                num_docs += reader.max_doc() as u64 - delete_bitset.len() as u64;
                for doc in 0u32..reader.max_doc() {
                    if delete_bitset.is_alive(doc) {
                        let num_vals = u64s_reader.get_len(doc) as u64;
@@ -536,68 +512,50 @@ impl IndexMerger {
                    }
                }
            } else {
-                idx_num_vals += reader.max_doc() as u64;
+                num_docs += reader.max_doc() as u64;
                total_num_vals += u64s_reader.get_total_len();
            }
        }

        let stats = FastFieldStats {
            max_value: total_num_vals,
-            num_vals: idx_num_vals,
+            // The fastfield offset index contains (num_docs + 1) values.
+            num_vals: num_docs + 1,
            min_value: 0,
        };
        // We can now create our `idx` serializer, and in a second pass,
        // can effectively push the different indexes.
-        if let Some(doc_id_mapping) = doc_id_mapping {
-            // copying into a temp vec is not ideal, but the fast field codec api requires random
-            // access, which is used in the estimation. It's possible to 1. calculate random
-            // acccess on the fly or 2. change the codec api to make random access optional, but
-            // they both have also major drawbacks.

-            let mut offsets = vec![];
-            let mut offset = 0;
-            for (doc_id, reader) in doc_id_mapping {
-                let reader = &reader_and_field_accessors[reader.ordinal as usize].1;
-                offsets.push(offset);
-                offset += reader.get_len(*doc_id) as u64;
-            }
+        // Copying into a temp vec is not ideal, but the fast field codec api requires random
+        // access, which is used in the estimation. It's possible to 1. calculate random
+        // access on the fly or 2. change the codec api to make random access optional, but
+        // both options also have major drawbacks.
+
+        let mut offsets = vec![];
+        let mut offset = 0;
+        for (doc_id, reader) in doc_id_mapping.iter() {
+            let reader = &reader_and_field_accessors[reader.ordinal as usize].1;
            offsets.push(offset);
-
-            fast_field_serializer.create_auto_detect_u64_fast_field(
-                field,
-                stats,
-                &offsets,
-                offsets.iter().cloned(),
-                offsets.iter().cloned(),
-            )?;
-        } else {
-            let mut offsets = vec![];
-            let mut offset = 0;
-            for (segment_reader, u64s_reader) in reader_and_field_accessors.iter() {
-                for doc in segment_reader.doc_ids_alive() {
-                    offsets.push(offset);
-                    offset += u64s_reader.get_len(doc) as u64;
-                }
-            }
-            offsets.push(offset);
-
-            fast_field_serializer.create_auto_detect_u64_fast_field(
-                field,
-                stats,
-                &offsets,
-                offsets.iter().cloned(),
-                offsets.iter().cloned(),
-            )?;
+            offset += reader.get_len(*doc_id) as u64;
        }
+        offsets.push(offset);

-        Ok(())
+        fast_field_serializer.create_auto_detect_u64_fast_field(
+            field,
+            stats,
+            &offsets[..],
+            offsets.iter().cloned(),
+            offsets.iter().cloned(),
+        )?;
+        Ok(offsets)
    }
+    /// Returns the fastfield index (index for the data, not the data).
    fn write_multi_value_fast_field_idx(
        &self,
        field: Field,
        fast_field_serializer: &mut CompositeFastFieldSerializer,
-        doc_id_mapping: &Option<Vec<(DocId, SegmentReaderWithOrdinal)>>,
-    ) -> crate::Result<()> {
+        doc_id_mapping: &SegmentDocidMapping,
+    ) -> crate::Result<Vec<u64>> {
        let reader_and_field_accessors = self.readers.iter().map(|reader|{
            let u64s_reader: MultiValuedFastFieldReader<u64> = reader.fast_fields()
                .typed_fast_field_multi_reader(field)
@@ -618,9 +576,11 @@ impl IndexMerger {
        field: Field,
        term_ordinal_mappings: &TermOrdinalMapping,
        fast_field_serializer: &mut CompositeFastFieldSerializer,
-        doc_id_mapping: &Option<Vec<(DocId, SegmentReaderWithOrdinal)>>,
+        doc_id_mapping: &SegmentDocidMapping,
    ) -> crate::Result<()> {
-        // Multifastfield consists in 2 fastfields.
+        debug_time!("write_hierarchical_facet_field");
+
+        // Multifastfield consists of 2 fastfields.
        // The first serves as an index into the second one and is stricly increasing.
        // The second contains the actual values.

@@ -645,53 +605,61 @@ impl IndexMerger {
            let mut serialize_vals =
                fast_field_serializer.new_u64_fast_field_with_idx(field, 0u64, max_term_ord, 1)?;
            let mut vals = Vec::with_capacity(100);
-            if let Some(doc_id_mapping) = doc_id_mapping {
-                for (old_doc_id, reader_with_ordinal) in doc_id_mapping {
-                    let term_ordinal_mapping: &[TermOrdinal] =
-                        term_ordinal_mappings.get_segment(reader_with_ordinal.ordinal as usize);

-                    let ff_reader = &fast_field_reader[reader_with_ordinal.ordinal as usize];
-                    ff_reader.get_vals(*old_doc_id, &mut vals);
-                    for &prev_term_ord in &vals {
-                        let new_term_ord = term_ordinal_mapping[prev_term_ord as usize];
-                        serialize_vals.add_val(new_term_ord)?;
-                    }
-                }
-            } else {
-                for (segment_ord, segment_reader) in self.readers.iter().enumerate() {
-                    let term_ordinal_mapping: &[TermOrdinal] =
-                        term_ordinal_mappings.get_segment(segment_ord);
-                    let ff_reader = &fast_field_reader[segment_ord as usize];
-                    // TODO optimize if no deletes
-                    for doc in segment_reader.doc_ids_alive() {
-                        ff_reader.get_vals(doc, &mut vals);
-                        for &prev_term_ord in &vals {
-                            let new_term_ord = term_ordinal_mapping[prev_term_ord as usize];
-                            serialize_vals.add_val(new_term_ord)?;
-                        }
-                    }
+            for (old_doc_id, reader_with_ordinal) in doc_id_mapping.iter() {
+                let term_ordinal_mapping: &[TermOrdinal] =
+                    term_ordinal_mappings.get_segment(reader_with_ordinal.ordinal as usize);
+
+                let ff_reader = &fast_field_reader[reader_with_ordinal.ordinal as usize];
+                ff_reader.get_vals(*old_doc_id, &mut vals);
+                for &prev_term_ord in &vals {
+                    let new_term_ord = term_ordinal_mapping[prev_term_ord as usize];
+                    serialize_vals.add_val(new_term_ord)?;
                }
            }
+
            serialize_vals.close_field()?;
        }
        Ok(())
    }

+    /// Builds the doc id mapping for the case where segments are simply stacked (no index
+    /// sorting): the alive docs of each reader, in reader order. Having a mapping in both
+    /// cases lets the sorted and unsorted merge paths share the same code. The `true` flag
+    /// presumably marks the mapping as trivial (cf. `is_trivial()`) -- confirm in its ctor.
+    pub(crate) fn get_doc_id_from_concatenated_data(&self) -> crate::Result<SegmentDocidMapping> {
+        let mut stacked_doc_ids = Vec::new();
+        for (segment_ord, segment_reader) in self.readers.iter().enumerate() {
+            let segment = SegmentReaderWithOrdinal {
+                ordinal: segment_ord as u32,
+                reader: segment_reader,
+            };
+            // Every surviving doc of this segment, in its original order.
+            stacked_doc_ids.extend(
+                segment_reader
+                    .doc_ids_alive()
+                    .map(move |doc_id| (doc_id, segment)),
+            );
+        }
+        Ok(SegmentDocidMapping::new(stacked_doc_ids, true))
+    }
    fn write_multi_fast_field(
        &self,
        field: Field,
        fast_field_serializer: &mut CompositeFastFieldSerializer,
-        doc_id_mapping: &Option<Vec<(DocId, SegmentReaderWithOrdinal)>>,
+        doc_id_mapping: &SegmentDocidMapping,
    ) -> crate::Result<()> {
        // Multifastfield consists in 2 fastfields.
        // The first serves as an index into the second one and is stricly increasing.
        // The second contains the actual values.

        // First we merge the idx fast field.
-        self.write_multi_value_fast_field_idx(field, fast_field_serializer, doc_id_mapping)?;
+        let offsets =
+            self.write_multi_value_fast_field_idx(field, fast_field_serializer, doc_id_mapping)?;

        let mut min_value = u64::max_value();
        let mut max_value = u64::min_value();
+        let mut num_vals = 0;

        let mut vals = Vec::with_capacity(100);

@@ -717,6 +685,7 @@ impl IndexMerger {
                    min_value = cmp::min(val, min_value);
                    max_value = cmp::max(val, max_value);
                }
+                num_vals += vals.len();
            }
            ff_readers.push(ff_reader);
            // TODO optimize when no deletes
@@ -727,40 +696,76 @@ impl IndexMerger {
            max_value = 0;
        }

-        let fast_field_reader = self
-            .readers
-            .iter()
-            .map(|reader| {
-                let ff_reader : MultiValuedFastFieldReader<u64> = reader.fast_fields()
-                .typed_fast_field_multi_reader(field)
-                .expect("Failed to find index for multivalued field. This is a bug in tantivy, please report.");
-                ff_reader
-            })
-            .collect::<Vec<_>>();
-
        // We can now initialize our serializer, and push it the different values
-        let mut serialize_vals =
-            fast_field_serializer.new_u64_fast_field_with_idx(field, min_value, max_value, 1)?;
-        if let Some(doc_id_mapping) = doc_id_mapping {
-            for (doc_id, reader_with_ordinal) in doc_id_mapping {
-                let ff_reader = &fast_field_reader[reader_with_ordinal.ordinal as usize];
-                ff_reader.get_vals(*doc_id, &mut vals);
-                for &val in &vals {
-                    serialize_vals.add_val(val)?;
-                }
-            }
-        } else {
-            for (reader, ff_reader) in self.readers.iter().zip(ff_readers) {
-                // TODO optimize if no deletes
-                for doc in reader.doc_ids_alive() {
-                    ff_reader.get_vals(doc, &mut vals);
-                    for &val in &vals {
-                        serialize_vals.add_val(val)?;
-                    }
-                }
+        let stats = FastFieldStats {
+            max_value,
+            num_vals: num_vals as u64,
+            min_value,
+        };
+
+        struct SortedDocidMultiValueAccessProvider<'a> {
+            doc_id_mapping: &'a SegmentDocidMapping<'a>,
+            fast_field_readers: &'a Vec<MultiValuedFastFieldReader<u64>>,
+            offsets: Vec<u64>,
+        }
+        impl<'a> FastFieldDataAccess for SortedDocidMultiValueAccessProvider<'a> {
+            /// Returns the value stored at flat position `pos` of the merged multivalued
+            /// column, resolving it to `(segment, old doc id, index within that doc)`.
+            ///
+            /// Panics with "pos is out of bounds" if `pos` is not below the last offset.
+            fn get_val(&self, pos: u64) -> u64 {
+                // `offsets[i]` is the position of the first value of new doc `i`. Docs
+                // without values repeat an offset, so the list is sorted, not strictly
+                // increasing. Binary-search the first offset above `pos`; the doc before
+                // it owns `pos`.
+                let upper = self.offsets.partition_point(|&offset| offset <= pos);
+                assert!(upper < self.offsets.len(), "pos is out of bounds");
+                let new_docid = upper - 1;
+
+                // Index of `pos` inside that document's own list of values.
+                let pos_in_values = pos - self.offsets[new_docid];
+
+                let (old_doc_id, reader_with_ordinal) = self.doc_id_mapping[new_docid];
+                let ff_reader = &self.fast_field_readers[reader_with_ordinal.ordinal as usize];
+                // Must be strict: `pos_in_values == len` would index one past the end.
+                assert!(pos_in_values < ff_reader.get_len(old_doc_id));
+                let mut vals = vec![];
+                ff_reader.get_vals(old_doc_id, &mut vals);
+                vals[pos_in_values as usize]
+            }
+        }
-        serialize_vals.close_field()?;
+        let fastfield_accessor = SortedDocidMultiValueAccessProvider {
+            doc_id_mapping,
+            fast_field_readers: &ff_readers,
+            offsets,
+        };
+        let iter1 = doc_id_mapping
+            .iter()
+            .map(|(doc_id, reader_with_ordinal)| {
+                let ff_reader = &ff_readers[reader_with_ordinal.ordinal as usize];
+                let mut vals = vec![];
+                ff_reader.get_vals(*doc_id, &mut vals);
+                vals.into_iter()
+            })
+            .flatten();
+        let iter2 = doc_id_mapping
+            .iter()
+            .map(|(doc_id, reader_with_ordinal)| {
+                let ff_reader = &ff_readers[reader_with_ordinal.ordinal as usize];
+                let mut vals = vec![];
+                ff_reader.get_vals(*doc_id, &mut vals);
+                vals.into_iter()
+            })
+            .flatten();
+        fast_field_serializer.create_auto_detect_u64_fast_field_with_idx(
+            field,
+            stats,
+            fastfield_accessor,
+            iter1,
+            iter2,
+            1,
+        )?;
+
        Ok(())
    }

@@ -768,7 +773,7 @@ impl IndexMerger {
        &self,
        field: Field,
        fast_field_serializer: &mut CompositeFastFieldSerializer,
-        doc_id_mapping: &Option<Vec<(DocId, SegmentReaderWithOrdinal)>>,
+        doc_id_mapping: &SegmentDocidMapping,
    ) -> crate::Result<()> {
        let reader_and_field_accessors = self
            .readers
@@ -787,24 +792,13 @@ impl IndexMerger {
            &reader_and_field_accessors,
        )?;
        let mut serialize_vals = fast_field_serializer.new_bytes_fast_field_with_idx(field, 1);
-        if let Some(doc_id_mapping) = doc_id_mapping {
-            for (doc_id, reader_with_ordinal) in doc_id_mapping {
-                let bytes_reader =
-                    &reader_and_field_accessors[reader_with_ordinal.ordinal as usize].1;
-                let val = bytes_reader.get_bytes(*doc_id);
-                serialize_vals.write_all(val)?;
-            }
-        } else {
-            for segment_reader in &self.readers {
-                let bytes_reader = segment_reader.fast_fields().bytes(field)
-                .expect("Failed to find bytes field in fast field reader. This is a bug in tantivy. Please report.");
-                // TODO: optimize if no deletes
-                for doc in segment_reader.doc_ids_alive() {
-                    let val = bytes_reader.get_bytes(doc);
-                    serialize_vals.write_all(val)?;
-                }
-            }
+
+        for (doc_id, reader_with_ordinal) in doc_id_mapping.iter() {
+            let bytes_reader = &reader_and_field_accessors[reader_with_ordinal.ordinal as usize].1;
+            let val = bytes_reader.get_bytes(*doc_id);
+            serialize_vals.write_all(val)?;
        }
+
        serialize_vals.flush()?;
        Ok(())
    }
@@ -815,8 +809,9 @@ impl IndexMerger {
        field_type: &FieldType,
        serializer: &mut InvertedIndexSerializer,
        fieldnorm_reader: Option<FieldNormReader>,
-        doc_id_mapping: &Option<Vec<(DocId, SegmentReaderWithOrdinal)>>,
+        doc_id_mapping: &SegmentDocidMapping,
    ) -> crate::Result<Option<TermOrdinalMapping>> {
+        debug_time!("write_postings_for_field");
        let mut positions_buffer: Vec<u32> = Vec::with_capacity(1_000);
        let mut delta_computer = DeltaComputer::new();

@@ -841,40 +836,23 @@ impl IndexMerger {
        };

        let mut merged_terms = TermMerger::new(field_term_streams);
-        let mut max_doc = 0;

        // map from segment doc ids to the resulting merged segment doc id.
-        let mut merged_doc_id_map: Vec<Vec<Option<DocId>>> = Vec::with_capacity(self.readers.len());

-        if let Some(doc_id_mapping) = doc_id_mapping {
-            merged_doc_id_map = self
-                .readers
-                .iter()
-                .map(|reader| {
-                    let mut segment_local_map = vec![];
-                    segment_local_map.resize(reader.max_doc() as usize, None);
-                    segment_local_map
-                })
-                .collect();
-            for (new_doc_id, (old_doc_id, segment_and_ordinal)) in doc_id_mapping.iter().enumerate()
-            {
-                let segment_map = &mut merged_doc_id_map[segment_and_ordinal.ordinal as usize];
-                segment_map[*old_doc_id as usize] = Some(new_doc_id as DocId);
-            }
-        } else {
-            for reader in &self.readers {
-                let mut segment_local_map = Vec::with_capacity(reader.max_doc() as usize);
-                for doc_id in 0..reader.max_doc() {
-                    if reader.is_deleted(doc_id) {
-                        segment_local_map.push(None);
-                    } else {
-                        segment_local_map.push(Some(max_doc));
-                        max_doc += 1u32;
-                    }
-                }
-                merged_doc_id_map.push(segment_local_map);
-            }
+        let mut merged_doc_id_map: Vec<Vec<Option<DocId>>> = self
+            .readers
+            .iter()
+            .map(|reader| {
+                let mut segment_local_map = vec![];
+                segment_local_map.resize(reader.max_doc() as usize, None);
+                segment_local_map
+            })
+            .collect();
+        for (new_doc_id, (old_doc_id, segment_and_ordinal)) in doc_id_mapping.iter().enumerate() {
+            let segment_map = &mut merged_doc_id_map[segment_and_ordinal.ordinal as usize];
+            segment_map[*old_doc_id as usize] = Some(new_doc_id as DocId);
        }
+
        // The total number of tokens will only be exact when there has been no deletes.
        //
        // Otherwise, we approximate by removing deleted documents proportionally.
@@ -970,7 +948,7 @@ impl IndexMerger {
                        // I think this is not strictly necessary, it would be possible to
                        // avoid the loading into a vec via some form of kmerge, but then the merge
                        // logic would deviate much more from the stacking case (unsorted index)
-                        if doc_id_mapping.is_some() {
+                        if !doc_id_mapping.is_trivial() {
                            doc_id_and_positions.push((
                                remapped_doc_id,
                                term_freq,
@@ -985,14 +963,15 @@ impl IndexMerger {
                    doc = segment_postings.advance();
                }
            }
-            if doc_id_mapping.is_some() {
+            if !doc_id_mapping.is_trivial() {
                doc_id_and_positions.sort_unstable_by_key(|&(doc_id, _, _)| doc_id);
+
                for (doc_id, term_freq, positions) in &doc_id_and_positions {
-                    field_serializer.write_doc(*doc_id, *term_freq, positions);
+                    let delta_positions = delta_computer.compute_delta(positions);
+                    field_serializer.write_doc(*doc_id, *term_freq, delta_positions);
                }
                doc_id_and_positions.clear();
            }
-
            // closing the term.
            field_serializer.close_term()?;
        }
@@ -1004,7 +983,7 @@ impl IndexMerger {
        &self,
        serializer: &mut InvertedIndexSerializer,
        fieldnorm_readers: FieldNormReaders,
-        doc_id_mapping: &Option<Vec<(DocId, SegmentReaderWithOrdinal)>>,
+        doc_id_mapping: &SegmentDocidMapping,
    ) -> crate::Result<HashMap<Field, TermOrdinalMapping>> {
        let mut term_ordinal_mappings = HashMap::new();
        for (field, field_entry) in self.schema.fields() {
@@ -1027,8 +1006,10 @@ impl IndexMerger {
    fn write_storable_fields(
        &self,
        store_writer: &mut StoreWriter,
-        doc_id_mapping: &Option<Vec<(DocId, SegmentReaderWithOrdinal)>>,
+        doc_id_mapping: &SegmentDocidMapping,
    ) -> crate::Result<()> {
+        debug_time!("write_storable_fields");
+
        let store_readers: Vec<_> = self
            .readers
            .iter()
@@ -1039,8 +1020,8 @@ impl IndexMerger {
            .enumerate()
            .map(|(i, store)| store.iter_raw(self.readers[i].delete_bitset()))
            .collect();
-        if let Some(doc_id_mapping) = doc_id_mapping {
-            for (old_doc_id, reader_with_ordinal) in doc_id_mapping {
+        if !doc_id_mapping.is_trivial() {
+            for (old_doc_id, reader_with_ordinal) in doc_id_mapping.iter() {
                let doc_bytes_it = &mut document_iterators[reader_with_ordinal.ordinal as usize];
                if let Some(doc_bytes_res) = doc_bytes_it.next() {
                    let doc_bytes = doc_bytes_res?;
@@ -1084,25 +1065,24 @@ impl IndexMerger {
        }
        Ok(())
    }
-}

-impl SerializableSegment for IndexMerger {
-    fn write(
-        &self,
-        mut serializer: SegmentSerializer,
-        _: Option<&DocIdMapping>,
-    ) -> crate::Result<u32> {
+    /// Writes the merged segment by pushing information
+    /// to the `SegmentSerializer`.
+    ///
+    /// # Returns
+    /// The number of documents in the resulting segment.
+    pub fn write(&self, mut serializer: SegmentSerializer) -> crate::Result<u32> {
        let doc_id_mapping = if let Some(sort_by_field) = self.index_settings.sort_by_field.as_ref()
        {
            // If the documents are already sorted and stackable, we ignore the mapping and execute
            // it as if there was no sorting
            if self.is_disjunct_and_sorted_on_sort_property(sort_by_field)? {
-                None
+                self.get_doc_id_from_concatenated_data()?
            } else {
-                Some(self.generate_doc_id_mapping(sort_by_field)?)
+                self.generate_doc_id_mapping(sort_by_field)?
            }
        } else {
-            None
+            self.get_doc_id_from_concatenated_data()?
        };

        if let Some(fieldnorms_serializer) = serializer.extract_fieldnorms_serializer() {
@@ -2102,4 +2082,11 @@ mod tests {

        Ok(())
    }
+
+    #[test]
+    fn test_max_doc() {
+        // Pins the value of `MAX_DOC_LIMIT` relative to the `i32` range:
+        // one below the limit still casts to a non-negative `i32`,
+        // while the limit itself casts to a negative `i32`.
+        let limit = super::MAX_DOC_LIMIT;
+        let just_below_limit = limit - 1;
+        assert!((just_below_limit as i32) >= 0);
+        assert!((limit as i32) < 0);
+    }
 }
--- a/src/indexer/merger_sorted_index_test.rs
+++ b/src/indexer/merger_sorted_index_test.rs
@@ -1,6 +1,7 @@
 #[cfg(test)]
 mod tests {
-    use crate::fastfield::FastFieldReader;
+    use crate::fastfield::{DeleteBitSet, FastFieldReader};
+    use crate::schema::IndexRecordOption;
    use crate::{
        collector::TopDocs,
        schema::{Cardinality, TextFieldIndexing},
@@ -16,7 +17,7 @@ mod tests {
        schema::{self, BytesOptions},
        DocAddress,
    };
-    use crate::{IndexSettings, Term};
+    use crate::{DocSet, IndexSettings, Postings, Term};
    use futures::executor::block_on;

    fn create_test_index_posting_list_issue(index_settings: Option<IndexSettings>) -> Index {
@@ -104,9 +105,11 @@ mod tests {
            index_writer.add_document(
                doc!(int_field=>3_u64, multi_numbers => 3_u64, multi_numbers => 4_u64, bytes_field => vec![1, 2, 3], text_field => "some text", facet_field=> Facet::from("/book/crime")),
            );
-            index_writer.add_document(doc!(int_field=>1_u64, text_field=> "deleteme"));
            index_writer.add_document(
-                doc!(int_field=>2_u64, multi_numbers => 2_u64, multi_numbers => 3_u64),
+                doc!(int_field=>1_u64, text_field=> "deleteme",  text_field => "ok text more text"),
+            );
+            index_writer.add_document(
+                doc!(int_field=>2_u64, multi_numbers => 2_u64, multi_numbers => 3_u64, text_field => "ok text more text"),
            );

            assert!(index_writer.commit().is_ok());
@@ -118,7 +121,7 @@ mod tests {
            } else {
                1
            };
-            index_writer.add_document(doc!(int_field=>in_val, text_field=> "deleteme", facet_field=> Facet::from("/book/crime")));
+            index_writer.add_document(doc!(int_field=>in_val, text_field=> "deleteme" , text_field => "ok text more text", facet_field=> Facet::from("/book/crime")));
            assert!(index_writer.commit().is_ok());
            // segment 3 - range 5-1000, with force_disjunct_segment_sort_values 50-1000
            let int_vals = if force_disjunct_segment_sort_values {
@@ -243,6 +246,36 @@ mod tests {
            assert_eq!(do_search("biggest"), vec![0]);
        }

+        // postings file
+        {
+            let my_text_field = index.schema().get_field("text_field").unwrap();
+            let term_a = Term::from_field_text(my_text_field, "text");
+            let inverted_index = segment_reader.inverted_index(my_text_field).unwrap();
+            let mut postings = inverted_index
+                .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
+                .unwrap()
+                .unwrap();
+
+            assert_eq!(postings.doc_freq(), 2);
+            let fallback_bitset = DeleteBitSet::for_test(&[0], 100);
+            assert_eq!(
+                postings.doc_freq_given_deletes(
+                    segment_reader.delete_bitset().unwrap_or(&fallback_bitset)
+                ),
+                2
+            );
+
+            assert_eq!(postings.term_freq(), 1);
+            let mut output = vec![];
+            postings.positions(&mut output);
+            assert_eq!(output, vec![1]);
+            postings.advance();
+
+            assert_eq!(postings.term_freq(), 2);
+            postings.positions(&mut output);
+            assert_eq!(output, vec![1, 3]);
+        }
+
        // access doc store
        {
            let blubber_pos = if force_disjunct_segment_sort_values {
@@ -260,6 +293,69 @@ mod tests {
        }
    }

+    #[test]
+    fn test_merge_unsorted_index() {
+        // Build the shared test index with default `IndexSettings`.
+        let index = create_test_index(Some(IndexSettings::default()), false);
+        let text_field = index.schema().get_field("text_field").unwrap();
+
+        // All documents are expected to live in a single segment.
+        let reader = index.reader().unwrap();
+        let segment_searcher = reader.searcher();
+        assert_eq!(segment_searcher.segment_readers().len(), 1);
+        let segment_reader = segment_searcher.segment_readers().last().unwrap();
+
+        // full-text search: every term must resolve to the expected doc id
+        let query_searcher = index.reader().unwrap().searcher();
+        {
+            let matching_doc_ids = |term: &str| {
+                let query = QueryParser::for_index(&index, vec![text_field])
+                    .parse_query(term)
+                    .unwrap();
+                let top_docs: Vec<(f32, DocAddress)> = query_searcher
+                    .search(&query, &TopDocs::with_limit(3))
+                    .unwrap();
+
+                top_docs
+                    .iter()
+                    .map(|(_score, doc_address)| doc_address.doc_id)
+                    .collect::<Vec<_>>()
+            };
+
+            assert_eq!(matching_doc_ids("some"), vec![1]);
+            assert_eq!(matching_doc_ids("blubber"), vec![3]);
+            assert_eq!(matching_doc_ids("biggest"), vec![4]);
+        }
+
+        // postings file: doc frequency, term frequencies and positions of "text"
+        {
+            let text_term = Term::from_field_text(text_field, "text");
+            let inverted_index = segment_reader.inverted_index(text_field).unwrap();
+            let mut postings = inverted_index
+                .read_postings(&text_term, IndexRecordOption::WithFreqsAndPositions)
+                .unwrap()
+                .unwrap();
+            assert_eq!(postings.doc_freq(), 2);
+
+            // Used only when the segment has no delete bitset of its own.
+            let fallback_bitset = DeleteBitSet::for_test(&[0], 100);
+            let delete_bitset = segment_reader.delete_bitset().unwrap_or(&fallback_bitset);
+            assert_eq!(postings.doc_freq_given_deletes(delete_bitset), 2);
+
+            // first matching document
+            let mut positions = Vec::new();
+            assert_eq!(postings.term_freq(), 1);
+            postings.positions(&mut positions);
+            assert_eq!(positions, vec![1]);
+
+            // second matching document
+            postings.advance();
+            assert_eq!(postings.term_freq(), 2);
+            postings.positions(&mut positions);
+            assert_eq!(positions, vec![1, 3]);
+        }
+    }
+
    #[test]
    fn test_merge_sorted_index_asc() {
        let index = create_test_index(
@@ -314,7 +410,7 @@ mod tests {
            let my_text_field = index.schema().get_field("text_field").unwrap();
            let fieldnorm_reader = segment_reader.get_fieldnorms_reader(my_text_field).unwrap();
            assert_eq!(fieldnorm_reader.fieldnorm(0), 0);
-            assert_eq!(fieldnorm_reader.fieldnorm(1), 0);
+            assert_eq!(fieldnorm_reader.fieldnorm(1), 4);
            assert_eq!(fieldnorm_reader.fieldnorm(2), 2); // some text
            assert_eq!(fieldnorm_reader.fieldnorm(3), 1);
            assert_eq!(fieldnorm_reader.fieldnorm(5), 3); // the biggest num
@@ -339,6 +435,34 @@ mod tests {
            assert_eq!(do_search("biggest"), vec![5]);
        }

+        // postings file
+        {
+            let my_text_field = index.schema().get_field("text_field").unwrap();
+            let term_a = Term::from_field_text(my_text_field, "text");
+            let inverted_index = segment_reader.inverted_index(my_text_field).unwrap();
+            let mut postings = inverted_index
+                .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
+                .unwrap()
+                .unwrap();
+
+            assert_eq!(postings.doc_freq(), 2);
+            let fallback_bitset = DeleteBitSet::for_test(&[0], 100);
+            assert_eq!(
+                postings.doc_freq_given_deletes(
+                    segment_reader.delete_bitset().unwrap_or(&fallback_bitset)
+                ),
+                2
+            );
+
+            let mut output = vec![];
+            postings.positions(&mut output);
+            assert_eq!(output, vec![1, 3]);
+            postings.advance();
+
+            postings.positions(&mut output);
+            assert_eq!(output, vec![1]);
+        }
+
        // access doc store
        {
            let doc = searcher.doc(DocAddress::new(0, 0)).unwrap();
--- a/src/indexer/segment_entry.rs
+++ b/src/indexer/segment_entry.rs
@@ -1,7 +1,7 @@
-use crate::common::BitSet;
 use crate::core::SegmentId;
 use crate::core::SegmentMeta;
 use crate::indexer::delete_queue::DeleteCursor;
+use common::BitSet;
 use std::fmt;

 /// A segment entry describes the state of
--- a/src/indexer/segment_manager.rs
+++ b/src/indexer/segment_manager.rs
@@ -32,6 +32,12 @@ impl SegmentRegisters {
        } else if self.committed.contains_all(segment_ids) {
            Some(SegmentsStatus::Committed)
        } else {
+            warn!(
+                "segment_ids: {:?}, committed_ids: {:?}, uncommitted_ids {:?}",
+                segment_ids,
+                self.committed.segment_ids(),
+                self.uncommitted.segment_ids()
+            );
            None
        }
    }
@@ -58,21 +64,6 @@ impl Debug for SegmentManager {
    }
 }

-pub fn get_mergeable_segments(
-    in_merge_segment_ids: &HashSet<SegmentId>,
-    segment_manager: &SegmentManager,
-) -> (Vec<SegmentMeta>, Vec<SegmentMeta>) {
-    let registers_lock = segment_manager.read();
-    (
-        registers_lock
-            .committed
-            .get_mergeable_segments(in_merge_segment_ids),
-        registers_lock
-            .uncommitted
-            .get_mergeable_segments(in_merge_segment_ids),
-    )
-}
-
 impl SegmentManager {
    pub fn from_segments(
        segment_metas: Vec<SegmentMeta>,
@@ -86,6 +77,20 @@ impl SegmentManager {
        }
    }

+    /// Returns the mergeable segments as a `(committed, uncommitted)` pair.
+    ///
+    /// Both registers are queried through the same `self.read()` guard, and
+    /// `in_merge_segment_ids` is forwarded unchanged to each register's
+    /// own `get_mergeable_segments`.
+    pub fn get_mergeable_segments(
+        &self,
+        in_merge_segment_ids: &HashSet<SegmentId>,
+    ) -> (Vec<SegmentMeta>, Vec<SegmentMeta>) {
+        let registers = self.read();
+        let committed_segments = registers
+            .committed
+            .get_mergeable_segments(in_merge_segment_ids);
+        let uncommitted_segments = registers
+            .uncommitted
+            .get_mergeable_segments(in_merge_segment_ids);
+        (committed_segments, uncommitted_segments)
+    }
+
    /// Returns all of the segment entries (committed or uncommitted)
    pub fn segment_entries(&self) -> Vec<SegmentEntry> {
        let registers_lock = self.read();
--- a/src/indexer/segment_register.rs
+++ b/src/indexer/segment_register.rs
@@ -4,6 +4,7 @@ use crate::indexer::delete_queue::DeleteCursor;
 use crate::indexer::segment_entry::SegmentEntry;
 use std::collections::HashMap;
 use std::collections::HashSet;
+use std::fmt::Display;
 use std::fmt::{self, Debug, Formatter};

 /// The segment register keeps track
@@ -29,6 +30,16 @@ impl Debug for SegmentRegister {
        Ok(())
    }
 }
+impl Display for SegmentRegister {
+    /// Writes `SegmentRegister(` followed by the short uuid string of every
+    /// registered segment id (each one followed by `", "`), then `)`.
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        f.write_str("SegmentRegister(")?;
+        self.segment_states
+            .keys()
+            .try_for_each(|segment_id| write!(f, "{}, ", segment_id.short_uuid_string()))?;
+        f.write_str(")")
+    }
+}

 impl SegmentRegister {
    pub fn clear(&mut self) {
@@ -46,6 +57,10 @@ impl SegmentRegister {
            .collect()
    }

+    /// Returns a copy of the id of every segment held in this register,
+    /// in the iteration order of `segment_states`.
+    pub fn segment_ids(&self) -> Vec<SegmentId> {
+        let mut ids = Vec::with_capacity(self.segment_states.len());
+        ids.extend(self.segment_states.keys().cloned());
+        ids
+    }
+
    pub fn segment_entries(&self) -> Vec<SegmentEntry> {
        self.segment_states.values().cloned().collect()
    }
--- a/src/indexer/segment_updater.rs
+++ b/src/indexer/segment_updater.rs
@@ -1,11 +1,10 @@
-use super::segment_manager::{get_mergeable_segments, SegmentManager};
+use super::segment_manager::SegmentManager;
 use crate::core::Index;
 use crate::core::IndexMeta;
 use crate::core::IndexSettings;
 use crate::core::Segment;
 use crate::core::SegmentId;
 use crate::core::SegmentMeta;
-use crate::core::SerializableSegment;
 use crate::core::META_FILEPATH;
 use crate::directory::{Directory, DirectoryClone, GarbageCollectionResult};
 use crate::indexer::delete_queue::DeleteCursor;
@@ -140,7 +139,7 @@ fn merge(
    // ... we just serialize this index merger in our new segment to merge the segments.
    let segment_serializer = SegmentSerializer::for_segment(merged_segment.clone(), true)?;

-    let num_docs = merger.write(segment_serializer, None)?;
+    let num_docs = merger.write(segment_serializer)?;

    let merged_segment_id = merged_segment.id();

@@ -209,7 +208,7 @@ pub fn merge_segments<Dir: Directory>(
        &segments[..],
    )?;
    let segment_serializer = SegmentSerializer::for_segment(merged_segment, true)?;
-    let num_docs = merger.write(segment_serializer, None)?;
+    let num_docs = merger.write(segment_serializer)?;

    let segment_meta = merged_index.new_segment_meta(merged_segment_id, num_docs);

@@ -528,10 +527,14 @@ impl SegmentUpdater {
        }))
    }

-    async fn consider_merge_options(&self) {
+    pub(crate) fn get_mergeable_segments(&self) -> (Vec<SegmentMeta>, Vec<SegmentMeta>) {
        let merge_segment_ids: HashSet<SegmentId> = self.merge_operations.segment_in_merge();
-        let (committed_segments, uncommitted_segments) =
-            get_mergeable_segments(&merge_segment_ids, &self.segment_manager);
+        self.segment_manager
+            .get_mergeable_segments(&merge_segment_ids)
+    }
+
+    async fn consider_merge_options(&self) {
+        let (committed_segments, uncommitted_segments) = self.get_mergeable_segments();

        // Committed segments cannot be merged with uncommitted_segments.
        // We therefore consider merges using these two sets of segments independently.
@@ -717,7 +720,7 @@ mod tests {

        let seg_ids = index.searchable_segment_ids()?;
        // docs exist, should have at least 1 segment
-        assert!(seg_ids.len() > 0);
+        assert!(!seg_ids.is_empty());

        let term_vals = vec!["a", "b", "c", "d", "e", "f"];
        for term_val in term_vals {
--- a/src/indexer/segment_writer.rs
+++ b/src/indexer/segment_writer.rs
@@ -12,12 +12,12 @@ use crate::schema::Schema;
 use crate::schema::Term;
 use crate::schema::Value;
 use crate::schema::{Field, FieldEntry};
+use crate::store::StoreReader;
 use crate::tokenizer::{BoxTokenStream, PreTokenizedStream};
 use crate::tokenizer::{FacetTokenizer, TextAnalyzer};
 use crate::tokenizer::{TokenStreamChain, Tokenizer};
 use crate::Opstamp;
 use crate::{core::Segment, store::StoreWriter};
-use crate::{core::SerializableSegment, store::StoreReader};
 use crate::{DocId, SegmentComponent};

 /// Computes the initial size of the hash table.
@@ -36,6 +36,20 @@ fn initial_table_size(per_thread_memory_budget: usize) -> crate::Result<usize> {
    }
 }

+/// Reorders per-document opstamps so that they follow a doc id remapping.
+///
+/// With a mapping, the result holds, for each old doc id yielded by
+/// `iter_old_doc_ids()` (in iteration order), the opstamp stored at that
+/// old doc id. Without a mapping, `opstamps` is returned untouched.
+///
+/// # Panics
+/// Panics if the mapping yields an old doc id that is out of bounds
+/// for `opstamps`.
+fn remap_doc_opstamps(
+    opstamps: Vec<Opstamp>,
+    doc_id_mapping_opt: Option<&DocIdMapping>,
+) -> Vec<Opstamp> {
+    match doc_id_mapping_opt {
+        None => opstamps,
+        Some(doc_id_mapping) => doc_id_mapping
+            .iter_old_doc_ids()
+            .map(|old_doc_id| opstamps[old_doc_id as usize])
+            .collect(),
+    }
+}
+
 /// A `SegmentWriter` is in charge of creating segment index from a
 /// set of documents.
 ///
@@ -112,14 +126,15 @@ impl SegmentWriter {
            .clone()
            .map(|sort_by_field| get_doc_id_mapping_from_field(sort_by_field, &self))
            .transpose()?;
-        write(
+        remap_and_write(
            &self.multifield_postings,
            &self.fast_field_writers,
            &self.fieldnorms_writer,
            self.segment_serializer,
            mapping.as_ref(),
        )?;
-        Ok(self.doc_opstamps)
+        let doc_opstamps = remap_doc_opstamps(self.doc_opstamps, mapping.as_ref());
+        Ok(doc_opstamps)
    }

    pub fn mem_usage(&self) -> usize {
@@ -176,7 +191,7 @@ impl SegmentWriter {
                            .process(&mut |token| {
                                term_buffer.set_text(&token.text);
                                let unordered_term_id =
-                                    multifield_postings.subscribe(doc_id, &term_buffer);
+                                    multifield_postings.subscribe(doc_id, term_buffer);
                                unordered_term_id_opt = Some(unordered_term_id);
                            });
                        if let Some(unordered_term_id) = unordered_term_id_opt {
@@ -237,7 +252,7 @@ impl SegmentWriter {
                            .u64_value()
                            .ok_or_else(make_schema_error)?;
                        term_buffer.set_u64(u64_val);
-                        multifield_postings.subscribe(doc_id, &term_buffer);
+                        multifield_postings.subscribe(doc_id, term_buffer);
                    }
                }
                FieldType::Date(_) => {
@@ -248,7 +263,7 @@ impl SegmentWriter {
                            .date_value()
                            .ok_or_else(make_schema_error)?;
                        term_buffer.set_i64(date_val.timestamp());
-                        multifield_postings.subscribe(doc_id, &term_buffer);
+                        multifield_postings.subscribe(doc_id, term_buffer);
                    }
                }
                FieldType::I64(_) => {
@@ -259,7 +274,7 @@ impl SegmentWriter {
                            .i64_value()
                            .ok_or_else(make_schema_error)?;
                        term_buffer.set_i64(i64_val);
-                        multifield_postings.subscribe(doc_id, &term_buffer);
+                        multifield_postings.subscribe(doc_id, term_buffer);
                    }
                }
                FieldType::F64(_) => {
@@ -270,7 +285,7 @@ impl SegmentWriter {
                            .f64_value()
                            .ok_or_else(make_schema_error)?;
                        term_buffer.set_f64(f64_val);
-                        multifield_postings.subscribe(doc_id, &term_buffer);
+                        multifield_postings.subscribe(doc_id, term_buffer);
                    }
                }
                FieldType::Bytes(_) => {
@@ -281,7 +296,7 @@ impl SegmentWriter {
                            .bytes_value()
                            .ok_or_else(make_schema_error)?;
                        term_buffer.set_bytes(bytes);
-                        self.multifield_postings.subscribe(doc_id, &term_buffer);
+                        self.multifield_postings.subscribe(doc_id, term_buffer);
                    }
                }
            }
@@ -315,8 +330,12 @@ impl SegmentWriter {
    }
 }

-// This method is used as a trick to workaround the borrow checker
-fn write(
+/// This method is used as a trick to work around the borrow checker.
+/// Writes a view of a segment by pushing information
+/// to the `SegmentSerializer`.
+///
+/// `doc_id_map` is used to map to the new doc_id order.
+fn remap_and_write(
    multifield_postings: &MultiFieldPostingsWriter,
    fast_field_writers: &FastFieldsWriter,
    fieldnorms_writer: &FieldNormsWriter,
@@ -340,6 +359,7 @@ fn write(
        &term_ord_map,
        doc_id_map,
    )?;
+
    // finalize temp docstore and create version, which reflects the doc_id_map
    if let Some(doc_id_map) = doc_id_map {
        let store_write = serializer
@@ -356,31 +376,16 @@ fn write(
                .segment()
                .open_read(SegmentComponent::TempStore)?,
        )?;
+
        for old_doc_id in doc_id_map.iter_old_doc_ids() {
-            let doc_bytes = store_read.get_document_bytes(*old_doc_id)?;
+            let doc_bytes = store_read.get_document_bytes(old_doc_id)?;
            serializer.get_store_writer().store_bytes(&doc_bytes)?;
        }
    }
-    serializer.close()?;
-    Ok(())
-}

-impl SerializableSegment for SegmentWriter {
-    fn write(
-        &self,
-        serializer: SegmentSerializer,
-        doc_id_map: Option<&DocIdMapping>,
-    ) -> crate::Result<u32> {
-        let max_doc = self.max_doc;
-        write(
-            &self.multifield_postings,
-            &self.fast_field_writers,
-            &self.fieldnorms_writer,
-            serializer,
-            doc_id_map,
-        )?;
-        Ok(max_doc)
-    }
+    serializer.close()?;
+
+    Ok(())
 }

 #[cfg(test)]
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,6 +1,13 @@
 #![doc(html_logo_url = "http://fulmicoton.com/tantivy-logo/tantivy-logo.png")]
 #![cfg_attr(all(feature = "unstable", test), feature(test))]
-#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_inception))]
+#![cfg_attr(
+    feature = "cargo-clippy",
+    allow(
+        clippy::module_inception,
+        clippy::needless_range_loop,
+        clippy::bool_assert_comparison
+    )
+)]
 #![doc(test(attr(allow(unused_variables), deny(warnings))))]
 #![warn(missing_docs)]

@@ -100,7 +107,6 @@

 #[cfg_attr(test, macro_use)]
 extern crate serde_json;
-
 #[macro_use]
 extern crate log;

@@ -129,7 +135,6 @@ pub type Result<T> = std::result::Result<T, TantivyError>;
 /// Tantivy DateTime
 pub type DateTime = chrono::DateTime<chrono::Utc>;

-mod common;
 mod core;
 mod indexer;

@@ -157,8 +162,6 @@ pub use self::snippet::{Snippet, SnippetGenerator};

 mod docset;
 pub use self::docset::{DocSet, TERMINATED};
-pub use crate::common::HasLen;
-pub use crate::common::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
 pub use crate::core::{Executor, SegmentComponent};
 pub use crate::core::{
    Index, IndexBuilder, IndexMeta, IndexSettings, IndexSortByField, Order, Searcher, Segment,
@@ -172,6 +175,8 @@ pub use crate::indexer::IndexWriter;
 pub use crate::postings::Postings;
 pub use crate::reader::LeasedItem;
 pub use crate::schema::{Document, Term};
+pub use common::HasLen;
+pub use common::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
 use std::fmt;

 use once_cell::sync::Lazy;
@@ -287,7 +292,7 @@ pub struct DocAddress {
 }

 #[cfg(test)]
-mod tests {
+pub mod tests {
    use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE;
    use crate::core::SegmentReader;
    use crate::docset::{DocSet, TERMINATED};
@@ -298,11 +303,18 @@ mod tests {
    use crate::Index;
    use crate::Postings;
    use crate::ReloadPolicy;
+    use common::{BinarySerializable, FixedSize};
    use rand::distributions::Bernoulli;
    use rand::distributions::Uniform;
    use rand::rngs::StdRng;
    use rand::{Rng, SeedableRng};

+    /// Test helper: serializes `O::default()` into an in-memory buffer and
+    /// asserts that exactly `O::SIZE_IN_BYTES` bytes were written.
+    ///
+    /// Panics if serialization fails or if the lengths differ.
+    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
+        let mut serialized: Vec<u8> = Vec::new();
+        let default_value = O::default();
+        default_value.serialize(&mut serialized).unwrap();
+        assert_eq!(serialized.len(), O::SIZE_IN_BYTES);
+    }
+
    /// Checks if left and right are close one to each other.
    /// Panics if the two values are more than 0.5% apart.
    #[macro_export]
@@ -934,7 +946,7 @@ mod tests {
        let id = schema_builder.add_u64_field("id", INDEXED);
        let schema = schema_builder.build();

-        let index = Index::create_in_ram(schema.clone());
+        let index = Index::create_in_ram(schema);
        let index_reader = index.reader()?;

        let mut index_writer = index.writer_for_tests()?;
@@ -973,7 +985,7 @@ mod tests {
        let searcher = index_reader.searcher();
        let segment_ids: Vec<SegmentId> = searcher
            .segment_readers()
-            .into_iter()
+            .iter()
            .map(|reader| reader.segment_id())
            .collect();
        block_on(index_writer.merge(&segment_ids)).unwrap();
@@ -987,8 +999,24 @@ mod tests {
    #[test]
    fn test_validate_checksum() -> crate::Result<()> {
        let index_path = tempfile::tempdir().expect("dir");
-        let schema = Schema::builder().build();
+        let mut builder = Schema::builder();
+        let body = builder.add_text_field("body", TEXT | STORED);
+        let schema = builder.build();
        let index = Index::create_in_dir(&index_path, schema)?;
+        let mut writer = index.writer(50_000_000)?;
+        for _ in 0..5000 {
+            writer.add_document(doc!(body => "foo"));
+            writer.add_document(doc!(body => "boo"));
+        }
+        writer.commit()?;
+        assert!(index.validate_checksum()?.is_empty());
+
+        // delete few docs
+        writer.delete_term(Term::from_field_text(body, "foo"));
+        writer.commit()?;
+        let segment_ids = index.searchable_segment_ids()?;
+        let _ = futures::executor::block_on(writer.merge(&segment_ids));
+
        assert!(index.validate_checksum()?.is_empty());
        Ok(())
    }
--- a/src/positions/mod.rs
+++ b/src/positions/mod.rs
@@ -46,7 +46,7 @@ pub mod tests {
    fn create_positions_data(vals: &[u32]) -> crate::Result<OwnedBytes> {
        let mut positions_buffer = vec![];
        let mut serializer = PositionSerializer::new(&mut positions_buffer);
-        serializer.write_positions_delta(&vals);
+        serializer.write_positions_delta(vals);
        serializer.close_term()?;
        serializer.close()?;
        Ok(OwnedBytes::new(positions_buffer))
@@ -169,7 +169,7 @@ pub mod tests {
        let positions_delta: Vec<u32> = (0..2_000_000).collect();
        let positions_data = create_positions_data(&positions_delta[..])?;
        assert_eq!(positions_data.len(), 5003499);
-        let mut position_reader = PositionReader::open(positions_data.clone())?;
+        let mut position_reader = PositionReader::open(positions_data)?;
        let mut buf = [0u32; 256];
        position_reader.read(128, &mut buf);
        for i in 0..256 {
--- a/src/positions/reader.rs
+++ b/src/positions/reader.rs
@@ -1,9 +1,9 @@
 use std::io;

-use crate::common::{BinarySerializable, VInt};
 use crate::directory::OwnedBytes;
 use crate::positions::COMPRESSION_BLOCK_SIZE;
 use crate::postings::compression::{BlockDecoder, VIntDecoder};
+use common::{BinarySerializable, VInt};

 /// When accessing the position of a term, we get a positions_idx from the `Terminfo`.
 /// This means we need to skip to the `nth` positions efficiently.
--- a/src/positions/serializer.rs
+++ b/src/positions/serializer.rs
@@ -1,7 +1,7 @@
-use crate::common::{BinarySerializable, CountingWriter, VInt};
 use crate::positions::COMPRESSION_BLOCK_SIZE;
 use crate::postings::compression::BlockEncoder;
 use crate::postings::compression::VIntEncoder;
+use common::{BinarySerializable, CountingWriter, VInt};
 use std::io::{self, Write};

 /// The PositionSerializer is in charge of serializing all of the positions
--- a/src/postings/block_search.rs
+++ b/src/postings/block_search.rs
@@ -1,241 +1,109 @@
-use std::ops::Range;
+use crate::postings::compression::COMPRESSION_BLOCK_SIZE;

-use crate::postings::compression::AlignedBuffer;
+unsafe fn binary_search_step(ptr: *const u32, target: u32, half_size: isize) -> *const u32 {
+    let mid = ptr.offset(half_size);
+    if *mid < target {
+        mid.offset(1)
+    } else {
+        ptr
+    }
+}

-/// This modules define the logic used to search for a doc in a given
-/// block. (at most 128 docs)
+/// Search the first index containing an element greater or equal to
+/// the target.
 ///
-/// Searching within a block is a hotspot when running intersection.
-/// so it was worth defining it in its own module.
-
-#[cfg(target_arch = "x86_64")]
-mod sse2 {
-    use crate::postings::compression::{AlignedBuffer, COMPRESSION_BLOCK_SIZE};
-    use std::arch::x86_64::__m128i as DataType;
-    use std::arch::x86_64::_mm_add_epi32 as op_add;
-    use std::arch::x86_64::_mm_cmplt_epi32 as op_lt;
-    use std::arch::x86_64::_mm_load_si128 as op_load; // requires 128-bits alignment
-    use std::arch::x86_64::_mm_set1_epi32 as set1;
-    use std::arch::x86_64::_mm_setzero_si128 as set0;
-    use std::arch::x86_64::_mm_sub_epi32 as op_sub;
-    use std::arch::x86_64::{_mm_cvtsi128_si32, _mm_shuffle_epi32};
-
-    const MASK1: i32 = 78;
-    const MASK2: i32 = 177;
-
-    /// Performs an exhaustive linear search over the
-    ///
-    /// There is no early exit here. We simply count the
-    /// number of elements that are `< target`.
-    pub(crate) fn linear_search_sse2_128(arr: &AlignedBuffer, target: u32) -> usize {
-        unsafe {
-            let ptr = arr as *const AlignedBuffer as *const DataType;
-            let vkey = set1(target as i32);
-            let mut cnt = set0();
-            // We work over 4 `__m128i` at a time.
-            // A single `__m128i` actual contains 4 `u32`.
-            for i in 0..(COMPRESSION_BLOCK_SIZE as isize) / (4 * 4) {
-                let cmp1 = op_lt(op_load(ptr.offset(i * 4)), vkey);
-                let cmp2 = op_lt(op_load(ptr.offset(i * 4 + 1)), vkey);
-                let cmp3 = op_lt(op_load(ptr.offset(i * 4 + 2)), vkey);
-                let cmp4 = op_lt(op_load(ptr.offset(i * 4 + 3)), vkey);
-                let sum = op_add(op_add(cmp1, cmp2), op_add(cmp3, cmp4));
-                cnt = op_sub(cnt, sum);
-            }
-            cnt = op_add(cnt, _mm_shuffle_epi32(cnt, MASK1));
-            cnt = op_add(cnt, _mm_shuffle_epi32(cnt, MASK2));
-            _mm_cvtsi128_si32(cnt) as usize
-        }
-    }
-
-    #[cfg(test)]
-    mod test {
-        use super::linear_search_sse2_128;
-        use crate::postings::compression::{AlignedBuffer, COMPRESSION_BLOCK_SIZE};
-
-        #[test]
-        fn test_linear_search_sse2_128_u32() {
-            let mut block = [0u32; COMPRESSION_BLOCK_SIZE];
-            for el in 0u32..128u32 {
-                block[el as usize] = el * 2 + 1 << 18;
-            }
-            let target = block[64] + 1;
-            assert_eq!(linear_search_sse2_128(&AlignedBuffer(block), target), 65);
-        }
-    }
-}
-
-/// This `linear search` browser exhaustively through the array.
-/// but the early exit is very difficult to predict.
+/// The results should be equivalent to
+/// ```compile_fail
+/// block[..]
+///       .iter()
+///       .take_while(|&&val| val < target)
+///       .count()
+/// ```
 ///
-/// Coupled with `exponential search` this function is likely
-/// to be called with the same `len`
-fn linear_search(arr: &[u32], target: u32) -> usize {
-    arr.iter().map(|&el| if el < target { 1 } else { 0 }).sum()
-}
-
-fn exponential_search(arr: &[u32], target: u32) -> Range<usize> {
-    let end = arr.len();
-    let mut begin = 0;
-    for &pivot in &[1, 3, 7, 15, 31, 63] {
-        if pivot >= end {
-            break;
-        }
-        if arr[pivot] > target {
-            return begin..pivot;
-        }
-        begin = pivot;
-    }
-    begin..end
-}
-
-#[inline(never)]
-fn galloping(block_docs: &[u32], target: u32) -> usize {
-    let range = exponential_search(&block_docs, target);
-    range.start + linear_search(&block_docs[range], target)
-}
-
-/// Tantivy may rely on SIMD instructions to search for a specific document within
-/// a given block.
-#[derive(Clone, Copy, PartialEq)]
-pub enum BlockSearcher {
-    #[cfg(target_arch = "x86_64")]
-    Sse2,
-    Scalar,
-}
-
-impl BlockSearcher {
-    /// Search the first index containing an element greater or equal to
-    /// the target.
-    ///
-    /// The results should be equivalent to
-    /// ```compile_fail
-    /// block[..]
-    //       .iter()
-    //       .take_while(|&&val| val < target)
-    //       .count()
-    /// ```
-    ///
-    /// The `start` argument is just used to hint that the response is
-    /// greater than beyond `start`. The implementation may or may not use
-    /// it for optimization.
-    ///
-    /// # Assumption
-    ///
-    /// The array len is > start.
-    /// The block is sorted
-    /// The target is assumed greater or equal to the `arr[start]`.
-    /// The target is assumed smaller or equal to the last element of the block.
-    ///
-    /// Currently the scalar implementation starts by an exponential search, and
-    /// then operates a linear search in the result subarray.
-    ///
-    /// If SSE2 instructions are available in the `(platform, running CPU)`,
-    /// then we use a different implementation that does an exhaustive linear search over
-    /// the block regardless of whether the block is full or not.
-    ///
-    /// Indeed, if the block is not full, the remaining items are TERMINATED.
-    /// It is surprisingly faster, most likely because of the lack of branch misprediction.
-    pub(crate) fn search_in_block(self, block_docs: &AlignedBuffer, target: u32) -> usize {
-        #[cfg(target_arch = "x86_64")]
-        {
-            if self == BlockSearcher::Sse2 {
-                return sse2::linear_search_sse2_128(block_docs, target);
-            }
-        }
-        galloping(&block_docs.0[..], target)
-    }
-}
-
-impl Default for BlockSearcher {
-    fn default() -> BlockSearcher {
-        #[cfg(target_arch = "x86_64")]
-        {
-            if is_x86_feature_detected!("sse2") {
-                return BlockSearcher::Sse2;
-            }
-        }
-        BlockSearcher::Scalar
+/// There is no `start` hint argument: the search always covers the whole
+/// block, running a fixed sequence of halving steps (63, 31, 15, 7, 3, 1)
+/// followed by one final comparison.
+///
+/// # Assumption
+///
+/// - The block is sorted. Some elements may appear several times. This is the case at the
+/// end of the last block for instance.
+/// - The target is assumed smaller or equal to the last element of the block.
+pub fn branchless_binary_search(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
+    let start_ptr: *const u32 = &arr[0] as *const u32;
+    unsafe {
+        let mut ptr = start_ptr;
+        ptr = binary_search_step(ptr, target, 63);
+        ptr = binary_search_step(ptr, target, 31);
+        ptr = binary_search_step(ptr, target, 15);
+        ptr = binary_search_step(ptr, target, 7);
+        ptr = binary_search_step(ptr, target, 3);
+        ptr = binary_search_step(ptr, target, 1);
+        let extra = if *ptr < target { 1 } else { 0 };
+        (ptr.offset_from(start_ptr) as usize) + extra
    }
 }

 #[cfg(test)]
 mod tests {
-    use super::exponential_search;
-    use super::linear_search;
-    use super::BlockSearcher;
+    use super::branchless_binary_search;
    use crate::docset::TERMINATED;
-    use crate::postings::compression::{AlignedBuffer, COMPRESSION_BLOCK_SIZE};
-
-    #[test]
-    fn test_linear_search() {
-        let len: usize = 50;
-        let arr: Vec<u32> = (0..len).map(|el| 1u32 + (el as u32) * 2).collect();
-        for target in 1..*arr.last().unwrap() {
-            let res = linear_search(&arr[..], target);
-            if res > 0 {
-                assert!(arr[res - 1] < target);
-            }
-            if res < len {
-                assert!(arr[res] >= target);
-            }
-        }
-    }
-
-    #[test]
-    fn test_exponentiel_search() {
-        assert_eq!(exponential_search(&[1, 2], 0), 0..1);
-        assert_eq!(exponential_search(&[1, 2], 1), 0..1);
-        assert_eq!(
-            exponential_search(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 7),
-            3..7
-        );
-    }
-
-    fn util_test_search_in_block(block_searcher: BlockSearcher, block: &[u32], target: u32) {
-        let cursor = search_in_block_trivial_but_slow(block, target);
-        assert!(block.len() < COMPRESSION_BLOCK_SIZE);
-        let mut output_buffer = [TERMINATED; COMPRESSION_BLOCK_SIZE];
-        output_buffer[..block.len()].copy_from_slice(block);
-        assert_eq!(
-            block_searcher.search_in_block(&AlignedBuffer(output_buffer), target),
-            cursor
-        );
-    }
-
-    fn util_test_search_in_block_all(block_searcher: BlockSearcher, block: &[u32]) {
-        use std::collections::HashSet;
-        let mut targets = HashSet::new();
-        for (i, val) in block.iter().cloned().enumerate() {
-            if i > 0 {
-                targets.insert(val - 1);
-            }
-            targets.insert(val);
-        }
-        for target in targets {
-            util_test_search_in_block(block_searcher, block, target);
-        }
-    }
+    use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
+    use proptest::prelude::*;
+    use std::collections::HashSet;

    fn search_in_block_trivial_but_slow(block: &[u32], target: u32) -> usize {
        block.iter().take_while(|&&val| val < target).count()
    }

-    fn test_search_in_block_util(block_searcher: BlockSearcher) {
-        for len in 1u32..128u32 {
-            let v: Vec<u32> = (0..len).map(|i| i * 2).collect();
-            util_test_search_in_block_all(block_searcher, &v[..]);
+    fn util_test_search_in_block(block: &[u32], target: u32) {
+        let cursor = search_in_block_trivial_but_slow(block, target);
+        assert!(cursor < COMPRESSION_BLOCK_SIZE);
+        assert!(block[cursor] >= target);
+        if cursor > 0 {
+            assert!(block[cursor - 1] < target);
+        }
+        assert_eq!(block.len(), COMPRESSION_BLOCK_SIZE);
+        let mut output_buffer = [TERMINATED; COMPRESSION_BLOCK_SIZE];
+        output_buffer[..block.len()].copy_from_slice(block);
+        assert_eq!(branchless_binary_search(&output_buffer, target), cursor);
+    }
+
+    fn util_test_search_in_block_all(block: &[u32]) {
+        let mut targets = HashSet::new();
+        targets.insert(0);
+        for &val in block {
+            if val > 0 {
+                targets.insert(val - 1);
+            }
+            targets.insert(val);
+        }
+        for target in targets {
+            util_test_search_in_block(block, target);
        }
    }

    #[test]
-    fn test_search_in_block_scalar() {
-        test_search_in_block_util(BlockSearcher::Scalar);
+    fn test_search_in_branchless_binary_search() {
+        let v: Vec<u32> = (0..COMPRESSION_BLOCK_SIZE).map(|i| i as u32 * 2).collect();
+        util_test_search_in_block_all(&v[..]);
    }

-    #[cfg(target_arch = "x86_64")]
-    #[test]
-    fn test_search_in_block_sse2() {
-        test_search_in_block_util(BlockSearcher::Sse2);
+    fn monotonous_block() -> impl Strategy<Value = Vec<u32>> {
+        prop::collection::vec(0u32..5u32, COMPRESSION_BLOCK_SIZE).prop_map(|mut deltas| {
+            let mut el = 0;
+            for i in 0..COMPRESSION_BLOCK_SIZE {
+                el += deltas[i];
+                deltas[i] = el;
+            }
+            deltas
+        })
+    }
+
+    proptest! {
+        #[test]
+        fn test_proptest_branchless_binary_search(block in monotonous_block()) {
+            util_test_search_in_block_all(&block[..]);
+        }
    }
 }
--- a/src/postings/block_segment_postings.rs
+++ b/src/postings/block_segment_postings.rs
@@ -1,23 +1,17 @@
 use std::io;

-use crate::common::{BinarySerializable, VInt};
 use crate::directory::FileSlice;
 use crate::directory::OwnedBytes;
 use crate::fieldnorm::FieldNormReader;
-use crate::postings::compression::{
-    AlignedBuffer, BlockDecoder, VIntDecoder, COMPRESSION_BLOCK_SIZE,
-};
+use crate::postings::compression::{BlockDecoder, VIntDecoder, COMPRESSION_BLOCK_SIZE};
 use crate::postings::{BlockInfo, FreqReadingOption, SkipReader};
 use crate::query::Bm25Weight;
 use crate::schema::IndexRecordOption;
 use crate::{DocId, Score, TERMINATED};
+use common::{BinarySerializable, VInt};

 fn max_score<I: Iterator<Item = Score>>(mut it: I) -> Option<Score> {
-    if let Some(first) = it.next() {
-        Some(it.fold(first, Score::max))
-    } else {
-        None
-    }
+    it.next().map(|first| it.fold(first, Score::max))
 }

 /// `BlockSegmentPostings` is a cursor iterating over blocks
@@ -213,9 +207,9 @@ impl BlockSegmentPostings {
    ///
    /// This method is useful to run SSE2 linear search.
    #[inline]
-    pub(crate) fn docs_aligned(&self) -> &AlignedBuffer {
+    pub(crate) fn full_block(&self) -> &[DocId; COMPRESSION_BLOCK_SIZE] {
        debug_assert!(self.block_is_loaded());
-        self.doc_decoder.output_aligned()
+        self.doc_decoder.full_output()
    }

    /// Return the document at index `idx` of the block.
@@ -353,7 +347,6 @@ impl BlockSegmentPostings {
 #[cfg(test)]
 mod tests {
    use super::BlockSegmentPostings;
-    use crate::common::HasLen;
    use crate::core::Index;
    use crate::docset::{DocSet, TERMINATED};
    use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
@@ -364,6 +357,7 @@ mod tests {
    use crate::schema::Term;
    use crate::schema::INDEXED;
    use crate::DocId;
+    use common::HasLen;

    #[test]
    fn test_empty_segment_postings() {
@@ -482,11 +476,11 @@ mod tests {
            docs.push((i * i / 100) + i);
        }
        let mut block_postings = build_block_postings(&docs[..]);
-        for i in vec![0, 424, 10000] {
-            block_postings.seek(i);
+        for i in &[0, 424, 10000] {
+            block_postings.seek(*i);
            let docs = block_postings.docs();
-            assert!(docs[0] <= i);
-            assert!(docs.last().cloned().unwrap_or(0u32) >= i);
+            assert!(docs[0] <= *i);
+            assert!(docs.last().cloned().unwrap_or(0u32) >= *i);
        }
        block_postings.seek(100_000);
        assert_eq!(block_postings.doc(COMPRESSION_BLOCK_SIZE - 1), TERMINATED);
--- a/src/postings/compression/mod.rs
+++ b/src/postings/compression/mod.rs
@@ -1,5 +1,5 @@
-use crate::common::FixedSize;
 use bitpacking::{BitPacker, BitPacker4x};
+use common::FixedSize;

 pub const COMPRESSION_BLOCK_SIZE: usize = BitPacker4x::BLOCK_LEN;
 const COMPRESSED_BLOCK_MAX_SIZE: usize = COMPRESSION_BLOCK_SIZE * u32::SIZE_IN_BYTES;
@@ -49,16 +49,10 @@ impl BlockEncoder {
    }
 }

-/// We ensure that the OutputBuffer is align on 128 bits
-/// in order to run SSE2 linear search on it.
-#[repr(align(128))]
-#[derive(Clone)]
-pub(crate) struct AlignedBuffer(pub [u32; COMPRESSION_BLOCK_SIZE]);
-
 #[derive(Clone)]
 pub struct BlockDecoder {
    bitpacker: BitPacker4x,
-    output: AlignedBuffer,
+    output: [u32; COMPRESSION_BLOCK_SIZE],
    pub output_len: usize,
 }

@@ -72,7 +66,7 @@ impl BlockDecoder {
    pub fn with_val(val: u32) -> BlockDecoder {
        BlockDecoder {
            bitpacker: BitPacker4x::new(),
-            output: AlignedBuffer([val; COMPRESSION_BLOCK_SIZE]),
+            output: [val; COMPRESSION_BLOCK_SIZE],
            output_len: 0,
        }
    }
@@ -85,28 +79,28 @@ impl BlockDecoder {
    ) -> usize {
        self.output_len = COMPRESSION_BLOCK_SIZE;
        self.bitpacker
-            .decompress_sorted(offset, &compressed_data, &mut self.output.0, num_bits)
+            .decompress_sorted(offset, compressed_data, &mut self.output, num_bits)
    }

    pub fn uncompress_block_unsorted(&mut self, compressed_data: &[u8], num_bits: u8) -> usize {
        self.output_len = COMPRESSION_BLOCK_SIZE;
        self.bitpacker
-            .decompress(&compressed_data, &mut self.output.0, num_bits)
+            .decompress(compressed_data, &mut self.output, num_bits)
    }

    #[inline]
    pub fn output_array(&self) -> &[u32] {
-        &self.output.0[..self.output_len]
+        &self.output[..self.output_len]
    }

    #[inline]
-    pub(crate) fn output_aligned(&self) -> &AlignedBuffer {
+    pub(crate) fn full_output(&self) -> &[u32; COMPRESSION_BLOCK_SIZE] {
        &self.output
    }

    #[inline]
    pub fn output(&self, idx: usize) -> u32 {
-        self.output.0[idx]
+        self.output[idx]
    }
 }

@@ -190,8 +184,8 @@ impl VIntDecoder for BlockDecoder {
        padding: u32,
    ) -> usize {
        self.output_len = num_els;
-        self.output.0.iter_mut().for_each(|el| *el = padding);
-        vint::uncompress_sorted(compressed_data, &mut self.output.0[..num_els], offset)
+        self.output.iter_mut().for_each(|el| *el = padding);
+        vint::uncompress_sorted(compressed_data, &mut self.output[..num_els], offset)
    }

    fn uncompress_vint_unsorted(
@@ -201,12 +195,12 @@ impl VIntDecoder for BlockDecoder {
        padding: u32,
    ) -> usize {
        self.output_len = num_els;
-        self.output.0.iter_mut().for_each(|el| *el = padding);
-        vint::uncompress_unsorted(compressed_data, &mut self.output.0[..num_els])
+        self.output.iter_mut().for_each(|el| *el = padding);
+        vint::uncompress_unsorted(compressed_data, &mut self.output[..num_els])
    }

    fn uncompress_vint_unsorted_until_end(&mut self, compressed_data: &[u8]) {
-        let num_els = vint::uncompress_unsorted_until_end(compressed_data, &mut self.output.0);
+        let num_els = vint::uncompress_unsorted_until_end(compressed_data, &mut self.output);
        self.output_len = num_els;
    }
 }
@@ -303,7 +297,7 @@ pub mod tests {
            assert!(encoded_data.len() <= expected_length);
            let mut decoder = BlockDecoder::default();
            let consumed_num_bytes =
-                decoder.uncompress_vint_sorted(&encoded_data, *offset, input.len(), PADDING_VALUE);
+                decoder.uncompress_vint_sorted(encoded_data, *offset, input.len(), PADDING_VALUE);
            assert_eq!(consumed_num_bytes, encoded_data.len());
            assert_eq!(input, decoder.output_array());
            for i in input.len()..COMPRESSION_BLOCK_SIZE {
--- a/src/postings/mod.rs
+++ b/src/postings/mod.rs
@@ -3,6 +3,9 @@ Postings module (also called inverted index)
 */

 mod block_search;
+
+pub(crate) use self::block_search::branchless_binary_search;
+
 mod block_segment_postings;
 pub(crate) mod compression;
 mod postings;
@@ -14,7 +17,6 @@ mod skip;
 mod stacker;
 mod term_info;

-pub(crate) use self::block_search::BlockSearcher;
 pub use self::block_segment_postings::BlockSegmentPostings;
 pub use self::postings::Postings;
 pub(crate) use self::postings_writer::MultiFieldPostingsWriter;
@@ -54,7 +56,7 @@ pub mod tests {
    use crate::DocId;
    use crate::HasLen;
    use crate::Score;
-    use std::{iter, mem};
+    use std::mem;

    #[test]
    pub fn test_position_write() -> crate::Result<()> {
@@ -153,8 +155,8 @@ pub mod tests {

    #[test]
    pub fn test_drop_token_that_are_too_long() -> crate::Result<()> {
-        let ok_token_text: String = iter::repeat('A').take(MAX_TOKEN_LEN).collect();
-        let mut exceeding_token_text: String = iter::repeat('A').take(MAX_TOKEN_LEN + 1).collect();
+        let ok_token_text: String = "A".repeat(MAX_TOKEN_LEN);
+        let mut exceeding_token_text: String = "A".repeat(MAX_TOKEN_LEN + 1);
        exceeding_token_text.push_str(" hello");
        let mut schema_builder = Schema::builder();
        let text_options = TextOptions::default().set_indexing_options(
@@ -164,7 +166,7 @@ pub mod tests {
        );
        let text_field = schema_builder.add_text_field("text", text_options);
        let schema = schema_builder.build();
-        let index = Index::create_in_ram(schema.clone());
+        let index = Index::create_in_ram(schema);
        index
            .tokenizers()
            .register("simple_no_truncation", SimpleTokenizer);
@@ -229,7 +231,7 @@ pub mod tests {
                segment_writer.add_document(op, &schema).unwrap();
            }
            for i in 2..1000 {
-                let mut text: String = iter::repeat("e ").take(i).collect();
+                let mut text: String = "e ".repeat(i);
                text.push_str(" a");
                let op = AddOperation {
                    opstamp: 2u64,
@@ -409,7 +411,7 @@ pub mod tests {
                .unwrap();

            for i in 0..num_docs / 2 - 1 {
-                assert!(segment_postings.seek(i * 2 + 1) > (i * 1) * 2);
+                assert!(segment_postings.seek(i * 2 + 1) > i * 2);
                assert_eq!(segment_postings.doc(), (i + 1) * 2);
            }
        }
@@ -431,13 +433,10 @@ pub mod tests {
                .read_postings(&term_2, IndexRecordOption::Basic)?
                .unwrap();

+            assert_eq!(segment_postings.seek(i), i);
+            assert_eq!(segment_postings.doc(), i);
            if i % 2 == 0 {
-                assert_eq!(segment_postings.seek(i), i);
-                assert_eq!(segment_postings.doc(), i);
                assert!(segment_reader.is_deleted(i));
-            } else {
-                assert_eq!(segment_postings.seek(i), i);
-                assert_eq!(segment_postings.doc(), i);
            }
        }

--- a/src/postings/postings.rs
+++ b/src/postings/postings.rs
@@ -11,7 +11,7 @@ use crate::docset::DocSet;
 /// but other implementations mocking `SegmentPostings` exist,
 /// for merging segments or for testing.
 pub trait Postings: DocSet + 'static {
-    /// Returns the term frequency
+    /// The number of times the term appears in the document.
    fn term_freq(&self) -> u32;

    /// Returns the positions offseted with a given value.
--- a/src/postings/postings_writer.rs
+++ b/src/postings/postings_writer.rs
@@ -231,13 +231,7 @@ pub trait PostingsWriter {
            // We skip all tokens with a len greater than u16.
            if token.text.len() <= MAX_TOKEN_LEN {
                term_buffer.set_text(token.text.as_str());
-                self.subscribe(
-                    term_index,
-                    doc_id,
-                    token.position as u32,
-                    &term_buffer,
-                    heap,
-                );
+                self.subscribe(term_index, doc_id, token.position as u32, term_buffer, heap);
            } else {
                warn!(
                    "A token exceeding MAX_TOKEN_LEN ({}>{}) was dropped. Search for \
--- a/src/postings/recorder.rs
+++ b/src/postings/recorder.rs
@@ -1,10 +1,8 @@
 use super::stacker::{ExpUnrolledLinkedList, MemoryArena};
+use crate::indexer::doc_id_mapping::DocIdMapping;
 use crate::postings::FieldSerializer;
 use crate::DocId;
-use crate::{
-    common::{read_u32_vint, write_u32_vint},
-    indexer::doc_id_mapping::DocIdMapping,
-};
+use common::{read_u32_vint, write_u32_vint};

 const POSITION_END: u32 = 0;

@@ -282,7 +280,7 @@ impl Recorder for TfAndPositionRecorder {
                doc_id_and_positions
                    .push((doc_id_map.get_new_doc_id(doc), buffer_positions.to_vec()));
            } else {
-                serializer.write_doc(doc, buffer_positions.len() as u32, &buffer_positions);
+                serializer.write_doc(doc, buffer_positions.len() as u32, buffer_positions);
            }
        }
        if doc_id_map.is_some() {
--- a/src/postings/segment_postings.rs
+++ b/src/postings/segment_postings.rs
@@ -1,12 +1,12 @@
-use crate::common::HasLen;
 use crate::docset::DocSet;
 use crate::fastfield::DeleteBitSet;
 use crate::positions::PositionReader;
+use crate::postings::branchless_binary_search;
 use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
-use crate::postings::BlockSearcher;
 use crate::postings::BlockSegmentPostings;
 use crate::postings::Postings;
 use crate::{DocId, TERMINATED};
+use common::HasLen;

 /// `SegmentPostings` represents the inverted list or postings associated to
 /// a term in a `Segment`.
@@ -18,7 +18,6 @@ pub struct SegmentPostings {
    pub(crate) block_cursor: BlockSegmentPostings,
    cur: usize,
    position_reader: Option<PositionReader>,
-    block_searcher: BlockSearcher,
 }

 impl SegmentPostings {
@@ -28,7 +27,6 @@ impl SegmentPostings {
            block_cursor: BlockSegmentPostings::empty(),
            cur: 0,
            position_reader: None,
-            block_searcher: BlockSearcher::default(),
        }
    }

@@ -107,7 +105,7 @@ impl SegmentPostings {
        let fieldnorm_reader = fieldnorms.map(FieldNormReader::for_test);
        let average_field_norm = fieldnorms
            .map(|fieldnorms| {
-                if fieldnorms.len() == 0 {
+                if fieldnorms.is_empty() {
                    return 0.0;
                }
                let total_num_tokens: u64 = fieldnorms
@@ -154,7 +152,6 @@ impl SegmentPostings {
            block_cursor: segment_block_postings,
            cur: 0, // cursor within the block
            position_reader,
-            block_searcher: BlockSearcher::default(),
        }
    }
 }
@@ -183,8 +180,8 @@ impl DocSet for SegmentPostings {
        self.block_cursor.seek(target);

        // At this point we are on the block, that might contain our document.
-        let output = self.block_cursor.docs_aligned();
-        self.cur = self.block_searcher.search_in_block(&output, target);
+        let output = self.block_cursor.full_block();
+        self.cur = branchless_binary_search(output, target);

        // The last block is not full and padded with the value TERMINATED,
        // so that we are guaranteed to have at least doc in the block (a real one or the padding)
@@ -197,7 +194,7 @@ impl DocSet for SegmentPostings {
        // with the value `TERMINATED`.
        //
        // After the search, the cursor should point to the first value of TERMINATED.
-        let doc = output.0[self.cur];
+        let doc = output[self.cur];
        debug_assert!(doc >= target);
        debug_assert_eq!(doc, self.doc());
        doc
@@ -268,7 +265,7 @@ impl Postings for SegmentPostings {
 mod tests {

    use super::SegmentPostings;
-    use crate::common::HasLen;
+    use common::HasLen;

    use crate::docset::{DocSet, TERMINATED};
    use crate::fastfield::DeleteBitSet;
--- a/src/postings/serializer.rs
+++ b/src/postings/serializer.rs
@@ -1,7 +1,6 @@
 use super::TermInfo;
-use crate::common::{BinarySerializable, VInt};
-use crate::common::{CompositeWrite, CountingWriter};
 use crate::core::Segment;
+use crate::directory::CompositeWrite;
 use crate::directory::WritePtr;
 use crate::fieldnorm::FieldNormReader;
 use crate::positions::PositionSerializer;
@@ -12,6 +11,8 @@ use crate::schema::{Field, FieldEntry, FieldType};
 use crate::schema::{IndexRecordOption, Schema};
 use crate::termdict::{TermDictionaryBuilder, TermOrdinal};
 use crate::{DocId, Score};
+use common::CountingWriter;
+use common::{BinarySerializable, VInt};
 use std::cmp::Ordering;
 use std::io::{self, Write};

@@ -356,7 +357,7 @@ impl<W: Write> PostingsSerializer<W> {
            // encode the doc ids
            let (num_bits, block_encoded): (u8, &[u8]) = self
                .block_encoder
-                .compress_block_sorted(&self.block.doc_ids(), self.last_doc_id_encoded);
+                .compress_block_sorted(self.block.doc_ids(), self.last_doc_id_encoded);
            self.last_doc_id_encoded = self.block.last_doc();
            self.skip_write
                .write_doc(self.last_doc_id_encoded, num_bits);
@@ -366,7 +367,7 @@ impl<W: Write> PostingsSerializer<W> {
        if self.mode.has_freq() {
            let (num_bits, block_encoded): (u8, &[u8]) = self
                .block_encoder
-                .compress_block_unsorted(&self.block.term_freqs());
+                .compress_block_unsorted(self.block.term_freqs());
            self.postings_write.extend(block_encoded);
            self.skip_write.write_term_freq(num_bits);
            if self.mode.has_positions() {
@@ -426,7 +427,7 @@ impl<W: Write> PostingsSerializer<W> {
            {
                let block_encoded = self
                    .block_encoder
-                    .compress_vint_sorted(&self.block.doc_ids(), self.last_doc_id_encoded);
+                    .compress_vint_sorted(self.block.doc_ids(), self.last_doc_id_encoded);
                self.postings_write.write_all(block_encoded)?;
            }
            // ... Idem for term frequencies
@@ -442,10 +443,8 @@ impl<W: Write> PostingsSerializer<W> {
            let skip_data = self.skip_write.data();
            VInt(skip_data.len() as u64).serialize(&mut self.output_write)?;
            self.output_write.write_all(skip_data)?;
-            self.output_write.write_all(&self.postings_write[..])?;
-        } else {
-            self.output_write.write_all(&self.postings_write[..])?;
        }
+        self.output_write.write_all(&self.postings_write[..])?;
        self.skip_write.clear();
        self.postings_write.clear();
        self.bm25_weight = None;
--- a/src/postings/stacker/expull.rs
+++ b/src/postings/stacker/expull.rs
@@ -280,11 +280,8 @@ mod tests {
            if v.len() >= 10 {
                break;
            }
-            match len_to_capacity(i) {
-                CapacityResult::NeedAlloc(cap) => {
-                    v.push((i, cap));
-                }
-                _ => {}
+            if let CapacityResult::NeedAlloc(cap) = len_to_capacity(i) {
+                v.push((i, cap));
            }
        }
        assert_eq!(
--- a/src/postings/stacker/term_hashmap.rs
+++ b/src/postings/stacker/term_hashmap.rs
@@ -151,7 +151,7 @@ impl TermHashMap {
    pub fn iter(&self) -> Iter<'_> {
        Iter {
            inner: self.occupied.iter(),
-            hashmap: &self,
+            hashmap: self,
        }
    }

@@ -261,8 +261,8 @@ mod tests {
        }

        let mut vanilla_hash_map = HashMap::new();
-        let mut iter_values = hash_map.iter();
-        while let Some((key, addr, _)) = iter_values.next() {
+        let iter_values = hash_map.iter();
+        for (key, addr, _) in iter_values {
            let val: u32 = hash_map.heap.read(addr);
            vanilla_hash_map.insert(key.to_owned(), val);
        }
--- a/src/postings/term_info.rs
+++ b/src/postings/term_info.rs
@@ -1,4 +1,4 @@
-use crate::common::{BinarySerializable, FixedSize};
+use common::{BinarySerializable, FixedSize};
 use std::io;
 use std::iter::ExactSizeIterator;
 use std::ops::Range;
@@ -67,7 +67,7 @@ impl BinarySerializable for TermInfo {
 mod tests {

    use super::TermInfo;
-    use crate::common::test::fixed_size_test;
+    use crate::tests::fixed_size_test;

    // TODO add serialize/deserialize test for terminfo

--- a/src/query/automaton_weight.rs
+++ b/src/query/automaton_weight.rs
@@ -1,4 +1,3 @@
-use crate::common::BitSet;
 use crate::core::SegmentReader;
 use crate::query::ConstScorer;
 use crate::query::{BitSetDocSet, Explanation};
@@ -7,6 +6,7 @@ use crate::schema::{Field, IndexRecordOption};
 use crate::termdict::{TermDictionary, TermStreamer};
 use crate::TantivyError;
 use crate::{DocId, Score};
+use common::BitSet;
 use std::io;
 use std::sync::Arc;
 use tantivy_fst::Automaton;
@@ -121,10 +121,7 @@ mod tests {
        }

        fn is_match(&self, state: &Self::State) -> bool {
-            match *state {
-                State::AfterA => true,
-                _ => false,
-            }
+            matches!(*state, State::AfterA)
        }

        fn accept(&self, state: &Self::State, byte: u8) -> Self::State {
--- a/src/query/bitset/mod.rs
+++ b/src/query/bitset/mod.rs
@@ -1,6 +1,6 @@
-use crate::common::{BitSet, TinySet};
 use crate::docset::{DocSet, TERMINATED};
 use crate::DocId;
+use common::{BitSet, TinySet};

 /// A `BitSetDocSet` makes it possible to iterate through a bitset as if it was a `DocSet`.
 ///
@@ -96,10 +96,13 @@ impl DocSet for BitSetDocSet {

 #[cfg(test)]
 mod tests {
+    use std::collections::BTreeSet;
+
    use super::BitSetDocSet;
-    use crate::common::BitSet;
    use crate::docset::{DocSet, TERMINATED};
+    use crate::tests::generate_nonunique_unsorted;
    use crate::DocId;
+    use common::BitSet;

    fn create_docbitset(docs: &[DocId], max_doc: DocId) -> BitSetDocSet {
        let mut docset = BitSet::with_max_value(max_doc);
@@ -109,6 +112,29 @@ mod tests {
        BitSetDocSet::from(docset)
    }

+    #[test]
+    fn test_bitset_large() {
+        let arr = generate_nonunique_unsorted(100_000, 5_000);
+        let mut btreeset: BTreeSet<u32> = BTreeSet::new();
+        let mut bitset = BitSet::with_max_value(100_000);
+        for el in arr {
+            btreeset.insert(el);
+            bitset.insert(el);
+        }
+        for i in 0..100_000 {
+            assert_eq!(btreeset.contains(&i), bitset.contains(i));
+        }
+        assert_eq!(btreeset.len(), bitset.len());
+        let mut bitset_docset = BitSetDocSet::from(bitset);
+        let mut remaining = true;
+        for el in btreeset.into_iter() {
+            assert!(remaining);
+            assert_eq!(bitset_docset.doc(), el);
+            remaining = bitset_docset.advance() != TERMINATED;
+        }
+        assert!(!remaining);
+    }
+
    #[test]
    fn test_empty() {
        let bitset = BitSet::with_max_value(1000);
--- a/src/query/boolean_query/block_wand.rs
+++ b/src/query/boolean_query/block_wand.rs
@@ -251,7 +251,7 @@ mod tests {

    impl PartialEq for Float {
        fn eq(&self, other: &Self) -> bool {
-            self.cmp(&other) == Ordering::Equal
+            self.cmp(other) == Ordering::Equal
        }
    }

@@ -289,7 +289,7 @@ mod tests {
            if !nearly_equals(score, limit) {
                checkpoints.push((doc, score));
            }
-            return limit;
+            limit
        });
        checkpoints
    }
@@ -368,10 +368,10 @@ mod tests {
            .iter()
            .map(|posting_list| {
                posting_list
-                    .into_iter()
+                    .iter()
                    .cloned()
                    .flat_map(|(doc, term_freq)| {
-                        (0 as u32..REPEAT as u32).map(move |offset| {
+                        (0_u32..REPEAT as u32).map(move |offset| {
                            (
                                doc * (REPEAT as u32) + offset,
                                if offset == 0 { term_freq } else { 1 },
--- a/src/query/boolean_query/mod.rs
+++ b/src/query/boolean_query/mod.rs
@@ -310,7 +310,7 @@ mod tests {
        ));
        let query = BooleanQuery::from(vec![(Occur::Should, term_a), (Occur::Should, term_b)]);
        let explanation = query.explain(&searcher, DocAddress::new(0, 0u32))?;
-        assert_nearly_equals!(explanation.value(), 0.6931472);
+        assert_nearly_equals!(explanation.value(), std::f32::consts::LN_2);
        Ok(())
    }
 }
--- a/src/query/more_like_this/more_like_this.rs
+++ b/src/query/more_like_this/more_like_this.rs
@@ -329,7 +329,7 @@ impl MoreLikeThis {
                continue;
            }

-            let doc_freq = searcher.doc_freq(&term)?;
+            let doc_freq = searcher.doc_freq(term)?;

            // ignore terms with less than min_doc_frequency
            if self
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Paul Masurel	dce6adc4b6	Revert "add index accessor for index writer (#1159 )" This reverts commit `b256df6599`.	2021-09-23 21:49:34 +09:00
Mestery	b256df6599	add index accessor for index writer (#1159 ) * add index accessor for index writer * Update src/indexer/index_writer.rs Co-authored-by: Paul Masurel <paul@quickwit.io>	2021-09-23 21:49:20 +09:00
dependabot[bot]	37c5fe3c86	Update memmap2 requirement from 0.4 to 0.5 (#1157 ) Updates the requirements on [memmap2](https://github.com/RazrFalcon/memmap2-rs) to permit the latest version. - [Release notes](https://github.com/RazrFalcon/memmap2-rs/releases) - [Changelog](https://github.com/RazrFalcon/memmap2-rs/blob/master/CHANGELOG.md) - [Commits](https://github.com/RazrFalcon/memmap2-rs/compare/v0.4.0...v0.5.0) --- updated-dependencies: - dependency-name: memmap2 dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2021-09-23 20:18:27 +09:00
dependabot[bot]	2c78b31aab	Update memmap2 requirement from 0.3 to 0.4 (#1155 ) Updates the requirements on [memmap2](https://github.com/RazrFalcon/memmap2-rs) to permit the latest version. - [Release notes](https://github.com/RazrFalcon/memmap2-rs/releases) - [Changelog](https://github.com/RazrFalcon/memmap2-rs/blob/master/CHANGELOG.md) - [Commits](https://github.com/RazrFalcon/memmap2-rs/compare/v.0.3.0...v0.4.0)	2021-09-17 08:52:52 +09:00
Paul Masurel	46b86a7976	Bounced version and edited changelog	2021-09-10 23:05:09 +09:00
PSeitz	3bc177e69d	fix #1151 (#1152 ) * fix #1151 Fixes an off-by-one error in the stats for the index fast field in the multi value fast field. When retrieving the data range for a docid, `get(docid)..get(docid+1)` is requested. On creation the num_vals statistic was set to docid instead of docid + 1. In the multivaluelinearinterpol fast field the last value was therefore not serialized (and would return 0 instead in most cases). So the last document get(lastdoc)..get(lastdoc + 1) would return the invalid range `value..0`. This PR adds a proptest to cover this scenario. A combination of a large number of values, since multilinear interpolation is only active for more than 5_000 values, and a merge is required.	2021-09-10 23:00:37 +09:00
PSeitz	319609e9c1	test cargo-llvm-cov (#1149 )	2021-09-03 22:00:43 +09:00
Kanji Yomoda	9d87b89718	Fix incorrect comment for Index::create_in_dir (#1148 ) * Fix incorrect comment for Index::create_in_dir	2021-09-03 10:37:16 +09:00
Tomoko Uchida	dd81e38e53	Add WhitespaceTokenizer (#1147 ) * Add WhitespaceTokenizer.	2021-08-29 18:20:49 +09:00
Paul Masurel	9f32b22602	Preparing for release.	2021-08-26 09:07:08 +09:00
sigaloid	096ce7488e	Resolve some clippys, format (#1144 ) * cargo +nightly clippy --fix -Z unstable-options	2021-08-26 08:46:00 +09:00
PSeitz	a1782dd172	Update index_sorting.md	2021-08-25 07:55:50 +01:00
PSeitz	000d76b11a	Update index_sorting.md	2021-08-24 19:28:06 +01:00
PSeitz	abd29f6646	Update index_sorting.md	2021-08-24 19:26:19 +01:00
PSeitz	b4ecf0ab2f	Merge pull request #1146 from tantivy-search/sorting_doc add sorting to book	2021-08-23 17:37:54 +01:00
Pascal Seitz	798f7dbf67	add sorting to book	2021-08-23 17:36:41 +01:00
PSeitz	06a2e47c8d	Merge pull request #1145 from tantivy-search/blub2 cargo fmt	2021-08-21 18:52:50 +01:00
Pascal Seitz	e0b83eb291	cargo fmt	2021-08-21 18:52:10 +01:00
PSeitz	13401f46ea	add wildcard mention	2021-08-21 18:10:33 +01:00
PSeitz	1a45b030dc	Merge pull request #1141 from tantivy-search/tantivy_common dissolve common module	2021-08-20 08:03:37 +01:00
Pascal Seitz	62052bcc2d	add missing test function closes #1139	2021-08-20 07:26:22 +01:00
Pascal Seitz	3265f7bec3	dissolve common module	2021-08-19 23:26:34 +01:00
Pascal Seitz	ee0881712a	move bitset to common crate, move composite file to directory	2021-08-19 17:45:09 +01:00
PSeitz	483e0336b6	Merge pull request #1140 from tantivy-search/tantivy_common rename common to tantivy-common	2021-08-19 13:02:54 +01:00
Pascal Seitz	3e8f267e33	rename common to tantivy-common	2021-08-19 10:27:20 +01:00
Paul Masurel	3b247fd968	Version bump	2021-08-19 10:12:30 +09:00
Paul Masurel	750f6e6479	Removed obsolete unit test (#1138 )	2021-08-19 10:07:49 +09:00
Evance Soumaoro	5b475e6603	Checksum validation using active files (#1130 ) * now validate checksum uses segment files not managed files	2021-08-19 10:03:20 +09:00
PSeitz	0ca7f73dc5	add docs badge, fix build badge	2021-08-13 19:40:33 +01:00
PSeitz	47ed18845e	Merge pull request #1136 from tantivy-search/minor_fixes more docs detail	2021-08-13 18:11:47 +01:00
Pascal Seitz	dc141cdb29	more docs detail remove code duplicate	2021-08-13 17:40:13 +01:00
PSeitz	f6cf6e889b	Merge pull request #1133 from tantivy-search/merge_overflow test doc_freq and term_freq in sorted index	2021-08-05 07:53:46 +01:00
Pascal Seitz	f379a80233	test doc_freq and term_freq in sorted index	2021-08-03 11:38:05 +01:00
PSeitz	4a320fd1ff	fix delta position in merge and index sorting (#1132 ) fixes #1125	2021-08-03 18:06:36 +09:00
PSeitz	85d23e8e3b	Merge pull request #1129 from tantivy-search/merge_overflow add long running test in ci	2021-08-02 15:54:31 +01:00
Pascal Seitz	022ab9d298	don't run as pr	2021-08-02 15:44:00 +01:00
Pascal Seitz	605e8603dc	add positions to long running test	2021-08-02 15:29:49 +01:00
Pascal Seitz	70f160b329	add long running test in ci	2021-08-02 11:35:39 +01:00
PSeitz	6d265e6bed	fix gh action name	2021-08-02 10:38:01 +01:00
PSeitz	fdc512391b	Merge pull request #1128 from tantivy-search/merge_overflow add sort to functional test, add env for iterations	2021-08-02 10:29:16 +01:00
Pascal Seitz	108714c934	add sort to functional test, add env for iterations	2021-08-02 10:11:17 +01:00
Paul Masurel	44e8cf98a5	Cargo fmt	2021-07-30 15:30:01 +09:00
Paul Masurel	f0ee69d9e9	Remove the complicated block search logic for a simpler branchless (#1124 ) binary search The code is simpler and faster. Before test postings::bench::bench_segment_intersection ... bench: 2,093,697 ns/iter (+/- 115,509) test postings::bench::bench_skip_next_p01 ... bench: 58,585 ns/iter (+/- 796) test postings::bench::bench_skip_next_p1 ... bench: 160,872 ns/iter (+/- 5,164) test postings::bench::bench_skip_next_p10 ... bench: 615,229 ns/iter (+/- 25,108) test postings::bench::bench_skip_next_p90 ... bench: 1,120,509 ns/iter (+/- 22,271) After test postings::bench::bench_segment_intersection ... bench: 1,747,726 ns/iter (+/- 52,867) test postings::bench::bench_skip_next_p01 ... bench: 55,205 ns/iter (+/- 714) test postings::bench::bench_skip_next_p1 ... bench: 131,433 ns/iter (+/- 2,814) test postings::bench::bench_skip_next_p10 ... bench: 478,830 ns/iter (+/- 12,794) test postings::bench::bench_skip_next_p90 ... bench: 931,082 ns/iter (+/- 31,468)	2021-07-30 14:38:42 +09:00
Evance Soumaoro	b8a10c8406	switched to memmap2-rs (#1120 )	2021-07-27 18:40:41 +09:00
PSeitz	ff4813529e	add comments on compression (#1119 )	2021-07-26 22:54:22 +09:00
PSeitz	470bc18e9b	Merge pull request #1118 from tantivy-search/remove_rand move rand to optional dependencies	2021-07-21 18:01:22 +01:00
Pascal Seitz	0b1add0ec6	move rand to optional dependencies closes #1117	2021-07-21 17:49:24 +01:00
François Massot	1db76dd9cf	Merge pull request #1113 from shikhar/patch-1 stale comments in segment_reader.rs	2021-07-20 23:02:20 +02:00
François Massot	467a9517db	Merge pull request #1114 from shikhar/patch-2 FilterCollector doc fix	2021-07-20 21:02:28 +02:00
Shikhar Bhushan	b361315a67	FilterCollector doc fix Other types supported since https://github.com/tantivy-search/tantivy/pull/953/files	2021-07-15 22:55:47 -04:00
Shikhar Bhushan	4e3771bffc	stale comments in segment_reader.rs	2021-07-15 22:47:32 -04:00
PSeitz	8176b0335a	Merge pull request #1108 from PSeitz/pwnedbytes move ownedbytes to own crate	2021-07-05 16:07:56 +02:00
Pascal Seitz	811ac98f36	more inlines	2021-07-05 15:49:42 +02:00
François Massot	f4b2e71800	Handle field names with any characters with a known set of special (#1109 ) * Handle field names with any characters with a known set of special characters and an escape one * Update field name validation rule to check only if it has at least one character and does not start with `-` Closes #1087.	2021-07-05 22:31:36 +09:00
PSeitz	c431cfcf12	extend proptests, fix race condition (#1107 ) * extend proptests, fix race condition * cargo fmt	2021-07-05 18:28:56 +09:00
PSeitz	92f20bc5a2	use nightly image in coverage	2021-07-03 09:38:44 +02:00
PSeitz	57f931da3c	Create coverage.yml	2021-07-03 09:35:07 +02:00
Pascal Seitz	9b662e6d03	move ownedbytes to own crate fixes #1106	2021-07-02 16:51:59 +02:00
PSeitz	18377d949c	Merge pull request #1105 from PSeitz/clippy Fix clippy warnings	2021-07-02 10:01:19 +02:00
Pascal Seitz	e6427b2588	cleanup	2021-07-02 09:21:22 +02:00
Pascal Seitz	0062fe705d	cargo fmt	2021-07-01 18:17:08 +02:00
Pascal Seitz	9b3e508753	fix clippy	2021-07-01 18:06:09 +02:00
Pascal Seitz	a1ac63ee1c	fix clippy	2021-07-01 18:06:03 +02:00
Pascal Seitz	e496ae0470	clippy fixes	2021-07-01 17:43:50 +02:00
Pascal Seitz	1e4df54ab3	fix clippy	2021-07-01 17:41:53 +02:00
Pascal Seitz	2de249af74	clippy fixes	2021-07-01 17:37:37 +02:00
Pascal Seitz	10f056fbb4	apply clippy fixes	2021-07-01 17:08:44 +02:00
PSeitz	074b09d0c0	Merge pull request #1102 from PSeitz/proptests extend proptests for sorting and merge	2021-07-01 16:23:53 +02:00
Pascal Seitz	86d0727659	add facet test closes #1100	2021-07-01 15:36:17 +02:00
Pascal Seitz	be3e1b8718	cargo fmt	2021-07-01 14:02:09 +02:00
Pascal Seitz	8fdf59bdac	add search test for proptest	2021-07-01 14:01:30 +02:00
Pascal Seitz	ebebce2102	cargo fmt	2021-07-01 10:47:20 +02:00
Pascal Seitz	8044ec38da	test docstore in proptest	2021-07-01 10:15:42 +02:00
Pascal Seitz	7413f87265	use set instead of vec in proptest	2021-07-01 08:28:51 +02:00
PSeitz	aea2e77665	Merge pull request #1097 from PSeitz/multifastfield Use dynamic fastfield codes for multivalues fixes #1093	2021-06-30 14:38:26 +02:00
Pascal Seitz	a15845f9fd	add merge case to proptest, test multivalue fastfields #1100	2021-06-30 13:13:33 +02:00
Pascal Seitz	94ac44df4f	proptest with optional sorting	2021-06-30 12:06:03 +02:00
Pascal Seitz	f80d804a57	add random commits in proptest	2021-06-30 11:18:07 +02:00
Pascal Seitz	3b5c1d7817	use measure_time 0.7	2021-06-30 11:08:02 +02:00
Pascal Seitz	24274edf81	remove trait impl for &Vec	2021-06-30 09:50:47 +02:00
Paul Masurel	d58497529b	Fixed CHANGELOG to include 0.15.2.	2021-06-30 16:34:47 +09:00
Pascal Seitz	130495abab	cleanup	2021-06-30 08:57:55 +02:00
Pascal Seitz	9b743d60fb	make docid mapping non optional make docid mapping non optional add trivial flag for docid mapping add time measurements	2021-06-30 08:57:55 +02:00
Pascal Seitz	5c9e2ef036	wrap docidmapping in struct	2021-06-30 08:57:55 +02:00
Pascal Seitz	8526434b63	add dynamic fastfield case add dynamic fastfield for single fast field unsorted fix scary documentation bug add num_len instead of len	2021-06-30 08:57:55 +02:00
Pascal Seitz	6ba302c481	Use dynamic fastfield codes for multivalues fixes #1093 Use dynamic fastfield codes for multivalues fixes (only sorting case covered) Rename get to get_val due to conflict with Vec use u64 precision instead of u32 for get_range, to allow use of existing fast field interface (actually not sure if it would be better have a different interface)	2021-06-30 08:57:55 +02:00
Paul Masurel	de92f094aa	Closes #1101 fix delete documents with sort by field Closes #1101 * fix delete documents with sort by field Co-authored-by: Andre-Philippe Paquet <appaquet@gmail.com>	2021-06-30 15:51:32 +09:00
Evance Soumaoro	c82cee66de	exposing min/max value interface on MultiValuedFastField Reader (#1096 )	2021-06-23 17:38:50 +09:00
Paul Masurel	6eed05b1ce	Revert "Exposing min/max value interface on MultiValuedFastField Reader (#1094 )" (#1095 ) This reverts commit `bb488305c9`.	2021-06-23 10:25:11 +09:00
Evance Soumaoro	bb488305c9	Exposing min/max value interface on MultiValuedFastField Reader (#1094 ) Exposing min/max value interface on MultiValuedFastField Reader	2021-06-23 08:51:36 +09:00