Compare commits


72 Commits

Author SHA1 Message Date
Paul Masurel
89f91b1b58 first stab 2021-10-06 12:10:16 +09:00
Paul Masurel
19965c46bc Added wasm-mt 2021-10-06 10:45:17 +09:00
dependabot[bot]
4d05b26e7a Update lru requirement from 0.6.5 to 0.7.0 (#1165)
Updates the requirements on [lru](https://github.com/jeromefroe/lru-rs) to permit the latest version.
- [Release notes](https://github.com/jeromefroe/lru-rs/releases)
- [Changelog](https://github.com/jeromefroe/lru-rs/blob/master/CHANGELOG.md)
- [Commits](https://github.com/jeromefroe/lru-rs/compare/0.6.5...0.7.0)

---
updated-dependencies:
- dependency-name: lru
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-10-06 05:50:24 +09:00
Paul Masurel
0855649986 Leaning more on the alive (vs delete) semantics. (#1164) 2021-10-05 18:53:29 +09:00
PSeitz
d828e58903 Merge pull request #1163 from PSeitz/reduce_mem_usage
reduce mem usage
2021-10-01 08:03:41 +02:00
Pascal Seitz
aa0396fe27 fix variable names 2021-10-01 13:48:51 +08:00
Pascal Seitz
8d8315f8d0 prealloc vec in postinglist 2021-09-29 09:02:38 +08:00
Pascal Seitz
078c0a2e2e reserve vec 2021-09-29 08:45:04 +08:00
Pascal Seitz
f21e8dd875 use only segment ordinal in docidmapping 2021-09-29 08:44:56 +08:00
Tomoko Uchida
74e36c7e97 Add unit tests for tokenizers and filters (#1156)
* add unit test for SimpleTokenizer
* add unit tests for tokenizers and filters.
2021-09-27 10:22:01 +09:00
PSeitz
f27ae04282 fix slope calculation in multilinear interpol (#1161)
add test to check for compression
2021-09-27 10:14:03 +09:00
PSeitz
0ce49c9dd4 use lz4_flex 0.9.0 (#1160) 2021-09-27 10:12:20 +09:00
PSeitz
fe8e58e078 Merge pull request #1154 from PSeitz/delete_bitset
add DeleteBitSet iterator
2021-09-24 09:37:39 +02:00
Pascal Seitz
efc0d8341b fix comment 2021-09-24 15:09:21 +08:00
Pascal Seitz
22bcc83d10 fix padding in initialization 2021-09-24 14:43:04 +08:00
Pascal Seitz
5ee5037934 create and use ReadSerializedBitSet 2021-09-24 12:53:33 +08:00
Pascal Seitz
c217bfed1e cargo fmt 2021-09-23 21:02:19 +08:00
Pascal Seitz
c27ccd3e24 improve naming 2021-09-23 21:02:09 +08:00
Paul Masurel
367f5da782 Fixed comment to the index accessor 2021-09-23 21:53:48 +09:00
Mestery
b256df6599 add index accessor for index writer (#1159)
* add index accessor for index writer

* Update src/indexer/index_writer.rs

Co-authored-by: Paul Masurel <paul@quickwit.io>
2021-09-23 21:49:20 +09:00
Pascal Seitz
d7a6a409a1 renames 2021-09-23 20:33:11 +08:00
Pascal Seitz
a1f5cead96 AliveBitSet instead of DeleteBitSet 2021-09-23 20:03:57 +08:00
dependabot[bot]
37c5fe3c86 Update memmap2 requirement from 0.4 to 0.5 (#1157)
Updates the requirements on [memmap2](https://github.com/RazrFalcon/memmap2-rs) to permit the latest version.
- [Release notes](https://github.com/RazrFalcon/memmap2-rs/releases)
- [Changelog](https://github.com/RazrFalcon/memmap2-rs/blob/master/CHANGELOG.md)
- [Commits](https://github.com/RazrFalcon/memmap2-rs/compare/v0.4.0...v0.5.0)

---
updated-dependencies:
- dependency-name: memmap2
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-09-23 20:18:27 +09:00
Pascal Seitz
4583fa270b fixes 2021-09-23 10:39:53 +08:00
Pascal Seitz
beb3a5bd73 fix len 2021-09-18 17:58:15 +08:00
Pascal Seitz
93cbd52bf0 move code to bitset, add inline, add benchmark 2021-09-18 17:35:22 +08:00
Pascal Seitz
c22177a005 add iterator 2021-09-17 15:29:27 +08:00
Pascal Seitz
4da71273e1 add de/serialization for bitset
remove len footgun
2021-09-17 10:28:12 +08:00
dependabot[bot]
2c78b31aab Update memmap2 requirement from 0.3 to 0.4 (#1155)
Updates the requirements on [memmap2](https://github.com/RazrFalcon/memmap2-rs) to permit the latest version.
- [Release notes](https://github.com/RazrFalcon/memmap2-rs/releases)
- [Changelog](https://github.com/RazrFalcon/memmap2-rs/blob/master/CHANGELOG.md)
- [Commits](https://github.com/RazrFalcon/memmap2-rs/compare/v.0.3.0...v0.4.0)
2021-09-17 08:52:52 +09:00
Pascal Seitz
4ae1d87632 add DeleteBitSet iterator 2021-09-15 23:10:04 +08:00
Paul Masurel
46b86a7976 Bumped version and edited changelog 2021-09-10 23:05:09 +09:00
PSeitz
3bc177e69d fix #1151 (#1152)
* fix #1151

Fixes an off-by-one error in the stats for the index fast field in the multi-value fast field.
When retrieving the data range for a docid, `get(docid)..get(docid + 1)` is requested. On creation,
the num_vals statistic was set to docid instead of docid + 1. In the multivaluelinearinterpol fast
field the last value was therefore not serialized (and would return 0 instead in most cases).
So the last document's `get(lastdoc)..get(lastdoc + 1)` would return the invalid range `value..0`.

This PR adds a proptest to cover this scenario. It requires a combination of a large number of values
(multilinear interpolation is only active for more than 5_000 values) and a merge.
2021-09-10 23:00:37 +09:00
PSeitz
319609e9c1 test cargo-llvm-cov (#1149) 2021-09-03 22:00:43 +09:00
Kanji Yomoda
9d87b89718 Fix incorrect comment for Index::create_in_dir (#1148)
* Fix incorrect comment for Index::create_in_dir
2021-09-03 10:37:16 +09:00
Tomoko Uchida
dd81e38e53 Add WhitespaceTokenizer (#1147)
* Add WhitespaceTokenizer.
2021-08-29 18:20:49 +09:00
Paul Masurel
9f32b22602 Preparing for release. 2021-08-26 09:07:08 +09:00
sigaloid
096ce7488e Resolve some clippys, format (#1144)
* cargo +nightly clippy --fix -Z unstable-options
2021-08-26 08:46:00 +09:00
PSeitz
a1782dd172 Update index_sorting.md 2021-08-25 07:55:50 +01:00
PSeitz
000d76b11a Update index_sorting.md 2021-08-24 19:28:06 +01:00
PSeitz
abd29f6646 Update index_sorting.md 2021-08-24 19:26:19 +01:00
PSeitz
b4ecf0ab2f Merge pull request #1146 from tantivy-search/sorting_doc
add sorting to book
2021-08-23 17:37:54 +01:00
Pascal Seitz
798f7dbf67 add sorting to book 2021-08-23 17:36:41 +01:00
PSeitz
06a2e47c8d Merge pull request #1145 from tantivy-search/blub2
cargo fmt
2021-08-21 18:52:50 +01:00
Pascal Seitz
e0b83eb291 cargo fmt 2021-08-21 18:52:10 +01:00
PSeitz
13401f46ea add wildcard mention 2021-08-21 18:10:33 +01:00
PSeitz
1a45b030dc Merge pull request #1141 from tantivy-search/tantivy_common
dissolve common module
2021-08-20 08:03:37 +01:00
Pascal Seitz
62052bcc2d add missing test function
closes #1139
2021-08-20 07:26:22 +01:00
Pascal Seitz
3265f7bec3 dissolve common module 2021-08-19 23:26:34 +01:00
Pascal Seitz
ee0881712a move bitset to common crate, move composite file to directory 2021-08-19 17:45:09 +01:00
PSeitz
483e0336b6 Merge pull request #1140 from tantivy-search/tantivy_common
rename common to tantivy-common
2021-08-19 13:02:54 +01:00
Pascal Seitz
3e8f267e33 rename common to tantivy-common 2021-08-19 10:27:20 +01:00
Paul Masurel
3b247fd968 Version bump 2021-08-19 10:12:30 +09:00
Paul Masurel
750f6e6479 Removed obsolete unit test (#1138) 2021-08-19 10:07:49 +09:00
Evance Soumaoro
5b475e6603 Checksum validation using active files (#1130)
* validate_checksum now uses segment files, not managed files
2021-08-19 10:03:20 +09:00
PSeitz
0ca7f73dc5 add docs badge, fix build badge 2021-08-13 19:40:33 +01:00
PSeitz
47ed18845e Merge pull request #1136 from tantivy-search/minor_fixes
more docs detail
2021-08-13 18:11:47 +01:00
Pascal Seitz
dc141cdb29 more docs detail
remove code duplicate
2021-08-13 17:40:13 +01:00
PSeitz
f6cf6e889b Merge pull request #1133 from tantivy-search/merge_overflow
test doc_freq and term_freq in sorted index
2021-08-05 07:53:46 +01:00
Pascal Seitz
f379a80233 test doc_freq and term_freq in sorted index 2021-08-03 11:38:05 +01:00
PSeitz
4a320fd1ff fix delta position in merge and index sorting (#1132)
fixes #1125
2021-08-03 18:06:36 +09:00
PSeitz
85d23e8e3b Merge pull request #1129 from tantivy-search/merge_overflow
add long running test in ci
2021-08-02 15:54:31 +01:00
Pascal Seitz
022ab9d298 don't run as pr 2021-08-02 15:44:00 +01:00
Pascal Seitz
605e8603dc add positions to long running test 2021-08-02 15:29:49 +01:00
Pascal Seitz
70f160b329 add long running test in ci 2021-08-02 11:35:39 +01:00
PSeitz
6d265e6bed fix gh action name 2021-08-02 10:38:01 +01:00
PSeitz
fdc512391b Merge pull request #1128 from tantivy-search/merge_overflow
add sort to functional test, add env for iterations
2021-08-02 10:29:16 +01:00
Pascal Seitz
108714c934 add sort to functional test, add env for iterations 2021-08-02 10:11:17 +01:00
Paul Masurel
44e8cf98a5 Cargo fmt 2021-07-30 15:30:01 +09:00
Paul Masurel
f0ee69d9e9 Remove the complicated block search logic for a simpler branchless (#1124)
binary search

The code is simpler and faster.

Before
test postings::bench::bench_segment_intersection                                                                         ... bench:   2,093,697 ns/iter (+/- 115,509)
test postings::bench::bench_skip_next_p01                                                                                ... bench:      58,585 ns/iter (+/- 796)
test postings::bench::bench_skip_next_p1                                                                                 ... bench:     160,872 ns/iter (+/- 5,164)
test postings::bench::bench_skip_next_p10                                                                                ... bench:     615,229 ns/iter (+/- 25,108)
test postings::bench::bench_skip_next_p90                                                                                ... bench:   1,120,509 ns/iter (+/- 22,271)

After
test postings::bench::bench_segment_intersection                                                                         ... bench:   1,747,726 ns/iter (+/- 52,867)
test postings::bench::bench_skip_next_p01                                                                                ... bench:      55,205 ns/iter (+/- 714)
test postings::bench::bench_skip_next_p1                                                                                 ... bench:     131,433 ns/iter (+/- 2,814)
test postings::bench::bench_skip_next_p10                                                                                ... bench:     478,830 ns/iter (+/- 12,794)
test postings::bench::bench_skip_next_p90                                                                                ... bench:     931,082 ns/iter (+/- 31,468)
2021-07-30 14:38:42 +09:00
Evance Soumaoro
b8a10c8406 switched to memmap2-rs (#1120) 2021-07-27 18:40:41 +09:00
PSeitz
ff4813529e add comments on compression (#1119) 2021-07-26 22:54:22 +09:00
PSeitz
470bc18e9b Merge pull request #1118 from tantivy-search/remove_rand
move rand to optional dependencies
2021-07-21 18:01:22 +01:00
105 changed files with 2066 additions and 1196 deletions

View File

@@ -1,27 +1,25 @@
name: coverage
name: Coverage
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
test:
name: coverage
runs-on: ubuntu-latest
container:
image: xd009642/tarpaulin:develop-nightly
options: --security-opt seccomp=unconfined
coverage:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v2
- name: Generate code coverage
run: |
cargo +nightly tarpaulin --verbose --all-features --workspace --timeout 120 --out Xml
- name: Upload to codecov.io
uses: codecov/codecov-action@v1
- uses: actions/checkout@v2
- name: Install Rust
run: rustup toolchain install nightly --component llvm-tools-preview
- name: Install cargo-llvm-cov
run: curl -LsSf https://github.com/taiki-e/cargo-llvm-cov/releases/latest/download/cargo-llvm-cov-x86_64-unknown-linux-gnu.tar.gz | tar xzf - -C ~/.cargo/bin
- name: Generate code coverage
run: cargo llvm-cov --all-features --workspace --lcov --output-path lcov.info
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1
with:
# token: ${{secrets.CODECOV_TOKEN}} # not required for public repos
fail_ci_if_error: true
token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos
files: lcov.info
fail_ci_if_error: true

.github/workflows/long_running.yml vendored Normal file
View File

@@ -0,0 +1,24 @@
name: Rust
on:
push:
branches: [ main ]
env:
CARGO_TERM_COLOR: always
NUM_FUNCTIONAL_TEST_ITERATIONS: 20000
jobs:
functional_test_unsorted:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Run indexing_unsorted
run: cargo test indexing_unsorted -- --ignored
functional_test_sorted:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Run indexing_sorted
run: cargo test indexing_sorted -- --ignored

View File

@@ -10,7 +10,7 @@ env:
CARGO_TERM_COLOR: always
jobs:
build:
test:
runs-on: ubuntu-latest

View File

@@ -1,3 +1,12 @@
Tantivy 0.16.1
========================
- Major Bugfix on multivalued fastfield. #1151
Tantivy 0.16.0
=========================
- Bugfix in the filesum check. (@evanxg852000) #1127
- Bugfix in positions when the index is sorted by a field. (@appaquet) #1125
Tantivy 0.15.3
=========================
- Major bugfix. Deleting documents was broken when the index was sorted by a field. (@appaquet, @fulmicoton) #1101

View File

@@ -1,6 +1,6 @@
[package]
name = "tantivy"
version = "0.16.0-dev"
version = "0.16.1"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = ["database-implementations", "data-structures"]
@@ -19,23 +19,24 @@ crc32fast = "1.2.1"
once_cell = "1.7.2"
regex ={ version = "1.5.4", default-features = false, features = ["std"] }
tantivy-fst = "0.3"
memmap = {version = "0.7", optional=true}
lz4_flex = { version = "0.8.0", default-features = false, features = ["checked-decode"], optional = true }
memmap2 = {version = "0.5", optional=true}
lz4_flex = { version = "0.9.0", default-features = false, features = ["checked-decode"], optional = true }
brotli = { version = "3.3", optional = true }
snap = { version = "1.0.5", optional = true }
tempfile = { version = "3.2", optional = true }
log = "0.4.14"
serde = { version = "1.0.126", features = ["derive"] }
serde_closure = "0.3"
serde_json = "1.0.64"
num_cpus = "1.13"
fs2={ version = "0.4.3", optional = true }
levenshtein_automata = "0.2"
uuid = { version = "0.8.2", features = ["v4", "serde"] }
crossbeam = "0.8"
crossbeam = "0.8.1"
futures = { version = "0.3.15", features = ["thread-pool"] }
tantivy-query-grammar = { version="0.15.0", path="./query-grammar" }
tantivy-bitpacker = { version="0.1", path="./bitpacker" }
common = { version="0.1", path="./common" }
common = { version = "0.1", path = "./common/", package = "tantivy-common" }
fastfield_codecs = { version="0.1", path="./fastfield_codecs", default-features = false }
ownedbytes = { version="0.1", path="./ownedbytes" }
stable_deref_trait = "1.2"
@@ -50,11 +51,12 @@ fail = "0.4"
murmurhash32 = "0.2"
chrono = "0.4.19"
smallvec = "1.6.1"
rayon = "1.5"
lru = "0.6.5"
lru = "0.7.0"
fastdivide = "0.3"
itertools = "0.10.0"
measure_time = "0.7.0"
wasm-mt = "0.1"
wasm-mt-pool = "0.1"
[target.'cfg(windows)'.dependencies]
winapi = "0.3.9"
@@ -64,7 +66,9 @@ rand = "0.8.3"
maplit = "1.0.2"
matches = "0.1.8"
proptest = "1.0"
criterion = "0.3.4"
criterion = "0.3.5"
test-env-log = "0.2.7"
env_logger = "0.9.0"
[dev-dependencies.fail]
version = "0.4"
@@ -81,7 +85,7 @@ overflow-checks = true
[features]
default = ["mmap", "lz4-compression" ]
mmap = ["fs2", "tempfile", "memmap"]
mmap = ["fs2", "tempfile", "memmap2"]
brotli-compression = ["brotli"]
lz4-compression = ["lz4_flex"]

View File

@@ -1,9 +1,9 @@
[![Build Status](https://travis-ci.org/tantivy-search/tantivy.svg?branch=main)](https://travis-ci.org/tantivy-search/tantivy)
[![Docs](https://docs.rs/tantivy/badge.svg)](https://docs.rs/crate/tantivy/)
[![Build Status](https://github.com/tantivy-search/tantivy/actions/workflows/test.yml/badge.svg)](https://github.com/tantivy-search/tantivy/actions/workflows/test.yml)
[![codecov](https://codecov.io/gh/tantivy-search/tantivy/branch/main/graph/badge.svg)](https://codecov.io/gh/tantivy-search/tantivy)
[![Join the chat at https://gitter.im/tantivy-search/tantivy](https://badges.gitter.im/tantivy-search/tantivy.svg)](https://gitter.im/tantivy-search/tantivy?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Build status](https://ci.appveyor.com/api/projects/status/r7nb13kj23u8m9pj/branch/main?svg=true)](https://ci.appveyor.com/project/fulmicoton/tantivy/branch/main)
[![Crates.io](https://img.shields.io/crates/v/tantivy.svg)](https://crates.io/crates/tantivy)
![Tantivy](https://tantivy-search.github.io/logo/tantivy-logo.png)

View File

@@ -1,6 +1,6 @@
[package]
name = "tantivy-bitpacker"
version = "0.1.0"
version = "0.1.1"
edition = "2018"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"

View File

@@ -50,3 +50,32 @@ where
}
None
}
#[test]
fn test_compute_num_bits() {
assert_eq!(compute_num_bits(1), 1u8);
assert_eq!(compute_num_bits(0), 0u8);
assert_eq!(compute_num_bits(2), 2u8);
assert_eq!(compute_num_bits(3), 2u8);
assert_eq!(compute_num_bits(4), 3u8);
assert_eq!(compute_num_bits(255), 8u8);
assert_eq!(compute_num_bits(256), 9u8);
assert_eq!(compute_num_bits(5_000_000_000), 33u8);
}
#[test]
fn test_minmax_empty() {
let vals: Vec<u32> = vec![];
assert_eq!(minmax(vals.into_iter()), None);
}
#[test]
fn test_minmax_one() {
assert_eq!(minmax(vec![1].into_iter()), Some((1, 1)));
}
#[test]
fn test_minmax_two() {
assert_eq!(minmax(vec![1, 2].into_iter()), Some((1, 2)));
assert_eq!(minmax(vec![2, 1].into_iter()), Some((1, 2)));
}

View File

@@ -1,5 +1,5 @@
[package]
name = "common"
name = "tantivy-common"
version = "0.1.0"
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
license = "MIT"
@@ -10,3 +10,8 @@ description = "common traits and utility functions used by multiple tantivy subc
[dependencies]
byteorder = "1.4.3"
ownedbytes = { version="0.1", path="../ownedbytes" }
[dev-dependencies]
proptest = "1.0.0"
rand = "0.8.4"

View File

@@ -1,8 +1,11 @@
use std::fmt;
use ownedbytes::OwnedBytes;
use std::convert::TryInto;
use std::io::Write;
use std::u64;
use std::{fmt, io};
#[derive(Clone, Copy, Eq, PartialEq)]
pub(crate) struct TinySet(u64);
pub struct TinySet(u64);
impl fmt::Debug for TinySet {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
@@ -14,6 +17,7 @@ pub struct TinySetIterator(TinySet);
impl Iterator for TinySetIterator {
type Item = u32;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.0.pop_lowest()
}
@@ -28,30 +32,54 @@ impl IntoIterator for TinySet {
}
impl TinySet {
pub fn serialize<T: Write>(&self, writer: &mut T) -> io::Result<()> {
writer.write_all(self.0.to_le_bytes().as_ref())
}
#[inline]
pub fn deserialize(data: [u8; 8]) -> io::Result<Self> {
let val: u64 = u64::from_le_bytes(data);
Ok(TinySet(val))
}
/// Returns an empty `TinySet`.
#[inline]
pub fn empty() -> TinySet {
TinySet(0u64)
}
/// Returns a full `TinySet`.
#[inline]
pub fn full() -> TinySet {
TinySet::empty().complement()
}
pub fn clear(&mut self) {
self.0 = 0u64;
}
#[inline]
/// Returns the complement of the set in `[0, 64)`.
///
/// Careful when making this function public, as it will break the padding handling in the last
/// bucket.
fn complement(self) -> TinySet {
TinySet(!self.0)
}
#[inline]
/// Returns true iff the `TinySet` contains the element `el`.
pub fn contains(self, el: u32) -> bool {
!self.intersect(TinySet::singleton(el)).is_empty()
}
#[inline]
/// Returns the number of elements in the TinySet.
pub fn len(self) -> u32 {
self.0.count_ones()
}
#[inline]
/// Returns the intersection of `self` and `other`
pub fn intersect(self, other: TinySet) -> TinySet {
TinySet(self.0 & other.0)
@@ -64,13 +92,21 @@ impl TinySet {
TinySet(1u64 << u64::from(el))
}
/// Insert a new element within [0..64[
/// Insert a new element within [0..64)
#[inline]
pub fn insert(self, el: u32) -> TinySet {
self.union(TinySet::singleton(el))
}
/// Insert a new element within [0..64[
/// Removes an element within [0..64)
#[inline]
pub fn remove(self, el: u32) -> TinySet {
self.intersect(TinySet::singleton(el).complement())
}
/// Insert a new element within [0..64)
///
/// returns true if the set changed
#[inline]
pub fn insert_mut(&mut self, el: u32) -> bool {
let old = *self;
@@ -78,6 +114,16 @@ impl TinySet {
old != *self
}
/// Removes an element within [0..64)
///
/// returns true if the set changed
#[inline]
pub fn remove_mut(&mut self, el: u32) -> bool {
let old = *self;
*self = old.remove(el);
old != *self
}
/// Returns the union of two tinysets
#[inline]
pub fn union(self, other: TinySet) -> TinySet {
@@ -123,7 +169,7 @@ impl TinySet {
#[derive(Clone)]
pub struct BitSet {
tinysets: Box<[TinySet]>,
len: usize,
len: u64,
max_value: u32,
}
@@ -132,8 +178,41 @@ fn num_buckets(max_val: u32) -> u32 {
}
impl BitSet {
/// Serialize a `BitSet`.
///
pub fn serialize<T: Write>(&self, writer: &mut T) -> io::Result<()> {
writer.write_all(self.max_value.to_le_bytes().as_ref())?;
for tinyset in self.tinysets.iter() {
tinyset.serialize(writer)?;
}
writer.flush()?;
Ok(())
}
/// Deserialize a `BitSet`.
///
#[cfg(test)]
pub fn deserialize(mut data: &[u8]) -> io::Result<Self> {
let max_value: u32 = u32::from_le_bytes(data[..4].try_into().unwrap());
data = &data[4..];
let mut len: u64 = 0;
let mut tinysets = vec![];
for chunk in data.chunks_exact(8) {
let tinyset = TinySet::deserialize(chunk.try_into().unwrap())?;
len += tinyset.len() as u64;
tinysets.push(tinyset);
}
Ok(BitSet {
tinysets: tinysets.into_boxed_slice(),
len,
max_value,
})
}
/// Create a new `BitSet` that may contain elements
/// within `[0, max_val[`.
/// within `[0, max_val)`.
pub fn with_max_value(max_value: u32) -> BitSet {
let num_buckets = num_buckets(max_value);
let tinybisets = vec![TinySet::empty(); num_buckets as usize].into_boxed_slice();
@@ -144,6 +223,23 @@ impl BitSet {
}
}
/// Create a new `BitSet` that may contain elements within `[0, max_val)`.
/// Initially all values will be set.
pub fn with_max_value_and_full(max_value: u32) -> BitSet {
let num_buckets = num_buckets(max_value);
let mut tinybisets = vec![TinySet::full(); num_buckets as usize].into_boxed_slice();
// Fix padding
let lower = max_value % 64u32;
tinybisets[tinybisets.len() - 1] = TinySet::range_lower(lower);
BitSet {
tinysets: tinybisets,
len: max_value as u64,
max_value,
}
}
/// Removes all elements from the `BitSet`.
pub fn clear(&mut self) {
for tinyset in self.tinysets.iter_mut() {
@@ -153,10 +249,11 @@ impl BitSet {
/// Returns the number of elements in the `BitSet`.
pub fn len(&self) -> usize {
self.len
self.len as usize
}
/// Inserts an element in the `BitSet`
#[inline]
pub fn insert(&mut self, el: u32) {
// we do not check saturated els.
let higher = el / 64u32;
@@ -168,7 +265,21 @@ impl BitSet {
};
}
/// Removes an element from the `BitSet`
#[inline]
pub fn remove(&mut self, el: u32) {
// we do not check saturated els.
let higher = el / 64u32;
let lower = el % 64u32;
self.len -= if self.tinysets[higher as usize].remove_mut(lower) {
1
} else {
0
};
}
/// Returns true iff the element is in the `BitSet`.
#[inline]
pub fn contains(&self, el: u32) -> bool {
self.tinyset(el / 64u32).contains(el % 64)
}
@@ -178,7 +289,7 @@ impl BitSet {
///
/// Reminder: the tiny set with the bucket `bucket` represents the
/// elements from `bucket * 64` to `(bucket+1) * 64`.
pub(crate) fn first_non_empty_bucket(&self, bucket: u32) -> Option<u32> {
pub fn first_non_empty_bucket(&self, bucket: u32) -> Option<u32> {
self.tinysets[bucket as usize..]
.iter()
.cloned()
@@ -193,23 +304,149 @@ impl BitSet {
/// Returns the tiny bitset representing
/// the set restricted to the number range from
/// `bucket * 64` to `(bucket + 1) * 64`.
pub(crate) fn tinyset(&self, bucket: u32) -> TinySet {
pub fn tinyset(&self, bucket: u32) -> TinySet {
self.tinysets[bucket as usize]
}
}
/// Serialized BitSet.
#[derive(Clone)]
pub struct ReadSerializedBitSet {
data: OwnedBytes,
max_value: u32,
}
impl ReadSerializedBitSet {
pub fn open(data: OwnedBytes) -> Self {
let (max_value_data, data) = data.split(4);
let max_value: u32 = u32::from_le_bytes(max_value_data.as_ref().try_into().unwrap());
ReadSerializedBitSet { data, max_value }
}
/// Number of elements in the bitset.
#[inline]
pub fn len(&self) -> usize {
self.iter_tinysets()
.map(|tinyset| tinyset.len() as usize)
.sum()
}
/// Iterates the tinysets on the fly from the serialized data.
///
#[inline]
fn iter_tinysets<'a>(&'a self) -> impl Iterator<Item = TinySet> + 'a {
assert!((self.data.len()) % 8 == 0);
self.data.chunks_exact(8).map(move |chunk| {
let tinyset: TinySet = TinySet::deserialize(chunk.try_into().unwrap()).unwrap();
tinyset
})
}
/// Iterate over the positions of the elements.
///
#[inline]
pub fn iter<'a>(&'a self) -> impl Iterator<Item = u32> + 'a {
self.iter_tinysets()
.enumerate()
.flat_map(move |(chunk_num, tinyset)| {
let chunk_base_val = chunk_num as u32 * 64;
tinyset
.into_iter()
.map(move |val| val + chunk_base_val)
.take_while(move |doc| *doc < self.max_value)
})
}
/// Returns true iff the element is in the `BitSet`.
#[inline]
pub fn contains(&self, el: u32) -> bool {
let byte_offset = el / 8u32;
let b: u8 = self.data[byte_offset as usize];
let shift = (el % 8) as u8;
b & (1u8 << shift) != 0
}
/// Maximum value the bitset may contain.
/// (Note this is not the maximum value contained in the set.)
///
/// A bitset has an intrinsic capacity.
/// It only stores elements within [0..max_value).
#[inline]
pub fn max_value(&self) -> u32 {
self.max_value
}
}
#[cfg(test)]
mod tests {
use super::BitSet;
use super::ReadSerializedBitSet;
use super::TinySet;
use crate::docset::{DocSet, TERMINATED};
use crate::query::BitSetDocSet;
use crate::tests;
use crate::tests::generate_nonunique_unsorted;
use std::collections::BTreeSet;
use ownedbytes::OwnedBytes;
use rand::distributions::Bernoulli;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use std::collections::HashSet;
use std::convert::TryInto;
#[test]
fn test_read_serialized_bitset_full() {
let mut bitset = BitSet::with_max_value_and_full(5);
bitset.remove(3);
let mut out = vec![];
bitset.serialize(&mut out).unwrap();
let bitset = ReadSerializedBitSet::open(OwnedBytes::new(out));
assert_eq!(bitset.len(), 4);
}
#[test]
fn test_read_serialized_bitset_empty() {
let mut bitset = BitSet::with_max_value(5);
bitset.insert(3);
let mut out = vec![];
bitset.serialize(&mut out).unwrap();
let bitset = ReadSerializedBitSet::open(OwnedBytes::new(out));
assert_eq!(bitset.len(), 1);
{
let bitset = BitSet::with_max_value(5);
let mut out = vec![];
bitset.serialize(&mut out).unwrap();
let bitset = ReadSerializedBitSet::open(OwnedBytes::new(out));
assert_eq!(bitset.len(), 0);
}
}
#[test]
fn test_tiny_set_remove() {
{
let mut u = TinySet::empty().insert(63u32).insert(5).remove(63u32);
assert_eq!(u.pop_lowest(), Some(5u32));
assert!(u.pop_lowest().is_none());
}
{
let mut u = TinySet::empty()
.insert(63u32)
.insert(1)
.insert(5)
.remove(63u32);
assert_eq!(u.pop_lowest(), Some(1u32));
assert_eq!(u.pop_lowest(), Some(5u32));
assert!(u.pop_lowest().is_none());
}
{
let mut u = TinySet::empty().insert(1).remove(63u32);
assert_eq!(u.pop_lowest(), Some(1u32));
assert!(u.pop_lowest().is_none());
}
{
let mut u = TinySet::empty().insert(1).remove(1u32);
assert!(u.pop_lowest().is_none());
}
}
#[test]
fn test_tiny_set() {
assert!(TinySet::empty().is_empty());
@@ -235,6 +472,21 @@ mod tests {
assert_eq!(u.pop_lowest(), Some(63u32));
assert!(u.pop_lowest().is_none());
}
{
let mut u = TinySet::empty().insert(63u32).insert(5);
assert_eq!(u.pop_lowest(), Some(5u32));
assert_eq!(u.pop_lowest(), Some(63u32));
assert!(u.pop_lowest().is_none());
}
{
let u = TinySet::empty().insert(63u32).insert(5);
let mut data = vec![];
u.serialize(&mut data).unwrap();
let mut u = TinySet::deserialize(data[..8].try_into().unwrap()).unwrap();
assert_eq!(u.pop_lowest(), Some(5u32));
assert_eq!(u.pop_lowest(), Some(63u32));
assert!(u.pop_lowest().is_none());
}
}
#[test]
@@ -251,6 +503,16 @@ mod tests {
assert_eq!(hashset.contains(&el), bitset.contains(el));
}
assert_eq!(bitset.max_value(), max_value);
// test deser
let mut data = vec![];
bitset.serialize(&mut data).unwrap();
let bitset = BitSet::deserialize(&data).unwrap();
for el in 0..max_value {
assert_eq!(hashset.contains(&el), bitset.contains(el));
}
assert_eq!(bitset.max_value(), max_value);
assert_eq!(bitset.len(), els.len());
};
test_against_hashset(&[], 0);
@@ -263,29 +525,6 @@ mod tests {
test_against_hashset(&[62u32, 63u32], 64);
}
#[test]
fn test_bitset_large() {
let arr = generate_nonunique_unsorted(100_000, 5_000);
let mut btreeset: BTreeSet<u32> = BTreeSet::new();
let mut bitset = BitSet::with_max_value(100_000);
for el in arr {
btreeset.insert(el);
bitset.insert(el);
}
for i in 0..100_000 {
assert_eq!(btreeset.contains(&i), bitset.contains(i));
}
assert_eq!(btreeset.len(), bitset.len());
let mut bitset_docset = BitSetDocSet::from(bitset);
let mut remaining = true;
for el in btreeset.into_iter() {
assert!(remaining);
assert_eq!(bitset_docset.doc(), el);
remaining = bitset_docset.advance() != TERMINATED;
}
assert!(!remaining);
}
#[test]
fn test_bitset_num_buckets() {
use super::num_buckets;
@@ -338,12 +577,33 @@ mod tests {
assert_eq!(bitset.len(), 2);
bitset.insert(104u32);
assert_eq!(bitset.len(), 3);
bitset.remove(105u32);
assert_eq!(bitset.len(), 3);
bitset.remove(104u32);
assert_eq!(bitset.len(), 2);
bitset.remove(3u32);
assert_eq!(bitset.len(), 1);
bitset.remove(103u32);
assert_eq!(bitset.len(), 0);
}
pub fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec<u32> {
StdRng::from_seed([seed_val; 32])
.sample_iter(&Bernoulli::new(ratio).unwrap())
.take(n as usize)
.enumerate()
.filter_map(|(val, keep)| if keep { Some(val as u32) } else { None })
.collect()
}
pub fn sample(n: u32, ratio: f64) -> Vec<u32> {
sample_with_seed(n, ratio, 4)
}
#[test]
fn test_bitset_clear() {
let mut bitset = BitSet::with_max_value(1_000);
let els = tests::sample(1_000, 0.01f64);
let els = sample(1_000, 0.01f64);
for &el in &els {
bitset.insert(el);
}

View File

@@ -1,9 +1,167 @@
use std::ops::Deref;
pub use byteorder::LittleEndian as Endianness;
mod bitset;
mod serialize;
mod vint;
mod writer;
pub use bitset::*;
pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
pub use vint::{read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt};
pub use writer::{AntiCallToken, CountingWriter, TerminatingWrite};
/// Has length trait
pub trait HasLen {
/// Return length
fn len(&self) -> usize;
/// Returns true iff empty.
fn is_empty(&self) -> bool {
self.len() == 0
}
}
impl<T: Deref<Target = [u8]>> HasLen for T {
fn len(&self) -> usize {
self.deref().len()
}
}
const HIGHEST_BIT: u64 = 1 << 63;
/// Maps a `i64` to `u64`
///
/// For simplicity, tantivy internally handles `i64` as `u64`.
/// The mapping is defined by this function.
///
/// Maps `i64` to `u64` so that
/// `-2^63 .. 2^63-1` is mapped
/// to
/// `0 .. 2^64-1`
/// in that order.
///
/// This is more suited than simply casting (`val as u64`)
/// because of bitpacking.
///
/// Imagine a list of `i64` ranging from -10 to 10.
/// When casting negative values, the negative values are projected
/// to values over 2^63, and all values end up requiring 64 bits.
///
/// # See also
/// The [reverse mapping is `u64_to_i64`](./fn.u64_to_i64.html).
#[inline]
pub fn i64_to_u64(val: i64) -> u64 {
(val as u64) ^ HIGHEST_BIT
}
/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
#[inline]
pub fn u64_to_i64(val: u64) -> i64 {
(val ^ HIGHEST_BIT) as i64
}
/// Maps a `f64` to `u64`
///
/// For simplicity, tantivy internally handles `f64` as `u64`.
/// The mapping is defined by this function.
///
/// Maps `f64` to `u64` in a monotonic manner, so that bytes lexical order is preserved.
///
/// This is more suited than simply casting (`val as u64`)
/// which would truncate the result
///
/// # Reference
///
/// Daniel Lemire's [blog post](https://lemire.me/blog/2020/12/14/converting-floating-point-numbers-to-integers-while-preserving-order/)
/// explains the mapping in a clear manner.
///
/// # See also
/// The [reverse mapping is `u64_to_f64`](./fn.u64_to_f64.html).
#[inline]
pub fn f64_to_u64(val: f64) -> u64 {
let bits = val.to_bits();
if val.is_sign_positive() {
bits ^ HIGHEST_BIT
} else {
!bits
}
}
/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
#[inline]
pub fn u64_to_f64(val: u64) -> f64 {
f64::from_bits(if val & HIGHEST_BIT != 0 {
val ^ HIGHEST_BIT
} else {
!val
})
}
#[cfg(test)]
pub mod test {
use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
use super::{BinarySerializable, FixedSize};
use proptest::prelude::*;
use std::f64;
fn test_i64_converter_helper(val: i64) {
assert_eq!(u64_to_i64(i64_to_u64(val)), val);
}
fn test_f64_converter_helper(val: f64) {
assert_eq!(u64_to_f64(f64_to_u64(val)), val);
}
pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
let mut buffer = Vec::new();
O::default().serialize(&mut buffer).unwrap();
assert_eq!(buffer.len(), O::SIZE_IN_BYTES);
}
proptest! {
#[test]
fn test_f64_converter_monotonicity_proptest((left, right) in (proptest::num::f64::NORMAL, proptest::num::f64::NORMAL)) {
let left_u64 = f64_to_u64(left);
let right_u64 = f64_to_u64(right);
assert_eq!(left_u64 < right_u64, left < right);
}
}
#[test]
fn test_i64_converter() {
assert_eq!(i64_to_u64(i64::min_value()), u64::min_value());
assert_eq!(i64_to_u64(i64::max_value()), u64::max_value());
test_i64_converter_helper(0i64);
test_i64_converter_helper(i64::min_value());
test_i64_converter_helper(i64::max_value());
for i in -1000i64..1000i64 {
test_i64_converter_helper(i);
}
}
#[test]
fn test_f64_converter() {
test_f64_converter_helper(f64::INFINITY);
test_f64_converter_helper(f64::NEG_INFINITY);
test_f64_converter_helper(0.0);
test_f64_converter_helper(-0.0);
test_f64_converter_helper(1.0);
test_f64_converter_helper(-1.0);
}
#[test]
fn test_f64_order() {
assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
.contains(&f64_to_u64(f64::NAN))); //nan is not a number
assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); //same exponent, different mantissa
assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); //same mantissa, different exponent
assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); //different exponent and mantissa
assert!(f64_to_u64(1.0) > f64_to_u64(-1.0)); // pos > neg
assert!(f64_to_u64(-1.5) < f64_to_u64(-1.0));
assert!(f64_to_u64(-2.0) < f64_to_u64(1.0));
assert!(f64_to_u64(-2.0) < f64_to_u64(-1.5));
}
}

View File

@@ -7,6 +7,7 @@
- [Segments](./basis.md)
- [Defining your schema](./schema.md)
- [Facetting](./facetting.md)
- [Index Sorting](./index_sorting.md)
- [Innerworkings](./innerworkings.md)
- [Inverted index](./inverted_index.md)
- [Best practice](./inverted_index.md)

doc/src/index_sorting.md Normal file
View File

@@ -0,0 +1,61 @@
- [Index Sorting](#index-sorting)
+ [Why Sorting](#why-sorting)
* [Compression](#compression)
* [Top-N Optimization](#top-n-optimization)
* [Pruning](#pruning)
* [Other](#other)
+ [Usage](#usage)
# Index Sorting
Tantivy allows you to sort the index by a field.
## Why Sorting
Presorting an index has several advantages:
###### Compression
When data is sorted, it is easier to compress. E.g. the number sequence [5, 2, 3, 1, 4] would be sorted to [1, 2, 3, 4, 5].
With delta encoding, the unsorted list becomes [5, -3, 1, -2, 3], while the sorted list becomes [1, 1, 1, 1, 1], which packs into far fewer bits.
The compression ratio mainly improves on the fast field of the sorted property; everything else is likely unaffected.
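A minimal sketch of the effect (plain Rust, not tantivy's actual codec; `delta_encode` is a hypothetical helper):
```rust
/// Delta-encode a list: each output value is the difference to the
/// previous input value (the first value is kept as-is).
fn delta_encode(vals: &[i64]) -> Vec<i64> {
    let mut prev = 0i64;
    vals.iter()
        .map(|&v| {
            let delta = v - prev;
            prev = v;
            delta
        })
        .collect()
}

fn main() {
    // Unsorted input produces large, signed deltas...
    assert_eq!(delta_encode(&[5, 2, 3, 1, 4]), vec![5, -3, 1, -2, 3]);
    // ...while sorted input produces small, uniform deltas that
    // bitpack into far fewer bits per element.
    assert_eq!(delta_encode(&[1, 2, 3, 4, 5]), vec![1, 1, 1, 1, 1]);
}
```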
###### Top-N Optimization
When the data is presorted by a field and a search query requests sorting by the same field, we can leverage the natural order of the documents.
E.g. if the data is sorted by timestamp and we want the top-n newest docs containing a term, we can simply leverage the order of the docids, as sketched below.
Note: Tantivy 0.16 does not do this optimization yet.
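A sketch of the idea (hypothetical, since the optimization does not exist yet):
```rust
/// Hypothetical sketch: with an index sorted by timestamp descending,
/// docid order is newest-first order, so the top-n newest matches are
/// simply the first n docids produced by the query.
fn top_n_newest(matching_docids: impl Iterator<Item = u32>, n: usize) -> Vec<u32> {
    // No heap and no full sort needed: stop after n hits.
    matching_docids.take(n).collect()
}
```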
###### Pruning
Let's say we want all documents matching the filter `>= 2010-08-11`. When the data is sorted by that field, we can do a lookup in the fast field to find the matching docid range and use this range as the filter, as sketched below.
Note: Tantivy 0.16 does not do this optimization yet.
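A hedged sketch of that lookup (assuming an ascending sort on the filtered field; `fast_field` stands in for a docid-to-value accessor):
```rust
use std::ops::Range;

/// Hypothetical sketch: with the index sorted ascending by the field,
/// all docids satisfying `value >= threshold` form one contiguous
/// suffix, found with a single binary search over the fast field.
fn docid_range_for_filter(fast_field: &[u64], threshold: u64) -> Range<u32> {
    let start = fast_field.partition_point(|&v| v < threshold) as u32;
    start..fast_field.len() as u32
}
```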
###### Other?
In principle, many other algorithms could exploit the monotonically increasing nature of the data (aggregations, maybe?).
## Usage
Index sorting can be configured by setting [`sort_by_field`](https://github.com/tantivy-search/tantivy/blob/000d76b11a139a84b16b9b95060a1c93e8b9851c/src/core/index_meta.rs#L238) on `IndexSettings` and passing it to an `IndexBuilder`. As of tantivy 0.16, only fast fields can be used as the sort field.
```rust
let settings = IndexSettings {
sort_by_field: Some(IndexSortByField {
field: "intval".to_string(),
order: Order::Desc,
}),
..Default::default()
};
let mut index_builder = Index::builder().schema(schema);
index_builder = index_builder.settings(settings);
let index = index_builder.create_in_ram().unwrap();
```
## Implementation details
Index sorting is applied in the serialization step. In general there are two serialization steps: [finishing a single segment](https://github.com/tantivy-search/tantivy/blob/000d76b11a139a84b16b9b95060a1c93e8b9851c/src/indexer/segment_writer.rs#L338) and [merging multiple segments](https://github.com/tantivy-search/tantivy/blob/000d76b11a139a84b16b9b95060a1c93e8b9851c/src/indexer/merger.rs#L1073).
In both cases we generate a docid mapping that reflects the sort. This mapping is used when serializing the different components (doc store, fast fields, posting lists, fieldnorms, facets).
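A simplified sketch of how such a mapping could be applied (an assumed shape, not tantivy's exact serializer code; by convention here `doc_id_map[new_docid] == old_docid`):
```rust
/// Hypothetical sketch: serializing a fast field column in sorted
/// order becomes a gather over the old, unsorted values.
fn remap_fast_field(old_values: &[u64], doc_id_map: &[u32]) -> Vec<u64> {
    doc_id_map
        .iter()
        .map(|&old_docid| old_values[old_docid as usize])
        .collect()
}
```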

View File

@@ -86,12 +86,10 @@ impl Collector for StatsCollector {
fn merge_fruits(&self, segment_stats: Vec<Option<Stats>>) -> tantivy::Result<Option<Stats>> {
let mut stats = Stats::default();
for segment_stats_opt in segment_stats {
if let Some(segment_stats) = segment_stats_opt {
stats.count += segment_stats.count;
stats.sum += segment_stats.sum;
stats.squared_sum += segment_stats.squared_sum;
}
for segment_stats in segment_stats.into_iter().flatten() {
stats.count += segment_stats.count;
stats.sum += segment_stats.sum;
stats.squared_sum += segment_stats.squared_sum;
}
Ok(stats.non_zero_count())
}

View File

@@ -9,8 +9,8 @@ description = "Fast field codecs used by tantivy"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
common = { path = "../common/" }
tantivy-bitpacker = { path = "../bitpacker/" }
common = { version = "0.1", path = "../common/", package = "tantivy-common" }
tantivy-bitpacker = { version="0.1.1", path = "../bitpacker/" }
prettytable-rs = {version="0.8.0", optional= true}
rand = {version="0.8.3", optional= true}

View File

@@ -118,7 +118,7 @@ mod tests {
);
}
}
let actual_compression = data.len() as f32 / out.len() as f32;
let actual_compression = out.len() as f32 / (data.len() as f32 * 8.0);
(estimation, actual_compression)
}
pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {

View File

@@ -239,11 +239,21 @@ mod tests {
use super::*;
use crate::tests::get_codec_test_data_sets;
fn create_and_validate(data: &[u64], name: &str) {
fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
crate::tests::create_and_validate::<
LinearInterpolFastFieldSerializer,
LinearInterpolFastFieldReader,
>(data, name);
>(data, name)
}
#[test]
fn test_compression() {
let data = (10..=6_000_u64).collect::<Vec<_>>();
let (estimate, actual_compression) =
create_and_validate(&data, "simple monotonically large");
assert!(actual_compression < 0.01);
assert!(estimate < 0.01);
}
#[test]

View File

@@ -1,3 +1,17 @@
/*!
The MultiLinearInterpol compressor uses linear interpolation to guess a value and stores the offset from that guess, in blocks of 512.
With a CHUNK_SIZE of 512 and 29 bytes of metadata per block, the metadata overhead is 29 * 8 = 232 bits, i.e. 232 / 512 ≈ 0.45 bits per element.
The additional space required per element in a block is the number of bits needed to encode the maximum deviation from the linear interpolation estimation function.
E.g. if the maximum deviation within a block is 12, every element in it costs 4 bits.
Size per block:
Num Elements * bits(Maximum Deviation from Interpolation) + 29 bytes metadata
*/
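// Worked example of the formula above (hypothetical numbers): a block of
// 512 values whose maximum deviation from the interpolation line is 12
// needs ceil(log2(12 + 1)) = 4 bits per value, so the block costs
// 512 * 4 bits + 29 bytes of metadata = 256 + 29 = 285 bytes.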
use crate::FastFieldCodecReader;
use crate::FastFieldCodecSerializer;
use crate::FastFieldDataAccess;
@@ -43,7 +57,7 @@ struct Function {
impl Function {
fn calc_slope(&mut self) {
let num_vals = self.end_pos - self.start_pos;
get_slope(self.value_start_pos, self.value_end_pos, num_vals);
self.slope = get_slope(self.value_start_pos, self.value_end_pos, num_vals);
}
// split the interpolation into two functions, change self and return the second split
fn split(&mut self, split_pos: u64, split_pos_value: u64) -> Function {
@@ -364,11 +378,22 @@ mod tests {
use super::*;
use crate::tests::get_codec_test_data_sets;
fn create_and_validate(data: &[u64], name: &str) {
fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
crate::tests::create_and_validate::<
MultiLinearInterpolFastFieldSerializer,
MultiLinearInterpolFastFieldReader,
>(data, name);
>(data, name)
}
#[test]
fn test_compression() {
let data = (10..=6_000_u64).collect::<Vec<_>>();
let (estimate, actual_compression) =
create_and_validate(&data, "simple monotonically large");
assert!(actual_compression < 0.2);
assert!(estimate < 0.20);
assert!(estimate > 0.15);
assert!(actual_compression > 0.01);
}
#[test]
@@ -400,9 +425,11 @@ mod tests {
fn rand() {
for _ in 0..10 {
let mut data = (5_000..20_000)
.map(|_| rand::random::<u64>() as u64)
.map(|_| rand::random::<u32>() as u64)
.collect::<Vec<_>>();
create_and_validate(&data, "random");
let (estimate, actual_compression) = create_and_validate(&data, "random");
dbg!(estimate);
dbg!(actual_compression);
data.reverse();
create_and_validate(&data, "random");

View File

@@ -4,6 +4,7 @@ name = "ownedbytes"
version = "0.1.0"
edition = "2018"
description = "Expose data as static slice"
license = "MIT"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

View File

@@ -178,9 +178,9 @@ pub trait Collector: Sync + Send {
) -> crate::Result<<Self::Child as SegmentCollector>::Fruit> {
let mut segment_collector = self.for_segment(segment_ord as u32, reader)?;
if let Some(delete_bitset) = reader.delete_bitset() {
if let Some(alive_bitset) = reader.alive_bitset() {
weight.for_each(reader, &mut |doc, score| {
if delete_bitset.is_alive(doc) {
if alive_bitset.is_alive(doc) {
segment_collector.collect(doc, score);
}
})?;

View File

@@ -629,10 +629,10 @@ impl Collector for TopDocs {
let heap_len = self.0.limit + self.0.offset;
let mut heap: BinaryHeap<ComparableDoc<Score, DocId>> = BinaryHeap::with_capacity(heap_len);
if let Some(delete_bitset) = reader.delete_bitset() {
if let Some(alive_bitset) = reader.alive_bitset() {
let mut threshold = Score::MIN;
weight.for_each_pruning(threshold, reader, &mut |doc, score| {
if delete_bitset.is_deleted(doc) {
if alive_bitset.is_deleted(doc) {
return threshold;
}
let heap_item = ComparableDoc {

View File

@@ -1,203 +0,0 @@
mod bitset;
mod composite_file;
pub use self::bitset::BitSet;
pub(crate) use self::bitset::TinySet;
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
pub use byteorder::LittleEndian as Endianness;
pub use common::CountingWriter;
pub use common::{
read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt,
};
pub use common::{BinarySerializable, DeserializeFrom, FixedSize};
/// Segment's max doc must be `< MAX_DOC_LIMIT`.
///
/// We do not allow segments with more than
pub const MAX_DOC_LIMIT: u32 = 1 << 31;
/// Has length trait
pub trait HasLen {
/// Return length
fn len(&self) -> usize;
/// Returns true iff empty.
fn is_empty(&self) -> bool {
self.len() == 0
}
}
const HIGHEST_BIT: u64 = 1 << 63;
/// Maps a `i64` to `u64`
///
/// For simplicity, tantivy internally handles `i64` as `u64`.
/// The mapping is defined by this function.
///
/// Maps `i64` to `u64` so that
/// `-2^63 .. 2^63-1` is mapped
/// to
/// `0 .. 2^64-1`
/// in that order.
///
/// This is more suited than simply casting (`val as u64`)
/// because of bitpacking.
///
/// Imagine a list of `i64` ranging from -10 to 10.
/// When casting negative values, the negative values are projected
/// to values over 2^63, and all values end up requiring 64 bits.
///
/// # See also
/// The [reverse mapping is `u64_to_i64`](./fn.u64_to_i64.html).
#[inline]
pub fn i64_to_u64(val: i64) -> u64 {
(val as u64) ^ HIGHEST_BIT
}
/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
#[inline]
pub fn u64_to_i64(val: u64) -> i64 {
(val ^ HIGHEST_BIT) as i64
}
/// Maps a `f64` to `u64`
///
/// For simplicity, tantivy internally handles `f64` as `u64`.
/// The mapping is defined by this function.
///
/// Maps `f64` to `u64` in a monotonic manner, so that bytes lexical order is preserved.
///
/// This is more suited than simply casting (`val as u64`)
/// which would truncate the result
///
/// # Reference
///
/// Daniel Lemire's [blog post](https://lemire.me/blog/2020/12/14/converting-floating-point-numbers-to-integers-while-preserving-order/)
/// explains the mapping in a clear manner.
///
/// # See also
/// The [reverse mapping is `u64_to_f64`](./fn.u64_to_f64.html).
#[inline]
pub fn f64_to_u64(val: f64) -> u64 {
let bits = val.to_bits();
if val.is_sign_positive() {
bits ^ HIGHEST_BIT
} else {
!bits
}
}
/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
#[inline]
pub fn u64_to_f64(val: u64) -> f64 {
f64::from_bits(if val & HIGHEST_BIT != 0 {
val ^ HIGHEST_BIT
} else {
!val
})
}
#[cfg(test)]
pub(crate) mod test {
use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
use common::{BinarySerializable, FixedSize};
use proptest::prelude::*;
use std::f64;
use tantivy_bitpacker::compute_num_bits;
pub use tantivy_bitpacker::minmax;
fn test_i64_converter_helper(val: i64) {
assert_eq!(u64_to_i64(i64_to_u64(val)), val);
}
fn test_f64_converter_helper(val: f64) {
assert_eq!(u64_to_f64(f64_to_u64(val)), val);
}
pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
let mut buffer = Vec::new();
O::default().serialize(&mut buffer).unwrap();
assert_eq!(buffer.len(), O::SIZE_IN_BYTES);
}
proptest! {
#[test]
fn test_f64_converter_monotonicity_proptest((left, right) in (proptest::num::f64::NORMAL, proptest::num::f64::NORMAL)) {
let left_u64 = f64_to_u64(left);
let right_u64 = f64_to_u64(right);
assert_eq!(left_u64 < right_u64, left < right);
}
}
#[test]
fn test_i64_converter() {
assert_eq!(i64_to_u64(i64::min_value()), u64::min_value());
assert_eq!(i64_to_u64(i64::max_value()), u64::max_value());
test_i64_converter_helper(0i64);
test_i64_converter_helper(i64::min_value());
test_i64_converter_helper(i64::max_value());
for i in -1000i64..1000i64 {
test_i64_converter_helper(i);
}
}
#[test]
fn test_f64_converter() {
test_f64_converter_helper(f64::INFINITY);
test_f64_converter_helper(f64::NEG_INFINITY);
test_f64_converter_helper(0.0);
test_f64_converter_helper(-0.0);
test_f64_converter_helper(1.0);
test_f64_converter_helper(-1.0);
}
#[test]
fn test_f64_order() {
assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
.contains(&f64_to_u64(f64::NAN))); //nan is not a number
assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); //same exponent, different mantissa
assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); //same mantissa, different exponent
assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); //different exponent and mantissa
assert!(f64_to_u64(1.0) > f64_to_u64(-1.0)); // pos > neg
assert!(f64_to_u64(-1.5) < f64_to_u64(-1.0));
assert!(f64_to_u64(-2.0) < f64_to_u64(1.0));
assert!(f64_to_u64(-2.0) < f64_to_u64(-1.5));
}
#[test]
fn test_compute_num_bits() {
assert_eq!(compute_num_bits(1), 1u8);
assert_eq!(compute_num_bits(0), 0u8);
assert_eq!(compute_num_bits(2), 2u8);
assert_eq!(compute_num_bits(3), 2u8);
assert_eq!(compute_num_bits(4), 3u8);
assert_eq!(compute_num_bits(255), 8u8);
assert_eq!(compute_num_bits(256), 9u8);
assert_eq!(compute_num_bits(5_000_000_000), 33u8);
}
#[test]
fn test_max_doc() {
// this is the first time I write a unit test for a constant.
assert!(((super::MAX_DOC_LIMIT - 1) as i32) >= 0);
assert!((super::MAX_DOC_LIMIT as i32) < 0);
}
#[test]
fn test_minmax_empty() {
let vals: Vec<u32> = vec![];
assert_eq!(minmax(vals.into_iter()), None);
}
#[test]
fn test_minmax_one() {
assert_eq!(minmax(vec![1].into_iter()), Some((1, 1)));
}
#[test]
fn test_minmax_two() {
assert_eq!(minmax(vec![1, 2].into_iter()), Some((1, 2)));
assert_eq!(minmax(vec![2, 1].into_iter()), Some((1, 2)));
}
}

View File

@@ -1,5 +1,4 @@
use crossbeam::channel;
use rayon::{ThreadPool, ThreadPoolBuilder};
/// Search executor that runs search requests either single-threaded or multi-threaded.
///
@@ -11,8 +10,6 @@ use rayon::{ThreadPool, ThreadPoolBuilder};
pub enum Executor {
/// Single thread variant of an Executor
SingleThread,
/// Thread pool variant of an Executor
ThreadPool(ThreadPool),
}
impl Executor {
@@ -21,15 +18,6 @@ impl Executor {
Executor::SingleThread
}
/// Creates an Executor that dispatches the tasks in a thread pool.
pub fn multi_thread(num_threads: usize, prefix: &'static str) -> crate::Result<Executor> {
let pool = ThreadPoolBuilder::new()
.num_threads(num_threads)
.thread_name(move |num| format!("{}{}", prefix, num))
.build()?;
Ok(Executor::ThreadPool(pool))
}
/// Perform a map in the thread pool.
///
/// Regardless of the executor (`SingleThread` or `ThreadPool`), panics in the task
@@ -46,40 +34,6 @@ impl Executor {
) -> crate::Result<Vec<R>> {
match self {
Executor::SingleThread => args.map(f).collect::<crate::Result<_>>(),
Executor::ThreadPool(pool) => {
let args_with_indices: Vec<(usize, A)> = args.enumerate().collect();
let num_fruits = args_with_indices.len();
let fruit_receiver = {
let (fruit_sender, fruit_receiver) = channel::unbounded();
pool.scope(|scope| {
for arg_with_idx in args_with_indices {
scope.spawn(|_| {
let (idx, arg) = arg_with_idx;
let fruit = f(arg);
if let Err(err) = fruit_sender.send((idx, fruit)) {
error!("Failed to send search task. It probably means all search threads have panicked. {:?}", err);
}
});
}
});
fruit_receiver
// This ends the scope of fruit_sender.
// This is important as it makes it possible for the fruit_receiver iteration to
// terminate.
};
// This is lame, but safe.
let mut results_with_position = Vec::with_capacity(num_fruits);
for (pos, fruit_res) in fruit_receiver {
let fruit = fruit_res?;
results_with_position.push((pos, fruit));
}
results_with_position.sort_by_key(|(pos, _)| *pos);
assert_eq!(results_with_position.len(), num_fruits);
Ok(results_with_position
.into_iter()
.map(|(_, fruit)| fruit)
.collect::<Vec<_>>())
}
}
}
}

View File

@@ -120,7 +120,7 @@ impl IndexBuilder {
/// Creates a new index in a given filepath.
/// The index will use the `MMapDirectory`.
///
/// If a previous index was in this directory, then its meta file will be destroyed.
/// If a previous index was in this directory, it returns an `IndexAlreadyExists` error.
#[cfg(feature = "mmap")]
pub fn create_in_dir<P: AsRef<Path>>(self, directory_path: P) -> crate::Result<Index> {
let mmap_directory = MmapDirectory::open(directory_path)?;
@@ -229,7 +229,8 @@ impl Index {
/// Creates a new index using the `RamDirectory`.
///
/// The index will be allocated in anonymous memory.
/// This should only be used for unit tests.
/// This is useful for indexing a small set of documents,
/// for instance in unit tests or for a temporary in-memory index.
pub fn create_in_ram(schema: Schema) -> Index {
IndexBuilder::new().schema(schema).create_in_ram().unwrap()
}
@@ -237,7 +238,7 @@ impl Index {
/// Creates a new index in a given filepath.
/// The index will use the `MMapDirectory`.
///
/// If a previous index was in this directory, then its meta file will be destroyed.
/// If a previous index was in this directory, then it returns an `IndexAlreadyExists` error.
#[cfg(feature = "mmap")]
pub fn create_in_dir<P: AsRef<Path>>(
directory_path: P,
@@ -523,7 +524,22 @@ impl Index {
/// Returns the set of corrupted files
pub fn validate_checksum(&self) -> crate::Result<HashSet<PathBuf>> {
self.directory.list_damaged().map_err(Into::into)
let managed_files = self.directory.list_managed_files();
let active_segments_files: HashSet<PathBuf> = self
.searchable_segment_metas()?
.iter()
.flat_map(|segment_meta| segment_meta.list_files())
.collect();
let active_existing_files: HashSet<&PathBuf> =
active_segments_files.intersection(&managed_files).collect();
let mut damaged_files = HashSet::new();
for path in active_existing_files {
if !self.directory.validate_checksum(path)? {
damaged_files.insert((*path).clone());
}
}
Ok(damaged_files)
}
}

View File

@@ -101,6 +101,7 @@ impl SegmentMeta {
/// Returns the list of files that
/// are required for the segment meta.
/// Note: Some of the returned files may not exist depending on the state of the segment.
///
/// This is useful as the way tantivy removes files
/// is by removing all files that have been created by tantivy

View File

@@ -1,6 +1,5 @@
use std::io;
use crate::common::BinarySerializable;
use crate::directory::FileSlice;
use crate::positions::PositionReader;
use crate::postings::TermInfo;
@@ -8,6 +7,7 @@ use crate::postings::{BlockSegmentPostings, SegmentPostings};
use crate::schema::IndexRecordOption;
use crate::schema::Term;
use crate::termdict::TermDictionary;
use common::BinarySerializable;
/// The inverted index reader is in charge of accessing
/// the inverted index associated to a specific field.

View File

@@ -2,8 +2,10 @@ use crate::core::InvertedIndexReader;
use crate::core::Segment;
use crate::core::SegmentComponent;
use crate::core::SegmentId;
use crate::directory::CompositeFile;
use crate::directory::FileSlice;
use crate::fastfield::DeleteBitSet;
use crate::error::DataCorruption;
use crate::fastfield::AliveBitSet;
use crate::fastfield::FacetReader;
use crate::fastfield::FastFieldReaders;
use crate::fieldnorm::{FieldNormReader, FieldNormReaders};
@@ -14,7 +16,6 @@ use crate::space_usage::SegmentSpaceUsage;
use crate::store::StoreReader;
use crate::termdict::TermDictionary;
use crate::DocId;
use crate::{common::CompositeFile, error::DataCorruption};
use fail::fail_point;
use std::fmt;
use std::sync::Arc;
@@ -46,7 +47,7 @@ pub struct SegmentReader {
fieldnorm_readers: FieldNormReaders,
store_file: FileSlice,
delete_bitset_opt: Option<DeleteBitSet>,
alive_bitset_opt: Option<AliveBitSet>,
schema: Schema,
}
@@ -71,14 +72,12 @@ impl SegmentReader {
/// Return the number of documents that have been
/// deleted in the segment.
pub fn num_deleted_docs(&self) -> DocId {
self.delete_bitset()
.map(|delete_set| delete_set.num_deleted() as DocId)
.unwrap_or(0u32)
self.max_doc - self.num_docs
}
/// Returns true iff some of the documents of the segment have been deleted.
pub fn has_deletes(&self) -> bool {
self.delete_bitset().is_some()
self.num_deleted_docs() > 0
}
/// Accessor to a segment's fast field reader given a field.
@@ -169,10 +168,10 @@ impl SegmentReader {
let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?;
let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;
let delete_bitset_opt = if segment.meta().has_deletes() {
let delete_data = segment.open_read(SegmentComponent::Delete)?;
let delete_bitset = DeleteBitSet::open(delete_data)?;
Some(delete_bitset)
let alive_bitset_opt = if segment.meta().has_deletes() {
let alive_bitset_bytes = segment.open_read(SegmentComponent::Delete)?.read_bytes()?;
let alive_bitset = AliveBitSet::open(alive_bitset_bytes);
Some(alive_bitset)
} else {
None
};
@@ -187,7 +186,7 @@ impl SegmentReader {
fieldnorm_readers,
segment_id: segment.id(),
store_file,
delete_bitset_opt,
alive_bitset_opt,
positions_composite,
schema,
})
@@ -273,21 +272,25 @@ impl SegmentReader {
/// Returns the bitset representing
/// the documents that have been deleted.
pub fn delete_bitset(&self) -> Option<&DeleteBitSet> {
self.delete_bitset_opt.as_ref()
pub fn alive_bitset(&self) -> Option<&AliveBitSet> {
self.alive_bitset_opt.as_ref()
}
/// Returns true iff the `doc` is marked
/// as deleted.
pub fn is_deleted(&self, doc: DocId) -> bool {
self.delete_bitset()
self.alive_bitset()
.map(|delete_set| delete_set.is_deleted(doc))
.unwrap_or(false)
}
/// Returns an iterator that will iterate over the alive document ids
pub fn doc_ids_alive(&self) -> impl Iterator<Item = DocId> + '_ {
(0u32..self.max_doc).filter(move |doc| !self.is_deleted(*doc))
pub fn doc_ids_alive(&self) -> Box<dyn Iterator<Item = DocId> + '_> {
if let Some(alive_bitset) = &self.alive_bitset_opt {
Box::new(alive_bitset.iter_alive())
} else {
Box::new(0u32..self.max_doc)
}
}
/// Summarize total space usage of this segment.
@@ -300,9 +303,9 @@ impl SegmentReader {
self.fast_fields_readers.space_usage(),
self.fieldnorm_readers.space_usage(),
self.get_store_reader()?.space_usage(),
self.delete_bitset_opt
self.alive_bitset_opt
.as_ref()
.map(DeleteBitSet::space_usage)
.map(AliveBitSet::space_usage)
.unwrap_or(0),
))
}

View File

@@ -1,18 +1,17 @@
use crate::common::BinarySerializable;
use crate::common::CountingWriter;
use crate::common::VInt;
use crate::directory::FileSlice;
use crate::directory::{TerminatingWrite, WritePtr};
use crate::schema::Field;
use crate::space_usage::FieldUsage;
use crate::space_usage::PerFieldSpaceUsage;
use common::BinarySerializable;
use common::CountingWriter;
use common::HasLen;
use common::VInt;
use std::collections::HashMap;
use std::io::{self, Read, Write};
use std::iter::ExactSizeIterator;
use std::ops::Range;
use super::HasLen;
#[derive(Eq, PartialEq, Hash, Copy, Ord, PartialOrd, Clone, Debug)]
pub struct FileAddr {
field: Field,
@@ -188,10 +187,10 @@ impl CompositeFile {
mod test {
use super::{CompositeFile, CompositeWrite};
use crate::common::BinarySerializable;
use crate::common::VInt;
use crate::directory::{Directory, RamDirectory};
use crate::schema::Field;
use common::BinarySerializable;
use common::VInt;
use std::io::Write;
use std::path::Path;

View File

@@ -1,7 +1,7 @@
use stable_deref_trait::StableDeref;
use crate::common::HasLen;
use crate::directory::OwnedBytes;
use common::HasLen;
use std::fmt;
use std::ops::Range;
use std::sync::{Arc, Weak};
@@ -32,12 +32,6 @@ impl FileHandle for &'static [u8] {
}
}
impl<T: Deref<Target = [u8]>> HasLen for T {
fn len(&self) -> usize {
self.deref().len()
}
}
impl<B> From<B> for FileSlice
where
B: StableDeref + Deref<Target = [u8]> + 'static + Send + Sync,
@@ -178,7 +172,7 @@ impl HasLen for FileSlice {
#[cfg(test)]
mod tests {
use super::{FileHandle, FileSlice};
use crate::common::HasLen;
use common::HasLen;
use std::io;
#[test]

View File

@@ -1,10 +1,10 @@
use crate::directory::error::Incompatibility;
use crate::directory::FileSlice;
use crate::{
common::{BinarySerializable, CountingWriter, DeserializeFrom, FixedSize, HasLen},
directory::{AntiCallToken, TerminatingWrite},
Version, INDEX_FORMAT_VERSION,
};
use common::{BinarySerializable, CountingWriter, DeserializeFrom, FixedSize, HasLen};
use crc32fast::Hasher;
use serde::{Deserialize, Serialize};
use std::io;
@@ -156,10 +156,8 @@ mod tests {
use crate::directory::footer::Footer;
use crate::directory::OwnedBytes;
use crate::{
common::BinarySerializable,
directory::{footer::FOOTER_MAGIC_NUMBER, FileSlice},
};
use crate::directory::{footer::FOOTER_MAGIC_NUMBER, FileSlice};
use common::BinarySerializable;
use std::io;
#[test]

View File

@@ -1,4 +1,4 @@
use crate::core::{MANAGED_FILEPATH, META_FILEPATH};
use crate::core::MANAGED_FILEPATH;
use crate::directory::error::{DeleteError, LockError, OpenReadError, OpenWriteError};
use crate::directory::footer::{Footer, FooterProxy};
use crate::directory::GarbageCollectionResult;
@@ -248,24 +248,15 @@ impl ManagedDirectory {
Ok(footer.crc() == crc)
}
/// List files for which checksum does not match content
pub fn list_damaged(&self) -> result::Result<HashSet<PathBuf>, OpenReadError> {
let mut managed_paths = self
/// List all managed files
pub fn list_managed_files(&self) -> HashSet<PathBuf> {
let managed_paths = self
.meta_informations
.read()
.expect("Managed directory rlock poisoned in list damaged.")
.managed_paths
.clone();
managed_paths.remove(*META_FILEPATH);
let mut damaged_files = HashSet::new();
for path in managed_paths {
if !self.validate_checksum(&path)? {
damaged_files.insert(path);
}
}
Ok(damaged_files)
managed_paths
}
}
@@ -336,7 +327,6 @@ mod tests_mmap_specific {
use crate::directory::{Directory, ManagedDirectory, MmapDirectory, TerminatingWrite};
use std::collections::HashSet;
use std::fs::OpenOptions;
use std::io::Write;
use std::path::{Path, PathBuf};
use tempfile::TempDir;
@@ -405,39 +395,4 @@ mod tests_mmap_specific {
}
assert!(!managed_directory.exists(test_path1).unwrap());
}
#[test]
fn test_checksum() -> crate::Result<()> {
let test_path1: &'static Path = Path::new("some_path_for_test");
let test_path2: &'static Path = Path::new("other_test_path");
let tempdir = TempDir::new().unwrap();
let tempdir_path = PathBuf::from(tempdir.path());
let mmap_directory = MmapDirectory::open(&tempdir_path)?;
let managed_directory = ManagedDirectory::wrap(mmap_directory)?;
let mut write = managed_directory.open_write(test_path1)?;
write.write_all(&[0u8, 1u8])?;
write.terminate()?;
let mut write = managed_directory.open_write(test_path2)?;
write.write_all(&[3u8, 4u8, 5u8])?;
write.terminate()?;
let read_file = managed_directory.open_read(test_path2)?.read_bytes()?;
assert_eq!(read_file.as_slice(), &[3u8, 4u8, 5u8]);
assert!(managed_directory.list_damaged().unwrap().is_empty());
let mut corrupted_path = tempdir_path;
corrupted_path.push(test_path2);
let mut file = OpenOptions::new().write(true).open(&corrupted_path)?;
file.write_all(&[255u8])?;
file.flush()?;
drop(file);
let damaged = managed_directory.list_damaged()?;
assert_eq!(damaged.len(), 1);
assert!(damaged.contains(test_path2));
Ok(())
}
}
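Callers that relied on the removed `list_damaged` (and its deleted `test_checksum` test) can rebuild it on top of `list_managed_files`. A hedged sketch, assuming `validate_checksum` remains reachable from the call site and crate-internal imports for `ManagedDirectory` and `OpenReadError`:

```rust
use std::collections::HashSet;
use std::path::PathBuf;

// Sketch only: reconstructs the old `list_damaged` behavior externally.
// Note the removed helper also skipped META_FILEPATH before checking.
fn list_damaged(dir: &ManagedDirectory) -> Result<HashSet<PathBuf>, OpenReadError> {
    let mut damaged = HashSet::new();
    for path in dir.list_managed_files() {
        if !dir.validate_checksum(&path)? {
            damaged.insert(path);
        }
    }
    Ok(damaged)
}
```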

View File

@@ -11,7 +11,7 @@ use crate::directory::{AntiCallToken, FileHandle, OwnedBytes};
use crate::directory::{ArcBytes, WeakArcBytes};
use crate::directory::{TerminatingWrite, WritePtr};
use fs2::FileExt;
use memmap::Mmap;
use memmap2::Mmap;
use serde::{Deserialize, Serialize};
use stable_deref_trait::StableDeref;
use std::convert::From;
@@ -53,7 +53,7 @@ fn open_mmap(full_path: &Path) -> result::Result<Option<Mmap>, OpenReadError> {
return Ok(None);
}
unsafe {
memmap::Mmap::map(&file)
memmap2::Mmap::map(&file)
.map(Some)
.map_err(|io_err| OpenReadError::wrap_io_error(io_err, full_path.to_path_buf()))
}
@@ -485,13 +485,14 @@ mod tests {
// The following tests are specific to the MmapDirectory
use super::*;
use crate::indexer::LogMergePolicy;
use crate::Index;
use crate::ReloadPolicy;
use crate::{common::HasLen, indexer::LogMergePolicy};
use crate::{
schema::{Schema, SchemaBuilder, TEXT},
IndexSettings,
};
use common::HasLen;
#[test]
fn test_open_non_existent_path() {

View File

@@ -20,6 +20,9 @@ mod watch_event_router;
/// Errors specific to the directory module.
pub mod error;
mod composite_file;
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
pub use self::directory::DirectoryLock;
pub use self::directory::{Directory, DirectoryClone};
pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK};

View File

@@ -1,9 +1,10 @@
use crate::core::META_FILEPATH;
use crate::directory::error::{DeleteError, OpenReadError, OpenWriteError};
use crate::directory::AntiCallToken;
use crate::directory::WatchCallbackList;
use crate::directory::{Directory, FileSlice, WatchCallback, WatchHandle};
use crate::directory::{TerminatingWrite, WritePtr};
use crate::{common::HasLen, core::META_FILEPATH};
use common::HasLen;
use fail::fail_point;
use std::collections::HashMap;
use std::fmt;

View File

@@ -1,4 +1,4 @@
use crate::fastfield::DeleteBitSet;
use crate::fastfield::AliveBitSet;
use crate::DocId;
use std::borrow::Borrow;
use std::borrow::BorrowMut;
@@ -85,11 +85,11 @@ pub trait DocSet: Send {
/// Returns the number of documents matching.
/// Calling this method consumes the `DocSet`.
fn count(&mut self, delete_bitset: &DeleteBitSet) -> u32 {
fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 {
let mut count = 0u32;
let mut doc = self.doc();
while doc != TERMINATED {
if !delete_bitset.is_deleted(doc) {
if alive_bitset.is_alive(doc) {
count += 1u32;
}
doc = self.advance();
@@ -130,8 +130,8 @@ impl<'a> DocSet for &'a mut dyn DocSet {
(**self).size_hint()
}
fn count(&mut self, delete_bitset: &DeleteBitSet) -> u32 {
(**self).count(delete_bitset)
fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 {
(**self).count(alive_bitset)
}
fn count_including_deleted(&mut self) -> u32 {
@@ -160,9 +160,9 @@ impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
unboxed.size_hint()
}
fn count(&mut self, delete_bitset: &DeleteBitSet) -> u32 {
fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 {
let unboxed: &mut TDocSet = self.borrow_mut();
unboxed.count(delete_bitset)
unboxed.count(alive_bitset)
}
fn count_including_deleted(&mut self) -> u32 {
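A one-line usage sketch for the renamed hook, assuming a `docset` and an `alive_bitset` in scope; `count` consumes the `DocSet`, advancing it to `TERMINATED` while testing each hit against `is_alive`:

```rust
// Sketch: count matching docs, skipping deleted ones (consumes `docset`).
let num_matching: u32 = docset.count(&alive_bitset);
```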

View File

@@ -0,0 +1,202 @@
use crate::space_usage::ByteCount;
use crate::DocId;
use common::BitSet;
use common::ReadSerializedBitSet;
use ownedbytes::OwnedBytes;
use std::io;
use std::io::Write;
/// Write an alive `BitSet`,
///
/// where `alive_bitset` is the set of alive `DocId`s.
/// Warning: this function does not call terminate. The caller is in charge of
/// closing the writer properly.
pub fn write_alive_bitset<T: Write>(alive_bitset: &BitSet, writer: &mut T) -> io::Result<()> {
alive_bitset.serialize(writer)?;
Ok(())
}
/// Set of alive `DocId`s.
#[derive(Clone)]
pub struct AliveBitSet {
num_alive_docs: usize,
bitset: ReadSerializedBitSet,
num_bytes: ByteCount,
}
impl AliveBitSet {
#[cfg(test)]
pub(crate) fn for_test_from_deleted_docs(deleted_docs: &[DocId], max_doc: u32) -> AliveBitSet {
assert!(deleted_docs.iter().all(|&doc| doc < max_doc));
let mut bitset = BitSet::with_max_value_and_full(max_doc);
for &doc in deleted_docs {
bitset.remove(doc);
}
let mut alive_bitset_buffer = Vec::new();
write_alive_bitset(&bitset, &mut alive_bitset_buffer).unwrap();
let alive_bitset_bytes = OwnedBytes::new(alive_bitset_buffer);
Self::open(alive_bitset_bytes)
}
/// Opens an alive bitset given its serialized bytes.
pub fn open(bytes: OwnedBytes) -> AliveBitSet {
let num_bytes = bytes.len();
let bitset = ReadSerializedBitSet::open(bytes);
AliveBitSet {
num_alive_docs: bitset.len(),
bitset,
num_bytes,
}
}
/// Returns true iff the document is still "alive". In other words, if it has not been deleted.
#[inline]
pub fn is_alive(&self, doc: DocId) -> bool {
self.bitset.contains(doc)
}
/// Returns true iff the document has been marked as deleted.
#[inline]
pub fn is_deleted(&self, doc: DocId) -> bool {
!self.is_alive(doc)
}
/// Iterate over the alive docids.
#[inline]
pub fn iter_alive(&self) -> impl Iterator<Item = DocId> + '_ {
self.bitset.iter()
}
/// Returns the underlying bitset.
#[inline]
pub fn bitset(&self) -> &ReadSerializedBitSet {
&self.bitset
}
/// The number of alive docs.
pub fn num_alive_docs(&self) -> usize {
self.num_alive_docs
}
/// Summarize total space usage of this bitset.
pub fn space_usage(&self) -> ByteCount {
self.num_bytes
}
}
#[cfg(test)]
mod tests {
use super::AliveBitSet;
#[test]
fn test_alive_bitset_empty() {
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[], 10);
for doc in 0..10 {
assert_eq!(alive_bitset.is_deleted(doc), !alive_bitset.is_alive(doc));
assert!(!alive_bitset.is_deleted(doc));
}
assert_eq!(alive_bitset.num_alive_docs(), 10);
}
#[test]
fn test_alive_bitset() {
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[1, 9], 10);
assert!(alive_bitset.is_alive(0));
assert!(alive_bitset.is_deleted(1));
assert!(alive_bitset.is_alive(2));
assert!(alive_bitset.is_alive(3));
assert!(alive_bitset.is_alive(4));
assert!(alive_bitset.is_alive(5));
assert!(alive_bitset.is_alive(6));
assert!(alive_bitset.is_alive(7));
assert!(alive_bitset.is_alive(8));
assert!(alive_bitset.is_deleted(9));
for doc in 0..10 {
assert_eq!(alive_bitset.is_deleted(doc), !alive_bitset.is_alive(doc));
}
assert_eq!(alive_bitset.num_alive_docs(), 8);
}
#[test]
fn test_alive_bitset_iter_minimal() {
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[7], 8);
let data: Vec<_> = alive_bitset.iter_alive().collect();
assert_eq!(data, vec![0, 1, 2, 3, 4, 5, 6]);
}
#[test]
fn test_alive_bitset_iter_small() {
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[0, 2, 3, 6], 7);
let data: Vec<_> = alive_bitset.iter_alive().collect();
assert_eq!(data, vec![1, 4, 5]);
}
#[test]
fn test_alive_bitset_iter() {
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[0, 1, 1000], 1001);
let data: Vec<_> = alive_bitset.iter_alive().collect();
assert_eq!(data, (2..=999).collect::<Vec<_>>());
}
}
#[cfg(all(test, feature = "unstable"))]
mod bench {
use super::AliveBitSet;
use rand::prelude::IteratorRandom;
use rand::thread_rng;
use test::Bencher;
fn get_alive() -> Vec<u32> {
let mut data = (0..1_000_000_u32).collect::<Vec<u32>>();
for _ in 0..1_000_000 / 8 {
remove_rand(&mut data);
}
data
}
fn remove_rand(raw: &mut Vec<u32>) {
let i = (0..raw.len()).choose(&mut thread_rng()).unwrap();
raw.remove(i);
}
#[bench]
fn bench_alivebitset_iter_deser_on_fly(bench: &mut Bencher) {
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[0, 1, 1000, 10000], 1_000_000);
bench.iter(|| alive_bitset.iter_alive().collect::<Vec<_>>());
}
#[bench]
fn bench_alivebitset_access(bench: &mut Bencher) {
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[0, 1, 1000, 10000], 1_000_000);
bench.iter(|| {
(0..1_000_000_u32)
.filter(|doc| alive_bitset.is_alive(*doc))
.collect::<Vec<_>>()
});
}
#[bench]
fn bench_alivebitset_iter_deser_on_fly_1_8_alive(bench: &mut Bencher) {
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&get_alive(), 1_000_000);
bench.iter(|| alive_bitset.iter_alive().collect::<Vec<_>>());
}
#[bench]
fn bench_alivebitset_access_1_8_alive(bench: &mut Bencher) {
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&get_alive(), 1_000_000);
bench.iter(|| {
(0..1_000_000_u32)
.filter(|doc| alive_bitset.is_alive(*doc))
.collect::<Vec<_>>()
});
}
}
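Putting the new module together: build a full `BitSet`, remove the deleted docs, serialize it, and reopen it as an `AliveBitSet`. A sketch using the `common` and `ownedbytes` crates from the imports above, mirroring the `for_test_from_deleted_docs` helper:

```rust
use common::BitSet;
use ownedbytes::OwnedBytes;

// Sketch: round-trip a bitset with docs 1 and 9 deleted out of max_doc = 10.
let mut bitset = BitSet::with_max_value_and_full(10); // all docs alive
bitset.remove(1);
bitset.remove(9);

let mut buffer: Vec<u8> = Vec::new();
write_alive_bitset(&bitset, &mut buffer).unwrap();

let alive_bitset = AliveBitSet::open(OwnedBytes::new(buffer));
assert!(alive_bitset.is_alive(0));
assert!(alive_bitset.is_deleted(1));
assert_eq!(alive_bitset.num_alive_docs(), 8);
assert_eq!(alive_bitset.iter_alive().count(), 8);
```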

View File

@@ -1,143 +0,0 @@
use crate::common::{BitSet, HasLen};
use crate::directory::FileSlice;
use crate::directory::OwnedBytes;
use crate::directory::WritePtr;
use crate::space_usage::ByteCount;
use crate::DocId;
use std::io;
use std::io::Write;
/// Write a delete `BitSet`
///
/// where `delete_bitset` is the set of deleted `DocId`.
/// Warning: this function does not call terminate. The caller is in charge of
/// closing the writer properly.
pub fn write_delete_bitset(
delete_bitset: &BitSet,
max_doc: u32,
writer: &mut WritePtr,
) -> io::Result<()> {
let mut byte = 0u8;
let mut shift = 0u8;
for doc in 0..max_doc {
if delete_bitset.contains(doc) {
byte |= 1 << shift;
}
if shift == 7 {
writer.write_all(&[byte])?;
shift = 0;
byte = 0;
} else {
shift += 1;
}
}
if max_doc % 8 > 0 {
writer.write_all(&[byte])?;
}
Ok(())
}
/// Set of deleted `DocId`s.
#[derive(Clone)]
pub struct DeleteBitSet {
data: OwnedBytes,
num_deleted: usize,
}
impl DeleteBitSet {
#[cfg(test)]
pub(crate) fn for_test(docs: &[DocId], max_doc: u32) -> DeleteBitSet {
use crate::directory::{Directory, RamDirectory, TerminatingWrite};
use std::path::Path;
assert!(docs.iter().all(|&doc| doc < max_doc));
let mut bitset = BitSet::with_max_value(max_doc);
for &doc in docs {
bitset.insert(doc);
}
let directory = RamDirectory::create();
let path = Path::new("dummydeletebitset");
let mut wrt = directory.open_write(path).unwrap();
write_delete_bitset(&bitset, max_doc, &mut wrt).unwrap();
wrt.terminate().unwrap();
let file = directory.open_read(path).unwrap();
Self::open(file).unwrap()
}
/// Opens a delete bitset given its file.
pub fn open(file: FileSlice) -> crate::Result<DeleteBitSet> {
let bytes = file.read_bytes()?;
let num_deleted: usize = bytes
.as_slice()
.iter()
.map(|b| b.count_ones() as usize)
.sum();
Ok(DeleteBitSet {
data: bytes,
num_deleted,
})
}
/// Returns true iff the document is still "alive". In other words, if it has not been deleted.
pub fn is_alive(&self, doc: DocId) -> bool {
!self.is_deleted(doc)
}
/// Returns true iff the document has been marked as deleted.
#[inline]
pub fn is_deleted(&self, doc: DocId) -> bool {
let byte_offset = doc / 8u32;
let b: u8 = self.data.as_slice()[byte_offset as usize];
let shift = (doc & 7u32) as u8;
b & (1u8 << shift) != 0
}
/// The number of deleted docs
pub fn num_deleted(&self) -> usize {
self.num_deleted
}
/// Summarize total space usage of this bitset.
pub fn space_usage(&self) -> ByteCount {
self.data.len()
}
}
impl HasLen for DeleteBitSet {
fn len(&self) -> usize {
self.num_deleted
}
}
#[cfg(test)]
mod tests {
use super::DeleteBitSet;
use crate::common::HasLen;
#[test]
fn test_delete_bitset_empty() {
let delete_bitset = DeleteBitSet::for_test(&[], 10);
for doc in 0..10 {
assert_eq!(delete_bitset.is_deleted(doc), !delete_bitset.is_alive(doc));
}
assert_eq!(delete_bitset.len(), 0);
}
#[test]
fn test_delete_bitset() {
let delete_bitset = DeleteBitSet::for_test(&[1, 9], 10);
assert!(delete_bitset.is_alive(0));
assert!(delete_bitset.is_deleted(1));
assert!(delete_bitset.is_alive(2));
assert!(delete_bitset.is_alive(3));
assert!(delete_bitset.is_alive(4));
assert!(delete_bitset.is_alive(5));
assert!(delete_bitset.is_alive(6));
assert!(delete_bitset.is_alive(6));
assert!(delete_bitset.is_alive(7));
assert!(delete_bitset.is_alive(8));
assert!(delete_bitset.is_deleted(9));
for doc in 0..10 {
assert_eq!(delete_bitset.is_deleted(doc), !delete_bitset.is_alive(doc));
}
assert_eq!(delete_bitset.len(), 2);
}
}
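For reference, the removed format packed one delete flag per document, least-significant bit first within each byte, so lookup was pure arithmetic (`byte = doc / 8`, `bit = doc & 7`). A worked example of the legacy addressing:

```rust
// Worked example of the legacy layout deleted above.
fn legacy_is_deleted(data: &[u8], doc: u32) -> bool {
    let byte = data[(doc / 8) as usize];
    let shift = (doc & 7) as u8;
    byte & (1u8 << shift) != 0
}

// With docs 1 and 9 deleted out of max_doc = 10, the file is two bytes,
// each with bit 1 set: doc 1 -> byte 0, bit 1; doc 9 -> byte 1, bit 1.
let data = [0b0000_0010u8, 0b0000_0010u8];
assert!(legacy_is_deleted(&data, 1));
assert!(legacy_is_deleted(&data, 9));
assert!(!legacy_is_deleted(&data, 0));
```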

View File

@@ -23,9 +23,9 @@ values stored.
Read access performance is comparable to that of an array lookup.
*/
pub use self::alive_bitset::write_alive_bitset;
pub use self::alive_bitset::AliveBitSet;
pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter};
pub use self::delete::write_delete_bitset;
pub use self::delete::DeleteBitSet;
pub use self::error::{FastFieldNotAvailableError, Result};
pub use self::facet_reader::FacetReader;
pub use self::multivalued::{MultiValuedFastFieldReader, MultiValuedFastFieldWriter};
@@ -40,14 +40,14 @@ pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
use crate::schema::Cardinality;
use crate::schema::FieldType;
use crate::schema::Value;
use crate::DocId;
use crate::{
chrono::{NaiveDateTime, Utc},
schema::Type,
};
use crate::{common, DocId};
mod alive_bitset;
mod bytes;
mod delete;
mod error;
mod facet_reader;
mod multivalued;
@@ -213,8 +213,7 @@ fn value_to_u64(value: &Value) -> u64 {
mod tests {
use super::*;
use crate::common::CompositeFile;
use crate::common::HasLen;
use crate::directory::CompositeFile;
use crate::directory::{Directory, RamDirectory, WritePtr};
use crate::merge_policy::NoMergePolicy;
use crate::schema::Field;
@@ -222,6 +221,7 @@ mod tests {
use crate::schema::FAST;
use crate::schema::{Document, IntOptions};
use crate::{Index, SegmentId, SegmentReader};
use common::HasLen;
use once_cell::sync::Lazy;
use rand::prelude::SliceRandom;
use rand::rngs::StdRng;
@@ -588,7 +588,7 @@ mod bench {
use super::tests::FIELD;
use super::tests::{generate_permutation, SCHEMA};
use super::*;
use crate::common::CompositeFile;
use crate::directory::CompositeFile;
use crate::directory::{Directory, RamDirectory, WritePtr};
use crate::fastfield::FastFieldReader;
use std::collections::HashMap;

View File

@@ -8,14 +8,22 @@ pub use self::writer::MultiValuedFastFieldWriter;
mod tests {
use crate::collector::TopDocs;
use crate::indexer::NoMergePolicy;
use crate::query::QueryParser;
use crate::schema::Cardinality;
use crate::schema::Facet;
use crate::schema::IntOptions;
use crate::schema::Schema;
use crate::schema::INDEXED;
use crate::Document;
use crate::Index;
use crate::Term;
use chrono::Duration;
use futures::executor::block_on;
use proptest::prop_oneof;
use proptest::proptest;
use proptest::strategy::Strategy;
use test_env_log::test;
#[test]
fn test_multivalued_u64() {
@@ -225,6 +233,111 @@ mod tests {
multi_value_reader.get_vals(3, &mut vals);
assert_eq!(&vals, &[-5i64, -20i64, 1i64]);
}
fn test_multivalued_no_panic(ops: &[IndexingOp]) {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_u64_field(
"multifield",
IntOptions::default()
.set_fast(Cardinality::MultiValues)
.set_indexed(),
);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.set_merge_policy(Box::new(NoMergePolicy));
for &op in ops {
match op {
IndexingOp::AddDoc { id } => {
match id % 3 {
0 => {
index_writer.add_document(doc!());
}
1 => {
let mut doc = Document::new();
for _ in 0..5001 {
doc.add_u64(field, id as u64);
}
index_writer.add_document(doc);
}
_ => {
let mut doc = Document::new();
doc.add_u64(field, id as u64);
index_writer.add_document(doc);
}
};
}
IndexingOp::DeleteDoc { id } => {
index_writer.delete_term(Term::from_field_u64(field, id as u64));
}
IndexingOp::Commit => {
index_writer.commit().unwrap();
}
IndexingOp::Merge => {
let segment_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
if segment_ids.len() >= 2 {
block_on(index_writer.merge(&segment_ids)).unwrap();
assert!(index_writer.segment_updater().wait_merging_thread().is_ok());
}
}
}
}
assert!(index_writer.commit().is_ok());
// Merging the segments
{
let segment_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
if !segment_ids.is_empty() {
block_on(index_writer.merge(&segment_ids)).unwrap();
assert!(index_writer.wait_merging_threads().is_ok());
}
}
}
#[derive(Debug, Clone, Copy)]
enum IndexingOp {
AddDoc { id: u32 },
DeleteDoc { id: u32 },
Commit,
Merge,
}
fn operation_strategy() -> impl Strategy<Value = IndexingOp> {
prop_oneof![
(0u32..10u32).prop_map(|id| IndexingOp::DeleteDoc { id }),
(0u32..10u32).prop_map(|id| IndexingOp::AddDoc { id }),
(0u32..2u32).prop_map(|_| IndexingOp::Commit),
(0u32..1u32).prop_map(|_| IndexingOp::Merge),
]
}
proptest! {
#[test]
fn test_multivalued_proptest(ops in proptest::collection::vec(operation_strategy(), 1..10)) {
test_multivalued_no_panic(&ops[..]);
}
}
#[test]
fn test_multivalued_proptest_off_by_one_bug_1151() {
use IndexingOp::*;
let ops = [
AddDoc { id: 3 },
AddDoc { id: 1 },
AddDoc { id: 3 },
Commit,
Merge,
];
test_multivalued_no_panic(&ops[..]);
}
#[test]
#[ignore]
fn test_many_facets() {

View File

@@ -1,6 +1,5 @@
use super::FastValue;
use crate::common::BinarySerializable;
use crate::common::CompositeFile;
use crate::directory::CompositeFile;
use crate::directory::FileSlice;
use crate::directory::OwnedBytes;
use crate::directory::{Directory, RamDirectory, WritePtr};
@@ -8,6 +7,7 @@ use crate::fastfield::{CompositeFastFieldSerializer, FastFieldsWriter};
use crate::schema::Schema;
use crate::schema::FAST;
use crate::DocId;
use common::BinarySerializable;
use fastfield_codecs::bitpacked::BitpackedFastFieldReader as BitpackedReader;
use fastfield_codecs::bitpacked::BitpackedFastFieldSerializer;
use fastfield_codecs::linearinterpol::LinearInterpolFastFieldReader;

View File

@@ -1,4 +1,4 @@
use crate::common::CompositeFile;
use crate::directory::CompositeFile;
use crate::directory::FileSlice;
use crate::fastfield::MultiValuedFastFieldReader;
use crate::fastfield::{BitpackedFastFieldReader, FastFieldNotAvailableError};

View File

@@ -1,8 +1,8 @@
use crate::common::BinarySerializable;
use crate::common::CompositeWrite;
use crate::common::CountingWriter;
use crate::directory::CompositeWrite;
use crate::directory::WritePtr;
use crate::schema::Field;
use common::BinarySerializable;
use common::CountingWriter;
pub use fastfield_codecs::bitpacked::BitpackedFastFieldSerializer;
pub use fastfield_codecs::bitpacked::BitpackedFastFieldSerializerLegacy;
use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
@@ -105,9 +105,7 @@ impl CompositeFastFieldSerializer {
&fastfield_accessor,
&mut estimations,
);
if let Some(broken_estimation) = estimations
.iter()
.find(|estimation| estimation.0 == f32::NAN)
if let Some(broken_estimation) = estimations.iter().find(|estimation| estimation.0.is_nan())
{
warn!(
"broken estimation for fast field codec {}",

View File

@@ -1,12 +1,12 @@
use super::multivalued::MultiValuedFastFieldWriter;
use super::serializer::FastFieldStats;
use super::FastFieldDataAccess;
use crate::common;
use crate::fastfield::{BytesFastFieldWriter, CompositeFastFieldSerializer};
use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::postings::UnorderedTermId;
use crate::schema::{Cardinality, Document, Field, FieldEntry, FieldType, Schema};
use crate::termdict::TermOrdinal;
use common;
use fnv::FnvHashMap;
use std::collections::HashMap;
use std::io;

View File

@@ -1,5 +1,5 @@
use super::{fieldnorm_to_id, id_to_fieldnorm};
use crate::common::CompositeFile;
use crate::directory::CompositeFile;
use crate::directory::FileSlice;
use crate::directory::OwnedBytes;
use crate::schema::Field;

View File

@@ -1,4 +1,4 @@
use crate::common::CompositeWrite;
use crate::directory::CompositeWrite;
use crate::directory::WritePtr;
use crate::schema::Field;
use std::io;

View File

@@ -1,4 +1,8 @@
use crate::schema;
use crate::Index;
use crate::IndexSettings;
use crate::IndexSortByField;
use crate::Order;
use crate::Searcher;
use crate::{doc, schema::*};
use rand::thread_rng;
@@ -35,7 +39,7 @@ fn test_functional_store() -> crate::Result<()> {
let mut doc_set: Vec<u64> = Vec::new();
let mut doc_id = 0u64;
for iteration in 0..500 {
for iteration in 0..get_num_iterations() {
dbg!(iteration);
let num_docs: usize = rng.gen_range(0..4);
if !doc_set.is_empty() {
@@ -56,16 +60,37 @@ fn test_functional_store() -> crate::Result<()> {
Ok(())
}
fn get_num_iterations() -> usize {
std::env::var("NUM_FUNCTIONAL_TEST_ITERATIONS")
.map(|str| str.parse().unwrap())
.unwrap_or(2000)
}
#[test]
#[ignore]
fn test_functional_indexing() -> crate::Result<()> {
fn test_functional_indexing_sorted() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let id_field = schema_builder.add_u64_field("id", INDEXED);
let id_field = schema_builder.add_u64_field("id", INDEXED | FAST);
let multiples_field = schema_builder.add_u64_field("multiples", INDEXED);
let text_field_options = TextOptions::default()
.set_indexing_options(
TextFieldIndexing::default()
.set_index_option(schema::IndexRecordOption::WithFreqsAndPositions),
)
.set_stored();
let text_field = schema_builder.add_text_field("text_field", text_field_options);
let schema = schema_builder.build();
let index = Index::create_from_tempdir(schema)?;
let mut index_builder = Index::builder().schema(schema);
index_builder = index_builder.settings(IndexSettings {
sort_by_field: Some(IndexSortByField {
field: "id".to_string(),
order: Order::Desc,
}),
..Default::default()
});
let index = index_builder.create_from_tempdir().unwrap();
let reader = index.reader()?;
let mut rng = thread_rng();
@@ -75,7 +100,7 @@ fn test_functional_indexing() -> crate::Result<()> {
let mut committed_docs: HashSet<u64> = HashSet::new();
let mut uncommitted_docs: HashSet<u64> = HashSet::new();
for _ in 0..200 {
for _ in 0..get_num_iterations() {
let random_val = rng.gen_range(0..20);
if random_val == 0 {
index_writer.commit()?;
@@ -98,6 +123,84 @@ fn test_functional_indexing() -> crate::Result<()> {
for i in 1u64..10u64 {
doc.add_u64(multiples_field, random_val * i);
}
doc.add_text(text_field, get_text());
index_writer.add_document(doc);
}
}
Ok(())
}
const LOREM: &str = "Doc Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed \
do eiusmod tempor incididunt ut labore et dolore magna aliqua. \
Ut enim ad minim veniam, quis nostrud exercitation ullamco \
laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure \
dolor in reprehenderit in voluptate velit esse cillum dolore eu \
fugiat nulla pariatur. Excepteur sint occaecat cupidatat non \
proident, sunt in culpa qui officia deserunt mollit anim id est \
laborum.";
fn get_text() -> String {
use rand::seq::SliceRandom;
let mut rng = thread_rng();
let tokens: Vec<_> = LOREM.split(' ').collect();
let random_val = rng.gen_range(0..20);
(0..random_val)
.map(|_| tokens.choose(&mut rng).unwrap())
.cloned()
.collect::<Vec<_>>()
.join(" ")
}
#[test]
#[ignore]
fn test_functional_indexing_unsorted() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let id_field = schema_builder.add_u64_field("id", INDEXED);
let multiples_field = schema_builder.add_u64_field("multiples", INDEXED);
let text_field_options = TextOptions::default()
.set_indexing_options(
TextFieldIndexing::default()
.set_index_option(schema::IndexRecordOption::WithFreqsAndPositions),
)
.set_stored();
let text_field = schema_builder.add_text_field("text_field", text_field_options);
let schema = schema_builder.build();
let index = Index::create_from_tempdir(schema)?;
let reader = index.reader()?;
let mut rng = thread_rng();
let mut index_writer = index.writer_with_num_threads(3, 120_000_000)?;
let mut committed_docs: HashSet<u64> = HashSet::new();
let mut uncommitted_docs: HashSet<u64> = HashSet::new();
for _ in 0..get_num_iterations() {
let random_val = rng.gen_range(0..20);
if random_val == 0 {
index_writer.commit()?;
committed_docs.extend(&uncommitted_docs);
uncommitted_docs.clear();
reader.reload()?;
let searcher = reader.searcher();
// check that everything is correct.
check_index_content(
&searcher,
&committed_docs.iter().cloned().collect::<Vec<u64>>(),
)?;
} else if committed_docs.remove(&random_val) || uncommitted_docs.remove(&random_val) {
let doc_id_term = Term::from_field_u64(id_field, random_val);
index_writer.delete_term(doc_id_term);
} else {
uncommitted_docs.insert(random_val);
let mut doc = Document::new();
doc.add_u64(id_field, random_val);
for i in 1u64..10u64 {
doc.add_u64(multiples_field, random_val * i);
}
doc.add_text(text_field, get_text());
index_writer.add_document(doc);
}
}

View File

@@ -2,23 +2,23 @@
//! to get mappings from old doc_id to new doc_id and vice versa, after sorting
//!
use super::{merger::SegmentReaderWithOrdinal, SegmentWriter};
use super::SegmentWriter;
use crate::{
schema::{Field, Schema},
DocId, IndexSortByField, Order, TantivyError,
DocId, IndexSortByField, Order, SegmentOrdinal, TantivyError,
};
use std::{cmp::Reverse, ops::Index};
/// Struct to provide mapping from new doc_id to (old doc_id, segment ordinal).
#[derive(Clone)]
pub(crate) struct SegmentDocidMapping<'a> {
new_doc_id_to_old_and_segment: Vec<(DocId, SegmentReaderWithOrdinal<'a>)>,
pub(crate) struct SegmentDocidMapping {
new_doc_id_to_old_and_segment: Vec<(DocId, SegmentOrdinal)>,
is_trivial: bool,
}
impl<'a> SegmentDocidMapping<'a> {
impl SegmentDocidMapping {
pub(crate) fn new(
new_doc_id_to_old_and_segment: Vec<(DocId, SegmentReaderWithOrdinal<'a>)>,
new_doc_id_to_old_and_segment: Vec<(DocId, SegmentOrdinal)>,
is_trivial: bool,
) -> Self {
Self {
@@ -26,7 +26,7 @@ impl<'a> SegmentDocidMapping<'a> {
is_trivial,
}
}
pub(crate) fn iter(&self) -> impl Iterator<Item = &(DocId, SegmentReaderWithOrdinal)> {
pub(crate) fn iter(&self) -> impl Iterator<Item = &(DocId, SegmentOrdinal)> {
self.new_doc_id_to_old_and_segment.iter()
}
pub(crate) fn len(&self) -> usize {
@@ -40,15 +40,15 @@ impl<'a> SegmentDocidMapping<'a> {
self.is_trivial
}
}
impl<'a> Index<usize> for SegmentDocidMapping<'a> {
type Output = (DocId, SegmentReaderWithOrdinal<'a>);
impl Index<usize> for SegmentDocidMapping {
type Output = (DocId, SegmentOrdinal);
fn index(&self, idx: usize) -> &Self::Output {
&self.new_doc_id_to_old_and_segment[idx]
}
}
impl<'a> IntoIterator for SegmentDocidMapping<'a> {
type Item = (DocId, SegmentReaderWithOrdinal<'a>);
impl IntoIterator for SegmentDocidMapping {
type Item = (DocId, SegmentOrdinal);
type IntoIter = std::vec::IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {

View File

@@ -1,7 +1,6 @@
use super::operation::{AddOperation, UserOperation};
use super::segment_updater::SegmentUpdater;
use super::PreparedCommit;
use crate::common::BitSet;
use crate::core::Index;
use crate::core::Segment;
use crate::core::SegmentComponent;
@@ -12,7 +11,7 @@ use crate::directory::TerminatingWrite;
use crate::directory::{DirectoryLock, GarbageCollectionResult};
use crate::docset::{DocSet, TERMINATED};
use crate::error::TantivyError;
use crate::fastfield::write_delete_bitset;
use crate::fastfield::write_alive_bitset;
use crate::indexer::delete_queue::{DeleteCursor, DeleteQueue};
use crate::indexer::doc_opstamp_mapping::DocToOpstampMapping;
use crate::indexer::operation::DeleteOperation;
@@ -24,14 +23,18 @@ use crate::schema::Document;
use crate::schema::IndexRecordOption;
use crate::schema::Term;
use crate::Opstamp;
use common::BitSet;
use crossbeam::channel;
use futures::executor::block_on;
use futures::future::Future;
use smallvec::smallvec;
use smallvec::SmallVec;
use wasm_mt_pool::pool_exec;
use wasm_mt::prelude::*;
use std::mem;
use std::ops::Range;
use std::sync::Arc;
use wasm_mt_pool::prelude::*;
use std::thread;
use std::thread::JoinHandle;
@@ -75,7 +78,7 @@ pub struct IndexWriter {
heap_size_in_bytes_per_thread: usize,
workers_join_handle: Vec<JoinHandle<crate::Result<()>>>,
workers_join_handle: Vec<JoinHandle<Result<JsValue, JsValue>>>,
operation_receiver: OperationReceiver,
operation_sender: OperationSender,
@@ -90,10 +93,12 @@ pub struct IndexWriter {
stamper: Stamper,
committed_opstamp: Opstamp,
worker_pool: wasm_mt_pool::ThreadPool,
}
fn compute_deleted_bitset(
delete_bitset: &mut BitSet,
alive_bitset: &mut BitSet,
segment_reader: &SegmentReader,
delete_cursor: &mut DeleteCursor,
doc_opstamps: &DocToOpstampMapping,
@@ -114,7 +119,7 @@ fn compute_deleted_bitset(
let mut doc_matching_deleted_term = docset.doc();
while doc_matching_deleted_term != TERMINATED {
if doc_opstamps.is_deleted(doc_matching_deleted_term, delete_op.opstamp) {
delete_bitset.insert(doc_matching_deleted_term);
alive_bitset.remove(doc_matching_deleted_term);
might_have_changed = true;
}
doc_matching_deleted_term = docset.advance();
@@ -141,7 +146,7 @@ pub(crate) fn advance_deletes(
return Ok(());
}
if segment_entry.delete_bitset().is_none() && segment_entry.delete_cursor().get().is_none() {
if segment_entry.alive_bitset().is_none() && segment_entry.delete_cursor().get().is_none() {
// There has been no `DeleteOperation` between the segment status and `target_opstamp`.
return Ok(());
}
@@ -149,15 +154,15 @@ pub(crate) fn advance_deletes(
let segment_reader = SegmentReader::open(&segment)?;
let max_doc = segment_reader.max_doc();
let mut delete_bitset: BitSet = match segment_entry.delete_bitset() {
Some(previous_delete_bitset) => (*previous_delete_bitset).clone(),
None => BitSet::with_max_value(max_doc),
let mut alive_bitset: BitSet = match segment_entry.alive_bitset() {
Some(previous_alive_bitset) => (*previous_alive_bitset).clone(),
None => BitSet::with_max_value_and_full(max_doc),
};
let num_deleted_docs_before = segment.meta().num_deleted_docs();
compute_deleted_bitset(
&mut delete_bitset,
&mut alive_bitset,
&segment_reader,
segment_entry.delete_cursor(),
&DocToOpstampMapping::None,
@@ -167,20 +172,21 @@ pub(crate) fn advance_deletes(
// TODO optimize
// It should be possible to do something smarter by manipulating the bitsets directly
// to compute this intersection.
if let Some(seg_delete_bitset) = segment_reader.delete_bitset() {
if let Some(seg_alive_bitset) = segment_reader.alive_bitset() {
for doc in 0u32..max_doc {
if seg_delete_bitset.is_deleted(doc) {
delete_bitset.insert(doc);
if seg_alive_bitset.is_deleted(doc) {
alive_bitset.remove(doc);
}
}
}
let num_deleted_docs: u32 = delete_bitset.len() as u32;
let num_alive_docs: u32 = alive_bitset.len() as u32;
let num_deleted_docs = max_doc - num_alive_docs;
if num_deleted_docs > num_deleted_docs_before {
// There are new deletes. We need to write a new delete file.
segment = segment.with_delete_meta(num_deleted_docs as u32, target_opstamp);
let mut delete_file = segment.open_write(SegmentComponent::Delete)?;
write_delete_bitset(&delete_bitset, max_doc, &mut delete_file)?;
write_alive_bitset(&alive_bitset, &mut delete_file)?;
delete_file.terminate()?;
}
@@ -226,13 +232,12 @@ fn index_documents(
let segment_with_max_doc = segment.with_max_doc(max_doc);
let delete_bitset_opt =
apply_deletes(&segment_with_max_doc, &mut delete_cursor, &doc_opstamps)?;
let alive_bitset_opt = apply_deletes(&segment_with_max_doc, &mut delete_cursor, &doc_opstamps)?;
let meta = segment_with_max_doc.meta().clone();
meta.untrack_temp_docstore();
// update segment_updater inventory to remove tempstore
let segment_entry = SegmentEntry::new(meta, delete_cursor, delete_bitset_opt);
let segment_entry = SegmentEntry::new(meta, delete_cursor, alive_bitset_opt);
block_on(segment_updater.schedule_add_segment(segment_entry))?;
Ok(true)
}
@@ -259,7 +264,7 @@ fn apply_deletes(
let doc_to_opstamps = DocToOpstampMapping::WithMap(doc_opstamps);
let max_doc = segment.meta().max_doc();
let mut deleted_bitset = BitSet::with_max_value(max_doc);
let mut deleted_bitset = BitSet::with_max_value_and_full(max_doc);
let may_have_deletes = compute_deleted_bitset(
&mut deleted_bitset,
&segment_reader,
@@ -318,6 +323,7 @@ impl IndexWriter {
let segment_updater =
SegmentUpdater::create(index.clone(), stamper.clone(), &delete_queue.cursor())?;
let worker_pool = block_on(wasm_mt_pool::ThreadPool::new(num_threads, crate::PKG_JS).and_init()).unwrap();
let mut index_writer = IndexWriter {
_directory_lock: Some(directory_lock),
@@ -338,6 +344,7 @@ impl IndexWriter {
stamper,
worker_id: 0,
worker_pool,
};
index_writer.start_workers()?;
Ok(index_writer)
@@ -348,6 +355,11 @@ impl IndexWriter {
self.operation_sender = sender;
}
/// Accessor to the index.
pub fn index(&self) -> &Index {
&self.index
}
/// If there are some merging threads, blocks until they all finish their work and
/// then drop the `IndexWriter`.
pub fn wait_merging_threads(mut self) -> crate::Result<()> {
@@ -406,9 +418,8 @@ impl IndexWriter {
let mem_budget = self.heap_size_in_bytes_per_thread;
let index = self.index.clone();
let join_handle: JoinHandle<crate::Result<()>> = thread::Builder::new()
.name(format!("thrd-tantivy-index{}", self.worker_id))
.spawn(move || {
let join_handle: JoinHandle<crate::Result<_>> = pool_exec!(self.worker_pool,
move || {
loop {
let mut document_iterator =
document_receiver_clone.clone().into_iter().peekable();
@@ -849,7 +860,7 @@ mod tests {
let reader = index.reader().unwrap();
let searcher = reader.searcher();
assert_eq!(searcher.segment_readers().len(), 1);
assert_eq!(searcher.segment_reader(0u32).num_deleted_docs(), 0);
assert_eq!(searcher.segment_reader(0u32).num_docs(), 2);
index_writer.delete_term(Term::from_field_text(text_field, "hello1"));
assert!(index_writer.commit().is_ok());
@@ -857,7 +868,7 @@ mod tests {
assert!(reader.reload().is_ok());
let searcher = reader.searcher();
assert_eq!(searcher.segment_readers().len(), 1);
assert_eq!(searcher.segment_reader(0u32).num_deleted_docs(), 1);
assert_eq!(searcher.segment_reader(0u32).num_docs(), 1);
let previous_delete_opstamp = index.load_metas().unwrap().segments[0].delete_opstamp();
@@ -869,7 +880,7 @@ mod tests {
assert!(reader.reload().is_ok());
let searcher = reader.searcher();
assert_eq!(searcher.segment_readers().len(), 1);
assert_eq!(searcher.segment_reader(0u32).num_deleted_docs(), 1);
assert_eq!(searcher.segment_reader(0u32).num_docs(), 1);
let after_delete_opstamp = index.load_metas().unwrap().segments[0].delete_opstamp();
assert_eq!(after_delete_opstamp, previous_delete_opstamp);
@@ -1361,6 +1372,7 @@ mod tests {
AddDoc { id: u64 },
DeleteDoc { id: u64 },
Commit,
Merge,
}
fn operation_strategy() -> impl Strategy<Value = IndexingOp> {
@@ -1368,6 +1380,7 @@ mod tests {
(0u64..10u64).prop_map(|id| IndexingOp::DeleteDoc { id }),
(0u64..10u64).prop_map(|id| IndexingOp::AddDoc { id }),
(0u64..2u64).prop_map(|_| IndexingOp::Commit),
(0u64..1u64).prop_map(|_| IndexingOp::Merge),
]
}
@@ -1393,7 +1406,7 @@ mod tests {
fn test_operation_strategy(
ops: &[IndexingOp],
sort_index: bool,
force_merge: bool,
force_end_merge: bool,
) -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
let id_field = schema_builder.add_u64_field("id", FAST | INDEXED | STORED);
@@ -1435,6 +1448,8 @@ mod tests {
.settings(settings)
.create_in_ram()?;
let mut index_writer = index.writer_for_tests()?;
index_writer.set_merge_policy(Box::new(NoMergePolicy));
for &op in ops {
match op {
IndexingOp::AddDoc { id } => {
@@ -1448,12 +1463,21 @@ mod tests {
IndexingOp::Commit => {
index_writer.commit()?;
}
IndexingOp::Merge => {
let segment_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
if segment_ids.len() >= 2 {
block_on(index_writer.merge(&segment_ids)).unwrap();
assert!(index_writer.segment_updater().wait_merging_thread().is_ok());
}
}
}
}
index_writer.commit()?;
let searcher = index.reader()?.searcher();
if force_merge {
if force_end_merge {
index_writer.wait_merging_threads()?;
let mut index_writer = index.writer_for_tests()?;
let segment_ids = index
@@ -1500,7 +1524,7 @@ mod tests {
for segment_reader in searcher.segment_readers().iter() {
let store_reader = segment_reader.get_store_reader().unwrap();
// test store iterator
for doc in store_reader.iter(segment_reader.delete_bitset()) {
for doc in store_reader.iter(segment_reader.alive_bitset()) {
let id = doc
.unwrap()
.get_first(id_field)
@@ -1626,7 +1650,7 @@ mod tests {
let segment_reader = searcher.segment_reader(0);
assert_eq!(segment_reader.max_doc(), 2);
assert_eq!(segment_reader.num_deleted_docs(), 1);
assert_eq!(segment_reader.num_docs(), 1);
Ok(())
}
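The alive-semantics inversion runs through the whole writer path: bitsets start full, delete matches call `remove`, and the delete count falls out as `max_doc - alive.len()`. A condensed sketch of the accounting in `advance_deletes`, using the `common::BitSet` API from the imports above:

```rust
use common::BitSet;

// Condensed sketch of the alive-set accounting in `advance_deletes`.
let max_doc = 100u32;
let mut alive_bitset = BitSet::with_max_value_and_full(max_doc); // starts full

// `compute_deleted_bitset` now removes matched docs instead of inserting them.
alive_bitset.remove(3);
alive_bitset.remove(42);

let num_alive_docs = alive_bitset.len() as u32;
let num_deleted_docs = max_doc - num_alive_docs;
assert_eq!(num_deleted_docs, 2);
```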

View File

@@ -1,10 +1,10 @@
use crate::error::DataCorruption;
use crate::fastfield::CompositeFastFieldSerializer;
use crate::fastfield::DeleteBitSet;
use crate::fastfield::DynamicFastFieldReader;
use crate::fastfield::FastFieldDataAccess;
use crate::fastfield::FastFieldReader;
use crate::fastfield::FastFieldStats;
use crate::fastfield::MultiValueLength;
use crate::fastfield::MultiValuedFastFieldReader;
use crate::fieldnorm::FieldNormsSerializer;
use crate::fieldnorm::FieldNormsWriter;
@@ -19,9 +19,8 @@ use crate::schema::{Field, Schema};
use crate::store::StoreWriter;
use crate::termdict::TermMerger;
use crate::termdict::TermOrdinal;
use crate::IndexSettings;
use crate::IndexSortByField;
use crate::{common::HasLen, fastfield::MultiValueLength};
use crate::{common::MAX_DOC_LIMIT, IndexSettings};
use crate::{core::Segment, indexer::doc_id_mapping::expect_field_id_for_sort_field};
use crate::{core::SegmentReader, Order};
use crate::{
@@ -36,6 +35,11 @@ use std::collections::HashMap;
use std::sync::Arc;
use tantivy_bitpacker::minmax;
/// Segment's max doc must be `< MAX_DOC_LIMIT`.
///
/// We do not allow segments with more than `(1 << 31) - 1` documents.
pub const MAX_DOC_LIMIT: u32 = 1 << 31;
fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> crate::Result<u64> {
let mut total_tokens = 0u64;
let mut count: [usize; 256] = [0; 256];
@@ -63,58 +67,33 @@ fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> crate::R
.sum::<u64>())
}
/// `ReaderWithOrdinal` is used to be able to easier associate
/// data with a `SegmentReader`. The ordinal is supposed to be
/// used as an index access.
///
/// The ordinal identifies the position within `Merger` readers.
#[derive(Clone, Copy)]
pub(crate) struct SegmentReaderWithOrdinal<'a> {
pub reader: &'a SegmentReader,
pub ordinal: SegmentOrdinal,
}
impl<'a> From<(usize, &'a SegmentReader)> for SegmentReaderWithOrdinal<'a> {
fn from(data: (usize, &'a SegmentReader)) -> Self {
SegmentReaderWithOrdinal {
reader: data.1,
ordinal: data.0 as u32,
}
}
}
pub struct IndexMerger {
index_settings: IndexSettings,
schema: Schema,
readers: Vec<SegmentReader>,
pub(crate) readers: Vec<SegmentReader>,
max_doc: u32,
}
fn compute_min_max_val(
u64_reader: &impl FastFieldReader<u64>,
max_doc: DocId,
delete_bitset_opt: Option<&DeleteBitSet>,
segment_reader: &SegmentReader,
) -> Option<(u64, u64)> {
if max_doc == 0 {
None
} else {
match delete_bitset_opt {
Some(delete_bitset) => {
// some deleted documents,
// we need to recompute the max / min
minmax(
(0..max_doc)
.filter(|doc_id| delete_bitset.is_alive(*doc_id))
.map(|doc_id| u64_reader.get(doc_id)),
)
}
None => {
// no deleted documents,
// we can use the previous min_val, max_val.
Some((u64_reader.min_value(), u64_reader.max_value()))
}
}
if segment_reader.max_doc() == 0 {
return None;
}
if segment_reader.alive_bitset().is_none() {
// no deleted documents,
// we can use the previous min_val, max_val.
return Some((u64_reader.min_value(), u64_reader.max_value()));
}
// some deleted documents,
// we need to recompute the max / min
minmax(
segment_reader
.doc_ids_alive()
.map(|doc_id| u64_reader.get(doc_id)),
)
}
struct TermOrdinalMapping {
@@ -144,7 +123,7 @@ impl TermOrdinalMapping {
.iter()
.flat_map(|term_ordinals| term_ordinals.iter().cloned().max())
.max()
.unwrap_or_else(TermOrdinal::default)
.unwrap_or_default()
}
}
@@ -246,8 +225,8 @@ impl IndexMerger {
.iter()
.map(|reader| reader.get_fieldnorms_reader(field))
.collect::<Result<_, _>>()?;
for (doc_id, reader_with_ordinal) in doc_id_mapping.iter() {
let fieldnorms_reader = &fieldnorms_readers[reader_with_ordinal.ordinal as usize];
for (doc_id, reader_ordinal) in doc_id_mapping.iter() {
let fieldnorms_reader = &fieldnorms_readers[*reader_ordinal as usize];
let fieldnorm_id = fieldnorms_reader.fieldnorm_id(*doc_id);
fieldnorms_data.push(fieldnorm_id);
}
@@ -320,7 +299,7 @@ impl IndexMerger {
.fast_fields()
.typed_fast_field_reader(field)
.expect("Failed to find a reader for single fast field. This is a tantivy bug and it should never happen.");
compute_min_max_val(&u64_reader, reader.max_doc(), reader.delete_bitset())
compute_min_max_val(&u64_reader, reader)
})
.flatten()
.reduce(|a, b| {
@@ -346,25 +325,25 @@ impl IndexMerger {
};
#[derive(Clone)]
struct SortedDocidFieldAccessProvider<'a> {
doc_id_mapping: &'a SegmentDocidMapping<'a>,
doc_id_mapping: &'a SegmentDocidMapping,
fast_field_readers: &'a Vec<DynamicFastFieldReader<u64>>,
}
impl<'a> FastFieldDataAccess for SortedDocidFieldAccessProvider<'a> {
fn get_val(&self, doc: u64) -> u64 {
let (doc_id, reader_with_ordinal) = self.doc_id_mapping[doc as usize];
self.fast_field_readers[reader_with_ordinal.ordinal as usize].get(doc_id)
let (doc_id, reader_ordinal) = self.doc_id_mapping[doc as usize];
self.fast_field_readers[reader_ordinal as usize].get(doc_id)
}
}
let fastfield_accessor = SortedDocidFieldAccessProvider {
doc_id_mapping,
fast_field_readers: &fast_field_readers,
};
let iter1 = doc_id_mapping.iter().map(|(doc_id, reader_with_ordinal)| {
let fast_field_reader = &fast_field_readers[reader_with_ordinal.ordinal as usize];
let iter1 = doc_id_mapping.iter().map(|(doc_id, reader_ordinal)| {
let fast_field_reader = &fast_field_readers[*reader_ordinal as usize];
fast_field_reader.get(*doc_id)
});
let iter2 = doc_id_mapping.iter().map(|(doc_id, reader_with_ordinal)| {
let fast_field_reader = &fast_field_readers[reader_with_ordinal.ordinal as usize];
let iter2 = doc_id_mapping.iter().map(|(doc_id, reader_ordinal)| {
let fast_field_reader = &fast_field_readers[*reader_ordinal as usize];
fast_field_reader.get(*doc_id)
});
fast_field_serializer.create_auto_detect_u64_fast_field(
@@ -384,9 +363,10 @@ impl IndexMerger {
&self,
sort_by_field: &IndexSortByField,
) -> crate::Result<bool> {
let reader_and_field_accessors = self.get_reader_with_sort_field_accessor(sort_by_field)?;
let reader_ordinal_and_field_accessors =
self.get_reader_with_sort_field_accessor(sort_by_field)?;
let everything_is_in_order = reader_and_field_accessors
let everything_is_in_order = reader_ordinal_and_field_accessors
.into_iter()
.map(|reader| reader.1)
.tuple_windows()
@@ -412,24 +392,21 @@ impl IndexMerger {
pub(crate) fn get_reader_with_sort_field_accessor<'a, 'b>(
&'a self,
sort_by_field: &'b IndexSortByField,
) -> crate::Result<
Vec<(
SegmentReaderWithOrdinal<'a>,
impl FastFieldReader<u64> + Clone,
)>,
> {
let reader_and_field_accessors = self
) -> crate::Result<Vec<(SegmentOrdinal, impl FastFieldReader<u64> + Clone)>> {
let reader_ordinal_and_field_accessors = self
.readers
.iter()
.enumerate()
.map(Into::into)
.map(|reader_with_ordinal: SegmentReaderWithOrdinal| {
let value_accessor =
Self::get_sort_field_accessor(reader_with_ordinal.reader, sort_by_field)?;
Ok((reader_with_ordinal, value_accessor))
.map(|(reader_ordinal, _)| reader_ordinal as SegmentOrdinal)
.map(|reader_ordinal: SegmentOrdinal| {
let value_accessor = Self::get_sort_field_accessor(
&self.readers[reader_ordinal as usize],
sort_by_field,
)?;
Ok((reader_ordinal, value_accessor))
})
.collect::<crate::Result<Vec<_>>>()?;
Ok(reader_and_field_accessors)
Ok(reader_ordinal_and_field_accessors)
}
/// Generates the doc_id mapping where position in the vec=new
@@ -440,50 +417,54 @@ impl IndexMerger {
&self,
sort_by_field: &IndexSortByField,
) -> crate::Result<SegmentDocidMapping> {
let reader_and_field_accessors = self.get_reader_with_sort_field_accessor(sort_by_field)?;
let reader_ordinal_and_field_accessors =
self.get_reader_with_sort_field_accessor(sort_by_field)?;
// Loading the field accessor on demand causes a 15x regression
// create iterators over segment/sort_accessor/doc_id tuple
let doc_id_reader_pair =
reader_and_field_accessors
reader_ordinal_and_field_accessors
.iter()
.map(|reader_and_field_accessor| {
reader_and_field_accessor
.0
.reader
.doc_ids_alive()
.map(move |doc_id| {
(
doc_id,
reader_and_field_accessor.0,
&reader_and_field_accessor.1,
)
})
let reader = &self.readers[reader_and_field_accessor.0 as usize];
reader.doc_ids_alive().map(move |doc_id| {
(
doc_id,
reader_and_field_accessor.0,
&reader_and_field_accessor.1,
)
})
});
let total_num_new_docs = self
.readers
.iter()
.map(|reader| reader.num_docs() as usize)
.sum();
let mut sorted_doc_ids = Vec::with_capacity(total_num_new_docs);
// create iterator tuple of (old doc_id, segment ordinal) in order of the new doc_ids
let sorted_doc_ids: Vec<(DocId, SegmentReaderWithOrdinal)> = doc_id_reader_pair
.into_iter()
.kmerge_by(|a, b| {
let val1 = a.2.get(a.0);
let val2 = b.2.get(b.0);
if sort_by_field.order == Order::Asc {
val1 < val2
} else {
val1 > val2
}
})
.map(|(doc_id, reader_with_id, _)| (doc_id, reader_with_id))
.collect::<Vec<_>>();
sorted_doc_ids.extend(
doc_id_reader_pair
.into_iter()
.kmerge_by(|a, b| {
let val1 = a.2.get(a.0);
let val2 = b.2.get(b.0);
if sort_by_field.order == Order::Asc {
val1 < val2
} else {
val1 > val2
}
})
.map(|(doc_id, reader_with_id, _)| (doc_id, reader_with_id)),
);
Ok(SegmentDocidMapping::new(sorted_doc_ids, false))
}
// Creating the index file to point into the data, generic over `BytesFastFieldReader` and
// `MultiValuedFastFieldReader`
//
// Important: reader_and_field_accessors needs
// to have the same order as self.readers, since the `SegmentOrdinal`
// in the doc id mapping is used to index the reader_and_field_accessors vec.
fn write_1_n_fast_field_idx_generic<T: MultiValueLength>(
field: Field,
fast_field_serializer: &mut CompositeFastFieldSerializer,
@@ -495,25 +476,24 @@ impl IndexMerger {
//
// This is required by the bitpacker, as it needs to know
// what should be the bit length use for bitpacking.
let mut idx_num_vals = 0;
let mut num_docs = 0;
for (reader, u64s_reader) in reader_and_field_accessors.iter() {
if let Some(delete_bitset) = reader.delete_bitset() {
idx_num_vals += reader.max_doc() as u64 - delete_bitset.len() as u64;
for doc in 0u32..reader.max_doc() {
if delete_bitset.is_alive(doc) {
let num_vals = u64s_reader.get_len(doc) as u64;
total_num_vals += num_vals;
}
if let Some(alive_bitset) = reader.alive_bitset() {
num_docs += alive_bitset.num_alive_docs() as u64;
for doc in reader.doc_ids_alive() {
let num_vals = u64s_reader.get_len(doc) as u64;
total_num_vals += num_vals;
}
} else {
idx_num_vals += reader.max_doc() as u64;
num_docs += reader.max_doc() as u64;
total_num_vals += u64s_reader.get_total_len();
}
}
let stats = FastFieldStats {
max_value: total_num_vals,
num_vals: idx_num_vals,
// The fastfield offset index contains (num_docs + 1) values.
num_vals: num_docs + 1,
min_value: 0,
};
// We can now create our `idx` serializer, and in a second pass,
@@ -524,10 +504,10 @@ impl IndexMerger {
// access on the fly or 2. change the codec api to make random access optional, but
// both approaches have major drawbacks.
let mut offsets = vec![];
let mut offsets = Vec::with_capacity(doc_id_mapping.len());
let mut offset = 0;
for (doc_id, reader) in doc_id_mapping.iter() {
let reader = &reader_and_field_accessors[reader.ordinal as usize].1;
let reader = &reader_and_field_accessors[*reader as usize].1;
offsets.push(offset);
offset += reader.get_len(*doc_id) as u64;
}
@@ -549,7 +529,7 @@ impl IndexMerger {
fast_field_serializer: &mut CompositeFastFieldSerializer,
doc_id_mapping: &SegmentDocidMapping,
) -> crate::Result<Vec<u64>> {
let reader_and_field_accessors = self.readers.iter().map(|reader|{
let reader_ordinal_and_field_accessors = self.readers.iter().map(|reader|{
let u64s_reader: MultiValuedFastFieldReader<u64> = reader.fast_fields()
.typed_fast_field_multi_reader(field)
.expect("Failed to find index for multivalued field. This is a bug in tantivy, please report.");
@@ -560,7 +540,7 @@ impl IndexMerger {
field,
fast_field_serializer,
doc_id_mapping,
&reader_and_field_accessors,
&reader_ordinal_and_field_accessors,
)
}
@@ -599,11 +579,11 @@ impl IndexMerger {
fast_field_serializer.new_u64_fast_field_with_idx(field, 0u64, max_term_ord, 1)?;
let mut vals = Vec::with_capacity(100);
for (old_doc_id, reader_with_ordinal) in doc_id_mapping.iter() {
for (old_doc_id, reader_ordinal) in doc_id_mapping.iter() {
let term_ordinal_mapping: &[TermOrdinal] =
term_ordinal_mappings.get_segment(reader_with_ordinal.ordinal as usize);
term_ordinal_mappings.get_segment(*reader_ordinal as usize);
let ff_reader = &fast_field_reader[reader_with_ordinal.ordinal as usize];
let ff_reader = &fast_field_reader[*reader_ordinal as usize];
ff_reader.get_vals(*old_doc_id, &mut vals);
for &prev_term_ord in &vals {
let new_term_ord = term_ordinal_mapping[prev_term_ord as usize];
@@ -619,21 +599,25 @@ impl IndexMerger {
/// Creates a mapping as if the segments were simply stacked. This is helpful to share
/// code paths between index sorting and regular merging.
pub(crate) fn get_doc_id_from_concatenated_data(&self) -> crate::Result<SegmentDocidMapping> {
let mapping: Vec<_> = self
let total_num_new_docs = self
.readers
.iter()
.enumerate()
.map(|(ordinal, reader)| {
let reader_with_ordinal = SegmentReaderWithOrdinal {
ordinal: ordinal as u32,
reader,
};
reader
.doc_ids_alive()
.map(move |doc_id| (doc_id, reader_with_ordinal))
})
.flatten()
.collect();
.map(|reader| reader.num_docs() as usize)
.sum();
let mut mapping = Vec::with_capacity(total_num_new_docs);
mapping.extend(
self.readers
.iter()
.enumerate()
.map(|(reader_ordinal, reader)| {
reader
.doc_ids_alive()
.map(move |doc_id| (doc_id, reader_ordinal as SegmentOrdinal))
})
.flatten(),
);
Ok(SegmentDocidMapping::new(mapping, true))
}
fn write_multi_fast_field(
@@ -697,7 +681,7 @@ impl IndexMerger {
};
struct SortedDocidMultiValueAccessProvider<'a> {
doc_id_mapping: &'a SegmentDocidMapping<'a>,
doc_id_mapping: &'a SegmentDocidMapping,
fast_field_readers: &'a Vec<MultiValuedFastFieldReader<u64>>,
offsets: Vec<u64>,
}
@@ -716,13 +700,11 @@ impl IndexMerger {
let num_pos_covered_until_now = self.offsets[new_docid];
let pos_in_values = pos - num_pos_covered_until_now;
let (old_doc_id, reader_with_ordinal) = self.doc_id_mapping[new_docid as usize];
let num_vals = self.fast_field_readers[reader_with_ordinal.ordinal as usize]
.get_len(old_doc_id);
let (old_doc_id, reader_ordinal) = self.doc_id_mapping[new_docid as usize];
let num_vals = self.fast_field_readers[reader_ordinal as usize].get_len(old_doc_id);
assert!(num_vals >= pos_in_values);
let mut vals = vec![];
self.fast_field_readers[reader_with_ordinal.ordinal as usize]
.get_vals(old_doc_id, &mut vals);
self.fast_field_readers[reader_ordinal as usize].get_vals(old_doc_id, &mut vals);
vals[pos_in_values as usize]
}
@@ -734,8 +716,8 @@ impl IndexMerger {
};
let iter1 = doc_id_mapping
.iter()
.map(|(doc_id, reader_with_ordinal)| {
let ff_reader = &ff_readers[reader_with_ordinal.ordinal as usize];
.map(|(doc_id, reader_ordinal)| {
let ff_reader = &ff_readers[*reader_ordinal as usize];
let mut vals = vec![];
ff_reader.get_vals(*doc_id, &mut vals);
vals.into_iter()
@@ -743,8 +725,8 @@ impl IndexMerger {
.flatten();
let iter2 = doc_id_mapping
.iter()
.map(|(doc_id, reader_with_ordinal)| {
let ff_reader = &ff_readers[reader_with_ordinal.ordinal as usize];
.map(|(doc_id, reader_ordinal)| {
let ff_reader = &ff_readers[*reader_ordinal as usize];
let mut vals = vec![];
ff_reader.get_vals(*doc_id, &mut vals);
vals.into_iter()
@@ -786,8 +768,8 @@ impl IndexMerger {
)?;
let mut serialize_vals = fast_field_serializer.new_bytes_fast_field_with_idx(field, 1);
for (doc_id, reader_with_ordinal) in doc_id_mapping.iter() {
let bytes_reader = &reader_and_field_accessors[reader_with_ordinal.ordinal as usize].1;
for (doc_id, reader_ordinal) in doc_id_mapping.iter() {
let bytes_reader = &reader_and_field_accessors[*reader_ordinal as usize].1;
let val = bytes_reader.get_bytes(*doc_id);
serialize_vals.write_all(val)?;
}
@@ -841,8 +823,8 @@ impl IndexMerger {
segment_local_map
})
.collect();
for (new_doc_id, (old_doc_id, segment_and_ordinal)) in doc_id_mapping.iter().enumerate() {
let segment_map = &mut merged_doc_id_map[segment_and_ordinal.ordinal as usize];
for (new_doc_id, (old_doc_id, segment_ordinal)) in doc_id_mapping.iter().enumerate() {
let segment_map = &mut merged_doc_id_map[*segment_ordinal as usize];
segment_map[*old_doc_id as usize] = Some(new_doc_id as DocId);
}
@@ -889,9 +871,9 @@ impl IndexMerger {
let inverted_index: &InvertedIndexReader = &*field_readers[segment_ord];
let segment_postings = inverted_index
.read_postings_from_terminfo(&term_info, segment_postings_option)?;
let delete_bitset_opt = segment_reader.delete_bitset();
let doc_freq = if let Some(delete_bitset) = delete_bitset_opt {
segment_postings.doc_freq_given_deletes(delete_bitset)
let alive_bitset_opt = segment_reader.alive_bitset();
let doc_freq = if let Some(alive_bitset) = alive_bitset_opt {
segment_postings.doc_freq_given_deletes(alive_bitset)
} else {
segment_postings.doc_freq()
};
@@ -958,12 +940,13 @@ impl IndexMerger {
}
if !doc_id_mapping.is_trivial() {
doc_id_and_positions.sort_unstable_by_key(|&(doc_id, _, _)| doc_id);
for (doc_id, term_freq, positions) in &doc_id_and_positions {
field_serializer.write_doc(*doc_id, *term_freq, positions);
let delta_positions = delta_computer.compute_delta(positions);
field_serializer.write_doc(*doc_id, *term_freq, delta_positions);
}
doc_id_and_positions.clear();
}
// closing the term.
field_serializer.close_term()?;
}
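The new delta_computer step turns absolute in-document positions into gaps before they reach the serializer. A minimal sketch of that delta encoding (a hypothetical compute_delta, not tantivy's DeltaComputer):

// Positions within a document are non-decreasing, so storing gaps
// keeps the values small and cheap to bitpack.
fn compute_delta(positions: &[u32]) -> Vec<u32> {
    let mut prev = 0;
    positions
        .iter()
        .map(|&pos| {
            let delta = pos - prev;
            prev = pos;
            delta
        })
        .collect()
}

fn main() {
    assert_eq!(compute_delta(&[1, 3, 7]), vec![1, 2, 4]);
}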
@@ -1010,11 +993,11 @@ impl IndexMerger {
let mut document_iterators: Vec<_> = store_readers
.iter()
.enumerate()
.map(|(i, store)| store.iter_raw(self.readers[i].delete_bitset()))
.map(|(i, store)| store.iter_raw(self.readers[i].alive_bitset()))
.collect();
if !doc_id_mapping.is_trivial() {
for (old_doc_id, reader_with_ordinal) in doc_id_mapping.iter() {
let doc_bytes_it = &mut document_iterators[reader_with_ordinal.ordinal as usize];
for (old_doc_id, reader_ordinal) in doc_id_mapping.iter() {
let doc_bytes_it = &mut document_iterators[*reader_ordinal as usize];
if let Some(doc_bytes_res) = doc_bytes_it.next() {
let doc_bytes = doc_bytes_res?;
store_writer.store_bytes(&doc_bytes)?;
@@ -1029,7 +1012,7 @@ impl IndexMerger {
} else {
for reader in &self.readers {
let store_reader = reader.get_store_reader()?;
if reader.num_deleted_docs() > 0
if reader.has_deletes()
// If there is not enough data in the store, we avoid stacking in order to
// avoid creating many small blocks in the doc store. Once we have 5 full blocks,
// we start stacking. In the worst case 2/7 of the blocks would be very small.
@@ -1046,7 +1029,7 @@ impl IndexMerger {
|| store_reader.block_checkpoints().take(7).count() < 6
|| store_reader.compressor() != store_writer.compressor()
{
for doc_bytes_res in store_reader.iter_raw(reader.delete_bitset()) {
for doc_bytes_res in store_reader.iter_raw(reader.alive_bitset()) {
let doc_bytes = doc_bytes_res?;
store_writer.store_bytes(&doc_bytes)?;
}
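The stacking decision above boils down to a three-part predicate. A sketch with a hypothetical can_stack helper mirroring the condition in the merged code:

// Stack a segment's raw store bytes only if nothing forces a re-encode:
// no deletes, at least 6 of the first 7 checkpoints present (enough full
// blocks), and the same compressor on both sides.
fn can_stack(has_deletes: bool, full_blocks: usize, same_compressor: bool) -> bool {
    !has_deletes && full_blocks >= 6 && same_compressor
}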
@@ -2074,4 +2057,11 @@ mod tests {
Ok(())
}
#[test]
fn test_max_doc() {
// this is the first time I write a unit test for a constant.
assert!(((super::MAX_DOC_LIMIT - 1) as i32) >= 0);
assert!((super::MAX_DOC_LIMIT as i32) < 0);
}
}
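The two assertions pin super::MAX_DOC_LIMIT down to exactly 2^31: its predecessor must still fit in a non-negative i32, while the limit itself must cast negative. A standalone sketch, assuming the constant is 1 << 31:

const MAX_DOC_LIMIT: u32 = 1 << 31;

fn main() {
    // The largest valid doc id fits in a non-negative i32...
    assert!(((MAX_DOC_LIMIT - 1) as i32) >= 0);
    // ...while the limit itself wraps negative, so overflows show up as a sign flip.
    assert!((MAX_DOC_LIMIT as i32) < 0);
}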

View File

@@ -1,6 +1,7 @@
#[cfg(test)]
mod tests {
use crate::fastfield::FastFieldReader;
use crate::fastfield::{AliveBitSet, FastFieldReader};
use crate::schema::IndexRecordOption;
use crate::{
collector::TopDocs,
schema::{Cardinality, TextFieldIndexing},
@@ -16,7 +17,7 @@ mod tests {
schema::{self, BytesOptions},
DocAddress,
};
use crate::{IndexSettings, Term};
use crate::{DocSet, IndexSettings, Postings, Term};
use futures::executor::block_on;
fn create_test_index_posting_list_issue(index_settings: Option<IndexSettings>) -> Index {
@@ -104,9 +105,11 @@ mod tests {
index_writer.add_document(
doc!(int_field=>3_u64, multi_numbers => 3_u64, multi_numbers => 4_u64, bytes_field => vec![1, 2, 3], text_field => "some text", facet_field=> Facet::from("/book/crime")),
);
index_writer.add_document(doc!(int_field=>1_u64, text_field=> "deleteme"));
index_writer.add_document(
doc!(int_field=>2_u64, multi_numbers => 2_u64, multi_numbers => 3_u64),
doc!(int_field=>1_u64, text_field=> "deleteme", text_field => "ok text more text"),
);
index_writer.add_document(
doc!(int_field=>2_u64, multi_numbers => 2_u64, multi_numbers => 3_u64, text_field => "ok text more text"),
);
assert!(index_writer.commit().is_ok());
@@ -118,7 +121,7 @@ mod tests {
} else {
1
};
index_writer.add_document(doc!(int_field=>in_val, text_field=> "deleteme", facet_field=> Facet::from("/book/crime")));
index_writer.add_document(doc!(int_field=>in_val, text_field=> "deleteme" , text_field => "ok text more text", facet_field=> Facet::from("/book/crime")));
assert!(index_writer.commit().is_ok());
// segment 3 - range 5-1000, with force_disjunct_segment_sort_values 50-1000
let int_vals = if force_disjunct_segment_sort_values {
@@ -243,6 +246,36 @@ mod tests {
assert_eq!(do_search("biggest"), vec![0]);
}
// postings file
{
let my_text_field = index.schema().get_field("text_field").unwrap();
let term_a = Term::from_field_text(my_text_field, "text");
let inverted_index = segment_reader.inverted_index(my_text_field).unwrap();
let mut postings = inverted_index
.read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
.unwrap()
.unwrap();
assert_eq!(postings.doc_freq(), 2);
let fallback_bitset = AliveBitSet::for_test_from_deleted_docs(&[0], 100);
assert_eq!(
postings.doc_freq_given_deletes(
segment_reader.alive_bitset().unwrap_or(&fallback_bitset)
),
2
);
assert_eq!(postings.term_freq(), 1);
let mut output = vec![];
postings.positions(&mut output);
assert_eq!(output, vec![1]);
postings.advance();
assert_eq!(postings.term_freq(), 2);
postings.positions(&mut output);
assert_eq!(output, vec![1, 3]);
}
// access doc store
{
let blubber_pos = if force_disjunct_segment_sort_values {
@@ -260,6 +293,69 @@ mod tests {
}
}
#[test]
fn test_merge_unsorted_index() {
let index = create_test_index(
Some(IndexSettings {
..Default::default()
}),
false,
);
let reader = index.reader().unwrap();
let searcher = reader.searcher();
assert_eq!(searcher.segment_readers().len(), 1);
let segment_reader = searcher.segment_readers().last().unwrap();
let searcher = index.reader().unwrap().searcher();
{
let my_text_field = index.schema().get_field("text_field").unwrap();
let do_search = |term: &str| {
let query = QueryParser::for_index(&index, vec![my_text_field])
.parse_query(term)
.unwrap();
let top_docs: Vec<(f32, DocAddress)> =
searcher.search(&query, &TopDocs::with_limit(3)).unwrap();
top_docs.iter().map(|el| el.1.doc_id).collect::<Vec<_>>()
};
assert_eq!(do_search("some"), vec![1]);
assert_eq!(do_search("blubber"), vec![3]);
assert_eq!(do_search("biggest"), vec![4]);
}
// postings file
{
let my_text_field = index.schema().get_field("text_field").unwrap();
let term_a = Term::from_field_text(my_text_field, "text");
let inverted_index = segment_reader.inverted_index(my_text_field).unwrap();
let mut postings = inverted_index
.read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
.unwrap()
.unwrap();
assert_eq!(postings.doc_freq(), 2);
let fallback_bitset = AliveBitSet::for_test_from_deleted_docs(&[0], 100);
assert_eq!(
postings.doc_freq_given_deletes(
segment_reader.alive_bitset().unwrap_or(&fallback_bitset)
),
2
);
assert_eq!(postings.term_freq(), 1);
let mut output = vec![];
postings.positions(&mut output);
assert_eq!(output, vec![1]);
postings.advance();
assert_eq!(postings.term_freq(), 2);
postings.positions(&mut output);
assert_eq!(output, vec![1, 3]);
}
}
#[test]
fn test_merge_sorted_index_asc() {
let index = create_test_index(
@@ -314,7 +410,7 @@ mod tests {
let my_text_field = index.schema().get_field("text_field").unwrap();
let fieldnorm_reader = segment_reader.get_fieldnorms_reader(my_text_field).unwrap();
assert_eq!(fieldnorm_reader.fieldnorm(0), 0);
assert_eq!(fieldnorm_reader.fieldnorm(1), 0);
assert_eq!(fieldnorm_reader.fieldnorm(1), 4);
assert_eq!(fieldnorm_reader.fieldnorm(2), 2); // some text
assert_eq!(fieldnorm_reader.fieldnorm(3), 1);
assert_eq!(fieldnorm_reader.fieldnorm(5), 3); // the biggest num
@@ -339,6 +435,34 @@ mod tests {
assert_eq!(do_search("biggest"), vec![5]);
}
// postings file
{
let my_text_field = index.schema().get_field("text_field").unwrap();
let term_a = Term::from_field_text(my_text_field, "text");
let inverted_index = segment_reader.inverted_index(my_text_field).unwrap();
let mut postings = inverted_index
.read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
.unwrap()
.unwrap();
assert_eq!(postings.doc_freq(), 2);
let fallback_bitset = AliveBitSet::for_test_from_deleted_docs(&[0], 100);
assert_eq!(
postings.doc_freq_given_deletes(
segment_reader.alive_bitset().unwrap_or(&fallback_bitset)
),
2
);
let mut output = vec![];
postings.positions(&mut output);
assert_eq!(output, vec![1, 3]);
postings.advance();
postings.positions(&mut output);
assert_eq!(output, vec![1]);
}
// access doc store
{
let doc = searcher.doc(DocAddress::new(0, 0)).unwrap();
@@ -422,8 +546,9 @@ mod bench_sorted_index_merge {
let doc_id_mapping = merger.generate_doc_id_mapping(&sort_by_field).unwrap();
b.iter(|| {
let sorted_doc_ids = doc_id_mapping.iter().map(|(doc_id, reader)|{
let u64_reader: DynamicFastFieldReader<u64> = reader.reader
let sorted_doc_ids = doc_id_mapping.iter().map(|(doc_id, ordinal)|{
let reader = &merger.readers[*ordinal as usize];
let u64_reader: DynamicFastFieldReader<u64> = reader
.fast_fields()
.typed_fast_field_reader(field)
.expect("Failed to find a reader for single fast field. This is a tantivy bug and it should never happen.");

View File

@@ -1,7 +1,7 @@
use crate::common::BitSet;
use crate::core::SegmentId;
use crate::core::SegmentMeta;
use crate::indexer::delete_queue::DeleteCursor;
use common::BitSet;
use std::fmt;
/// A segment entry describes the state of
@@ -9,18 +9,16 @@ use std::fmt;
///
/// In addition to segment `meta`,
/// it contains a few transient states
/// - `state` expresses whether the segment is already in the
/// middle of a merge
/// - `delete_bitset` is a bitset describing
/// documents that were deleted during the commit
/// - `alive_bitset` is a bitset describing
/// documents that were alive during the commit
/// itself.
/// - `delete_cursor` is the position in the delete queue.
/// Deletes happening before the cursor are reflected either
/// in the .del file or in the `delete_bitset`.
/// in the .del file or in the `alive_bitset`.
#[derive(Clone)]
pub struct SegmentEntry {
meta: SegmentMeta,
delete_bitset: Option<BitSet>,
alive_bitset: Option<BitSet>,
delete_cursor: DeleteCursor,
}
@@ -29,11 +27,11 @@ impl SegmentEntry {
pub fn new(
segment_meta: SegmentMeta,
delete_cursor: DeleteCursor,
delete_bitset: Option<BitSet>,
alive_bitset: Option<BitSet>,
) -> SegmentEntry {
SegmentEntry {
meta: segment_meta,
delete_bitset,
alive_bitset,
delete_cursor,
}
}
@@ -41,8 +39,8 @@ impl SegmentEntry {
/// Return a reference to the segment entry alive bitset.
///
/// `DocId`s in this bitset are flagged as alive.
pub fn delete_bitset(&self) -> Option<&BitSet> {
self.delete_bitset.as_ref()
pub fn alive_bitset(&self) -> Option<&BitSet> {
self.alive_bitset.as_ref()
}
/// Set the `SegmentMeta` for this segment.

View File

@@ -11,6 +11,8 @@
#![doc(test(attr(allow(unused_variables), deny(warnings))))]
#![warn(missing_docs)]
#![feature(async_closure)]
//! # `tantivy`
//!
//! Tantivy is a search engine library.
@@ -126,6 +128,8 @@ mod macros;
pub use crate::error::TantivyError;
pub use chrono;
pub const PKG_JS: &'static str = "./pkg/pool_exec.js"; // path to `wasm-bindgen`'s JS binding
/// Tantivy result.
///
/// Within tantivy, please avoid importing `Result` using `use crate::Result`
@@ -135,7 +139,6 @@ pub type Result<T> = std::result::Result<T, TantivyError>;
/// Tantivy DateTime
pub type DateTime = chrono::DateTime<chrono::Utc>;
mod common;
mod core;
mod indexer;
@@ -163,8 +166,6 @@ pub use self::snippet::{Snippet, SnippetGenerator};
mod docset;
pub use self::docset::{DocSet, TERMINATED};
pub use crate::common::HasLen;
pub use crate::common::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
pub use crate::core::{Executor, SegmentComponent};
pub use crate::core::{
Index, IndexBuilder, IndexMeta, IndexSettings, IndexSortByField, Order, Searcher, Segment,
@@ -178,6 +179,8 @@ pub use crate::indexer::IndexWriter;
pub use crate::postings::Postings;
pub use crate::reader::LeasedItem;
pub use crate::schema::{Document, Term};
pub use common::HasLen;
pub use common::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
use std::fmt;
use once_cell::sync::Lazy;
@@ -293,7 +296,7 @@ pub struct DocAddress {
}
#[cfg(test)]
mod tests {
pub mod tests {
use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE;
use crate::core::SegmentReader;
use crate::docset::{DocSet, TERMINATED};
@@ -304,11 +307,18 @@ mod tests {
use crate::Index;
use crate::Postings;
use crate::ReloadPolicy;
use common::{BinarySerializable, FixedSize};
use rand::distributions::Bernoulli;
use rand::distributions::Uniform;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
let mut buffer = Vec::new();
O::default().serialize(&mut buffer).unwrap();
assert_eq!(buffer.len(), O::SIZE_IN_BYTES);
}
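fixed_size_test serializes a default value and checks the byte count against the type's declared SIZE_IN_BYTES. A self-contained sketch, with local stand-ins for the common crate's traits and a hypothetical fixed-size type:

use std::io::{self, Write};

trait BinarySerializable {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()>;
}
trait FixedSize {
    const SIZE_IN_BYTES: usize;
}

#[derive(Default)]
struct Checkpoint {
    start_doc: u32,
    end_doc: u32,
}

impl BinarySerializable for Checkpoint {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_all(&self.start_doc.to_le_bytes())?;
        writer.write_all(&self.end_doc.to_le_bytes())
    }
}
impl FixedSize for Checkpoint {
    const SIZE_IN_BYTES: usize = 8; // two u32 fields
}

fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
    let mut buffer = Vec::new();
    O::default().serialize(&mut buffer).unwrap();
    assert_eq!(buffer.len(), O::SIZE_IN_BYTES);
}

fn main() {
    fixed_size_test::<Checkpoint>();
}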
/// Checks if left and right are close to each other.
/// Panics if the two values are more than 0.5% apart.
#[macro_export]
@@ -993,8 +1003,24 @@ mod tests {
#[test]
fn test_validate_checksum() -> crate::Result<()> {
let index_path = tempfile::tempdir().expect("dir");
let schema = Schema::builder().build();
let mut builder = Schema::builder();
let body = builder.add_text_field("body", TEXT | STORED);
let schema = builder.build();
let index = Index::create_in_dir(&index_path, schema)?;
let mut writer = index.writer(50_000_000)?;
for _ in 0..5000 {
writer.add_document(doc!(body => "foo"));
writer.add_document(doc!(body => "boo"));
}
writer.commit()?;
assert!(index.validate_checksum()?.is_empty());
// delete few docs
writer.delete_term(Term::from_field_text(body, "foo"));
writer.commit()?;
let segment_ids = index.searchable_segment_ids()?;
let _ = futures::executor::block_on(writer.merge(&segment_ids));
assert!(index.validate_checksum()?.is_empty());
Ok(())
}

View File

@@ -1,9 +1,9 @@
use std::io;
use crate::common::{BinarySerializable, VInt};
use crate::directory::OwnedBytes;
use crate::positions::COMPRESSION_BLOCK_SIZE;
use crate::postings::compression::{BlockDecoder, VIntDecoder};
use common::{BinarySerializable, VInt};
/// When accessing the position of a term, we get a positions_idx from the `Terminfo`.
/// This means we need to skip to the `nth` positions efficiently.

View File

@@ -1,7 +1,7 @@
use crate::common::{BinarySerializable, CountingWriter, VInt};
use crate::positions::COMPRESSION_BLOCK_SIZE;
use crate::postings::compression::BlockEncoder;
use crate::postings::compression::VIntEncoder;
use common::{BinarySerializable, CountingWriter, VInt};
use std::io::{self, Write};
/// The PositionSerializer is in charge of serializing all of the positions

View File

@@ -1,241 +1,109 @@
use std::ops::Range;
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
use crate::postings::compression::AlignedBuffer;
unsafe fn binary_search_step(ptr: *const u32, target: u32, half_size: isize) -> *const u32 {
let mid = ptr.offset(half_size);
if *mid < target {
mid.offset(1)
} else {
ptr
}
}
/// This module defines the logic used to search for a doc in a given
/// block (of at most 128 docs).
/// Search the first index containing an element greater or equal to
/// the target.
///
/// Searching within a block is a hotspot when running intersections,
/// so it was worth defining it in its own module.
#[cfg(target_arch = "x86_64")]
mod sse2 {
use crate::postings::compression::{AlignedBuffer, COMPRESSION_BLOCK_SIZE};
use std::arch::x86_64::__m128i as DataType;
use std::arch::x86_64::_mm_add_epi32 as op_add;
use std::arch::x86_64::_mm_cmplt_epi32 as op_lt;
use std::arch::x86_64::_mm_load_si128 as op_load; // requires 128-bits alignment
use std::arch::x86_64::_mm_set1_epi32 as set1;
use std::arch::x86_64::_mm_setzero_si128 as set0;
use std::arch::x86_64::_mm_sub_epi32 as op_sub;
use std::arch::x86_64::{_mm_cvtsi128_si32, _mm_shuffle_epi32};
const MASK1: i32 = 78;
const MASK2: i32 = 177;
/// Performs an exhaustive linear search over the block.
///
/// There is no early exit here. We simply count the
/// number of elements that are `< target`.
pub(crate) fn linear_search_sse2_128(arr: &AlignedBuffer, target: u32) -> usize {
unsafe {
let ptr = arr as *const AlignedBuffer as *const DataType;
let vkey = set1(target as i32);
let mut cnt = set0();
// We work over 4 `__m128i` at a time.
// A single `__m128i` actually contains 4 `u32`s.
for i in 0..(COMPRESSION_BLOCK_SIZE as isize) / (4 * 4) {
let cmp1 = op_lt(op_load(ptr.offset(i * 4)), vkey);
let cmp2 = op_lt(op_load(ptr.offset(i * 4 + 1)), vkey);
let cmp3 = op_lt(op_load(ptr.offset(i * 4 + 2)), vkey);
let cmp4 = op_lt(op_load(ptr.offset(i * 4 + 3)), vkey);
let sum = op_add(op_add(cmp1, cmp2), op_add(cmp3, cmp4));
cnt = op_sub(cnt, sum);
}
cnt = op_add(cnt, _mm_shuffle_epi32(cnt, MASK1));
cnt = op_add(cnt, _mm_shuffle_epi32(cnt, MASK2));
_mm_cvtsi128_si32(cnt) as usize
}
}
#[cfg(test)]
mod test {
use super::linear_search_sse2_128;
use crate::postings::compression::{AlignedBuffer, COMPRESSION_BLOCK_SIZE};
#[test]
fn test_linear_search_sse2_128_u32() {
let mut block = [0u32; COMPRESSION_BLOCK_SIZE];
for el in 0u32..128u32 {
block[el as usize] = (el * 2 + 1) << 18;
}
let target = block[64] + 1;
assert_eq!(linear_search_sse2_128(&AlignedBuffer(block), target), 65);
}
}
}
/// This `linear search` browses exhaustively through the array,
/// as an early exit would be very difficult to predict.
/// The results should be equivalent to
/// ```compile_fail
/// block[..]
// .iter()
// .take_while(|&&val| val < target)
// .count()
/// ```
///
/// Coupled with `exponential search`, this function is likely
/// to be called with the same `len`.
fn linear_search(arr: &[u32], target: u32) -> usize {
arr.iter().map(|&el| if el < target { 1 } else { 0 }).sum()
}
fn exponential_search(arr: &[u32], target: u32) -> Range<usize> {
let end = arr.len();
let mut begin = 0;
for &pivot in &[1, 3, 7, 15, 31, 63] {
if pivot >= end {
break;
}
if arr[pivot] > target {
return begin..pivot;
}
begin = pivot;
}
begin..end
}
#[inline(never)]
fn galloping(block_docs: &[u32], target: u32) -> usize {
let range = exponential_search(block_docs, target);
range.start + linear_search(&block_docs[range], target)
}
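A property worth keeping in mind: galloping must agree with the trivial take_while count for any sorted block and in-range target. A test-style sketch against the functions above:

#[test]
fn galloping_matches_linear_count() {
    let block: Vec<u32> = (0..128u32).map(|i| i * 3).collect();
    for target in [0u32, 1, 47, 300, 381] {
        let expected = block.iter().take_while(|&&v| v < target).count();
        assert_eq!(galloping(&block, target), expected);
    }
}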
/// Tantivy may rely on SIMD instructions to search for a specific document within
/// a given block.
#[derive(Clone, Copy, PartialEq)]
pub enum BlockSearcher {
#[cfg(target_arch = "x86_64")]
Sse2,
Scalar,
}
impl BlockSearcher {
/// Search the first index containing an element greater or equal to
/// the target.
///
/// The results should be equivalent to
/// ```compile_fail
/// block[..]
// .iter()
// .take_while(|&&val| val < target)
// .count()
/// ```
///
/// The `start` argument is just used to hint that the response is
/// greater than beyond `start`. The implementation may or may not use
/// it for optimization.
///
/// # Assumption
///
/// The array len is > start.
/// The block is sorted
/// The target is assumed greater or equal to `arr[start]`.
/// The target is assumed smaller or equal to the last element of the block.
///
/// Currently the scalar implementation starts by an exponential search, and
/// then operates a linear search in the result subarray.
///
/// If SSE2 instructions are available in the `(platform, running CPU)`,
/// then we use a different implementation that does an exhaustive linear search over
/// the block regardless of whether the block is full or not.
///
/// Indeed, if the block is not full, the remaining items are TERMINATED.
/// It is surprisingly faster, most likely because of the lack of branch misprediction.
pub(crate) fn search_in_block(self, block_docs: &AlignedBuffer, target: u32) -> usize {
#[cfg(target_arch = "x86_64")]
{
if self == BlockSearcher::Sse2 {
return sse2::linear_search_sse2_128(block_docs, target);
}
}
galloping(&block_docs.0[..], target)
}
}
impl Default for BlockSearcher {
fn default() -> BlockSearcher {
#[cfg(target_arch = "x86_64")]
{
if is_x86_feature_detected!("sse2") {
return BlockSearcher::Sse2;
}
}
BlockSearcher::Scalar
/// Branchless binary search over a full block of `COMPRESSION_BLOCK_SIZE` sorted `u32`s,
/// returning the first index whose element is greater or equal to the target.
///
/// # Assumption
///
/// - The block is sorted. Some elements may appear several times. This is the case at the
/// end of the last block for instance.
/// - The target is assumed smaller or equal to the last element of the block.
pub fn branchless_binary_search(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
let start_ptr: *const u32 = &arr[0] as *const u32;
unsafe {
let mut ptr = start_ptr;
ptr = binary_search_step(ptr, target, 63);
ptr = binary_search_step(ptr, target, 31);
ptr = binary_search_step(ptr, target, 15);
ptr = binary_search_step(ptr, target, 7);
ptr = binary_search_step(ptr, target, 3);
ptr = binary_search_step(ptr, target, 1);
let extra = if *ptr < target { 1 } else { 0 };
(ptr.offset_from(start_ptr) as usize) + extra
}
}
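The fixed step sequence 63, 31, 15, 7, 3, 1 halves a 128-slot block down to a single candidate, and each step's `if` typically compiles to a conditional move rather than a branch, hence "branchless". A usage sketch, assuming COMPRESSION_BLOCK_SIZE is 128 (BitPacker4x's block length):

fn usage_sketch() {
    // The block must be full; unused slots are padded high so that
    // any in-range target stays <= the last element.
    let mut block = [u32::MAX; 128];
    for i in 0..100 {
        block[i] = (i as u32) * 2;
    }
    // The first index holding a value >= 13 is 7 (block[7] == 14).
    assert_eq!(branchless_binary_search(&block, 13), 7);
}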
#[cfg(test)]
mod tests {
use super::exponential_search;
use super::linear_search;
use super::BlockSearcher;
use super::branchless_binary_search;
use crate::docset::TERMINATED;
use crate::postings::compression::{AlignedBuffer, COMPRESSION_BLOCK_SIZE};
#[test]
fn test_linear_search() {
let len: usize = 50;
let arr: Vec<u32> = (0..len).map(|el| 1u32 + (el as u32) * 2).collect();
for target in 1..*arr.last().unwrap() {
let res = linear_search(&arr[..], target);
if res > 0 {
assert!(arr[res - 1] < target);
}
if res < len {
assert!(arr[res] >= target);
}
}
}
#[test]
fn test_exponentiel_search() {
assert_eq!(exponential_search(&[1, 2], 0), 0..1);
assert_eq!(exponential_search(&[1, 2], 1), 0..1);
assert_eq!(
exponential_search(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 7),
3..7
);
}
fn util_test_search_in_block(block_searcher: BlockSearcher, block: &[u32], target: u32) {
let cursor = search_in_block_trivial_but_slow(block, target);
assert!(block.len() < COMPRESSION_BLOCK_SIZE);
let mut output_buffer = [TERMINATED; COMPRESSION_BLOCK_SIZE];
output_buffer[..block.len()].copy_from_slice(block);
assert_eq!(
block_searcher.search_in_block(&AlignedBuffer(output_buffer), target),
cursor
);
}
fn util_test_search_in_block_all(block_searcher: BlockSearcher, block: &[u32]) {
use std::collections::HashSet;
let mut targets = HashSet::new();
for (i, val) in block.iter().cloned().enumerate() {
if i > 0 {
targets.insert(val - 1);
}
targets.insert(val);
}
for target in targets {
util_test_search_in_block(block_searcher, block, target);
}
}
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
use proptest::prelude::*;
use std::collections::HashSet;
fn search_in_block_trivial_but_slow(block: &[u32], target: u32) -> usize {
block.iter().take_while(|&&val| val < target).count()
}
fn test_search_in_block_util(block_searcher: BlockSearcher) {
for len in 1u32..128u32 {
let v: Vec<u32> = (0..len).map(|i| i * 2).collect();
util_test_search_in_block_all(block_searcher, &v[..]);
fn util_test_search_in_block(block: &[u32], target: u32) {
let cursor = search_in_block_trivial_but_slow(block, target);
assert!(cursor < COMPRESSION_BLOCK_SIZE);
assert!(block[cursor] >= target);
if cursor > 0 {
assert!(block[cursor - 1] < target);
}
assert_eq!(block.len(), COMPRESSION_BLOCK_SIZE);
let mut output_buffer = [TERMINATED; COMPRESSION_BLOCK_SIZE];
output_buffer[..block.len()].copy_from_slice(block);
assert_eq!(branchless_binary_search(&output_buffer, target), cursor);
}
fn util_test_search_in_block_all(block: &[u32]) {
let mut targets = HashSet::new();
targets.insert(0);
for &val in block {
if val > 0 {
targets.insert(val - 1);
}
targets.insert(val);
}
for target in targets {
util_test_search_in_block(block, target);
}
}
#[test]
fn test_search_in_block_scalar() {
test_search_in_block_util(BlockSearcher::Scalar);
fn test_search_in_branchless_binary_search() {
let v: Vec<u32> = (0..COMPRESSION_BLOCK_SIZE).map(|i| i as u32 * 2).collect();
util_test_search_in_block_all(&v[..]);
}
#[cfg(target_arch = "x86_64")]
#[test]
fn test_search_in_block_sse2() {
test_search_in_block_util(BlockSearcher::Sse2);
fn monotonous_block() -> impl Strategy<Value = Vec<u32>> {
prop::collection::vec(0u32..5u32, COMPRESSION_BLOCK_SIZE).prop_map(|mut deltas| {
let mut el = 0;
for i in 0..COMPRESSION_BLOCK_SIZE {
el += deltas[i];
deltas[i] = el;
}
deltas
})
}
proptest! {
#[test]
fn test_proptest_branchless_binary_search(block in monotonous_block()) {
util_test_search_in_block_all(&block[..]);
}
}
}

View File

@@ -1,16 +1,14 @@
use std::io;
use crate::common::{BinarySerializable, VInt};
use crate::directory::FileSlice;
use crate::directory::OwnedBytes;
use crate::fieldnorm::FieldNormReader;
use crate::postings::compression::{
AlignedBuffer, BlockDecoder, VIntDecoder, COMPRESSION_BLOCK_SIZE,
};
use crate::postings::compression::{BlockDecoder, VIntDecoder, COMPRESSION_BLOCK_SIZE};
use crate::postings::{BlockInfo, FreqReadingOption, SkipReader};
use crate::query::Bm25Weight;
use crate::schema::IndexRecordOption;
use crate::{DocId, Score, TERMINATED};
use common::{BinarySerializable, VInt};
fn max_score<I: Iterator<Item = Score>>(mut it: I) -> Option<Score> {
it.next().map(|first| it.fold(first, Score::max))
@@ -209,9 +207,9 @@ impl BlockSegmentPostings {
///
/// This method is useful to run SSE2 linear search.
#[inline]
pub(crate) fn docs_aligned(&self) -> &AlignedBuffer {
pub(crate) fn full_block(&self) -> &[DocId; COMPRESSION_BLOCK_SIZE] {
debug_assert!(self.block_is_loaded());
self.doc_decoder.output_aligned()
self.doc_decoder.full_output()
}
/// Return the document at index `idx` of the block.
@@ -349,7 +347,6 @@ impl BlockSegmentPostings {
#[cfg(test)]
mod tests {
use super::BlockSegmentPostings;
use crate::common::HasLen;
use crate::core::Index;
use crate::docset::{DocSet, TERMINATED};
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
@@ -360,6 +357,7 @@ mod tests {
use crate::schema::Term;
use crate::schema::INDEXED;
use crate::DocId;
use common::HasLen;
#[test]
fn test_empty_segment_postings() {

View File

@@ -1,5 +1,5 @@
use crate::common::FixedSize;
use bitpacking::{BitPacker, BitPacker4x};
use common::FixedSize;
pub const COMPRESSION_BLOCK_SIZE: usize = BitPacker4x::BLOCK_LEN;
const COMPRESSED_BLOCK_MAX_SIZE: usize = COMPRESSION_BLOCK_SIZE * u32::SIZE_IN_BYTES;
@@ -49,16 +49,10 @@ impl BlockEncoder {
}
}
/// We ensure that the OutputBuffer is align on 128 bits
/// in order to run SSE2 linear search on it.
#[repr(align(128))]
#[derive(Clone)]
pub(crate) struct AlignedBuffer(pub [u32; COMPRESSION_BLOCK_SIZE]);
#[derive(Clone)]
pub struct BlockDecoder {
bitpacker: BitPacker4x,
output: AlignedBuffer,
output: [u32; COMPRESSION_BLOCK_SIZE],
pub output_len: usize,
}
@@ -72,7 +66,7 @@ impl BlockDecoder {
pub fn with_val(val: u32) -> BlockDecoder {
BlockDecoder {
bitpacker: BitPacker4x::new(),
output: AlignedBuffer([val; COMPRESSION_BLOCK_SIZE]),
output: [val; COMPRESSION_BLOCK_SIZE],
output_len: 0,
}
}
@@ -85,28 +79,28 @@ impl BlockDecoder {
) -> usize {
self.output_len = COMPRESSION_BLOCK_SIZE;
self.bitpacker
.decompress_sorted(offset, compressed_data, &mut self.output.0, num_bits)
.decompress_sorted(offset, compressed_data, &mut self.output, num_bits)
}
pub fn uncompress_block_unsorted(&mut self, compressed_data: &[u8], num_bits: u8) -> usize {
self.output_len = COMPRESSION_BLOCK_SIZE;
self.bitpacker
.decompress(compressed_data, &mut self.output.0, num_bits)
.decompress(compressed_data, &mut self.output, num_bits)
}
#[inline]
pub fn output_array(&self) -> &[u32] {
&self.output.0[..self.output_len]
&self.output[..self.output_len]
}
#[inline]
pub(crate) fn output_aligned(&self) -> &AlignedBuffer {
pub(crate) fn full_output(&self) -> &[u32; COMPRESSION_BLOCK_SIZE] {
&self.output
}
#[inline]
pub fn output(&self, idx: usize) -> u32 {
self.output.0[idx]
self.output[idx]
}
}
@@ -190,8 +184,8 @@ impl VIntDecoder for BlockDecoder {
padding: u32,
) -> usize {
self.output_len = num_els;
self.output.0.iter_mut().for_each(|el| *el = padding);
vint::uncompress_sorted(compressed_data, &mut self.output.0[..num_els], offset)
self.output.iter_mut().for_each(|el| *el = padding);
vint::uncompress_sorted(compressed_data, &mut self.output[..num_els], offset)
}
fn uncompress_vint_unsorted(
@@ -201,12 +195,12 @@ impl VIntDecoder for BlockDecoder {
padding: u32,
) -> usize {
self.output_len = num_els;
self.output.0.iter_mut().for_each(|el| *el = padding);
vint::uncompress_unsorted(compressed_data, &mut self.output.0[..num_els])
self.output.iter_mut().for_each(|el| *el = padding);
vint::uncompress_unsorted(compressed_data, &mut self.output[..num_els])
}
fn uncompress_vint_unsorted_until_end(&mut self, compressed_data: &[u8]) {
let num_els = vint::uncompress_unsorted_until_end(compressed_data, &mut self.output.0);
let num_els = vint::uncompress_unsorted_until_end(compressed_data, &mut self.output);
self.output_len = num_els;
}
}
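The decoder above delegates to the bitpacking crate's BitPacker4x. A minimal roundtrip sketch using that crate's public API:

use bitpacking::{BitPacker, BitPacker4x};

fn main() {
    let original: Vec<u32> = (0..BitPacker4x::BLOCK_LEN as u32).map(|i| i * 7 % 100).collect();
    let bitpacker = BitPacker4x::new();
    let num_bits = bitpacker.num_bits(&original);
    // 4 bytes per u32 is always enough room for one packed block.
    let mut compressed = vec![0u8; BitPacker4x::BLOCK_LEN * 4];
    let written = bitpacker.compress(&original, &mut compressed[..], num_bits);
    let mut decompressed = vec![0u32; BitPacker4x::BLOCK_LEN];
    bitpacker.decompress(&compressed[..written], &mut decompressed[..], num_bits);
    assert_eq!(original, decompressed);
}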

View File

@@ -3,6 +3,9 @@ Postings module (also called inverted index)
*/
mod block_search;
pub(crate) use self::block_search::branchless_binary_search;
mod block_segment_postings;
pub(crate) mod compression;
mod postings;
@@ -14,7 +17,6 @@ mod skip;
mod stacker;
mod term_info;
pub(crate) use self::block_search::BlockSearcher;
pub use self::block_segment_postings::BlockSegmentPostings;
pub use self::postings::Postings;
pub(crate) use self::postings_writer::MultiFieldPostingsWriter;

View File

@@ -11,7 +11,7 @@ use crate::docset::DocSet;
/// but other implementations mocking `SegmentPostings` exist,
/// for merging segments or for testing.
pub trait Postings: DocSet + 'static {
/// Returns the term frequency
/// The number of times the term appears in the document.
fn term_freq(&self) -> u32;
/// Returns the positions offseted with a given value.

View File

@@ -133,7 +133,8 @@ impl MultiFieldPostingsWriter {
doc_id_map: Option<&DocIdMapping>,
) -> crate::Result<HashMap<Field, FnvHashMap<UnorderedTermId, TermOrdinal>>> {
let mut term_offsets: Vec<(&[u8], Addr, UnorderedTermId)> =
self.term_index.iter().collect();
Vec::with_capacity(self.term_index.len());
term_offsets.extend(self.term_index.iter());
term_offsets.sort_unstable_by_key(|&(k, _, _)| k);
let mut unordered_term_mappings: HashMap<Field, FnvHashMap<UnorderedTermId, TermOrdinal>> =

View File

@@ -1,10 +1,8 @@
use super::stacker::{ExpUnrolledLinkedList, MemoryArena};
use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::postings::FieldSerializer;
use crate::DocId;
use crate::{
common::{read_u32_vint, write_u32_vint},
indexer::doc_id_mapping::DocIdMapping,
};
use common::{read_u32_vint, write_u32_vint};
const POSITION_END: u32 = 0;

View File

@@ -1,12 +1,12 @@
use crate::common::HasLen;
use crate::docset::DocSet;
use crate::fastfield::DeleteBitSet;
use crate::fastfield::AliveBitSet;
use crate::positions::PositionReader;
use crate::postings::branchless_binary_search;
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
use crate::postings::BlockSearcher;
use crate::postings::BlockSegmentPostings;
use crate::postings::Postings;
use crate::{DocId, TERMINATED};
use common::HasLen;
/// `SegmentPostings` represents the inverted list or postings associated to
/// a term in a `Segment`.
@@ -18,7 +18,6 @@ pub struct SegmentPostings {
pub(crate) block_cursor: BlockSegmentPostings,
cur: usize,
position_reader: Option<PositionReader>,
block_searcher: BlockSearcher,
}
impl SegmentPostings {
@@ -28,7 +27,6 @@ impl SegmentPostings {
block_cursor: BlockSegmentPostings::empty(),
cur: 0,
position_reader: None,
block_searcher: BlockSearcher::default(),
}
}
@@ -36,7 +34,7 @@ impl SegmentPostings {
///
/// This method will clone and scan through the posting lists.
/// (this is a rather expensive operation).
pub fn doc_freq_given_deletes(&self, delete_bitset: &DeleteBitSet) -> u32 {
pub fn doc_freq_given_deletes(&self, alive_bitset: &AliveBitSet) -> u32 {
let mut docset = self.clone();
let mut doc_freq = 0;
loop {
@@ -44,7 +42,7 @@ impl SegmentPostings {
if doc == TERMINATED {
return doc_freq;
}
if delete_bitset.is_alive(doc) {
if alive_bitset.is_alive(doc) {
doc_freq += 1u32;
}
docset.advance();
@@ -154,7 +152,6 @@ impl SegmentPostings {
block_cursor: segment_block_postings,
cur: 0, // cursor within the block
position_reader,
block_searcher: BlockSearcher::default(),
}
}
}
@@ -183,8 +180,8 @@ impl DocSet for SegmentPostings {
self.block_cursor.seek(target);
// At this point we are on the block, that might contain our document.
let output = self.block_cursor.docs_aligned();
self.cur = self.block_searcher.search_in_block(output, target);
let output = self.block_cursor.full_block();
self.cur = branchless_binary_search(output, target);
// The last block is not full and padded with the value TERMINATED,
// so that we are guaranteed to have at least doc in the block (a real one or the padding)
@@ -197,7 +194,7 @@ impl DocSet for SegmentPostings {
// with the value `TERMINATED`.
//
// After the search, the cursor should point to the first value of TERMINATED.
let doc = output.0[self.cur];
let doc = output[self.cur];
debug_assert!(doc >= target);
debug_assert_eq!(doc, self.doc());
doc
@@ -268,10 +265,10 @@ impl Postings for SegmentPostings {
mod tests {
use super::SegmentPostings;
use crate::common::HasLen;
use common::HasLen;
use crate::docset::{DocSet, TERMINATED};
use crate::fastfield::DeleteBitSet;
use crate::fastfield::AliveBitSet;
use crate::postings::postings::Postings;
#[test]
@@ -299,9 +296,10 @@ mod tests {
fn test_doc_freq() {
let docs = SegmentPostings::create_from_docs(&[0, 2, 10]);
assert_eq!(docs.doc_freq(), 3);
let delete_bitset = DeleteBitSet::for_test(&[2], 12);
assert_eq!(docs.doc_freq_given_deletes(&delete_bitset), 2);
let all_deleted = DeleteBitSet::for_test(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 12);
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[2], 12);
assert_eq!(docs.doc_freq_given_deletes(&alive_bitset), 2);
let all_deleted =
AliveBitSet::for_test_from_deleted_docs(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 12);
assert_eq!(docs.doc_freq_given_deletes(&all_deleted), 0);
}
}

View File

@@ -1,7 +1,6 @@
use super::TermInfo;
use crate::common::{BinarySerializable, VInt};
use crate::common::{CompositeWrite, CountingWriter};
use crate::core::Segment;
use crate::directory::CompositeWrite;
use crate::directory::WritePtr;
use crate::fieldnorm::FieldNormReader;
use crate::positions::PositionSerializer;
@@ -12,6 +11,8 @@ use crate::schema::{Field, FieldEntry, FieldType};
use crate::schema::{IndexRecordOption, Schema};
use crate::termdict::{TermDictionaryBuilder, TermOrdinal};
use crate::{DocId, Score};
use common::CountingWriter;
use common::{BinarySerializable, VInt};
use std::cmp::Ordering;
use std::io::{self, Write};
@@ -442,10 +443,8 @@ impl<W: Write> PostingsSerializer<W> {
let skip_data = self.skip_write.data();
VInt(skip_data.len() as u64).serialize(&mut self.output_write)?;
self.output_write.write_all(skip_data)?;
self.output_write.write_all(&self.postings_write[..])?;
} else {
self.output_write.write_all(&self.postings_write[..])?;
}
self.output_write.write_all(&self.postings_write[..])?;
self.skip_write.clear();
self.postings_write.clear();
self.bm25_weight = None;

View File

@@ -148,6 +148,10 @@ impl TermHashMap {
unordered_term_id
}
pub fn len(&self) -> usize {
self.len
}
pub fn iter(&self) -> Iter<'_> {
Iter {
inner: self.occupied.iter(),

View File

@@ -1,4 +1,4 @@
use crate::common::{BinarySerializable, FixedSize};
use common::{BinarySerializable, FixedSize};
use std::io;
use std::iter::ExactSizeIterator;
use std::ops::Range;
@@ -67,7 +67,7 @@ impl BinarySerializable for TermInfo {
mod tests {
use super::TermInfo;
use crate::common::test::fixed_size_test;
use crate::tests::fixed_size_test;
// TODO add serialize/deserialize test for terminfo

View File

@@ -1,4 +1,3 @@
use crate::common::BitSet;
use crate::core::SegmentReader;
use crate::query::ConstScorer;
use crate::query::{BitSetDocSet, Explanation};
@@ -7,6 +6,7 @@ use crate::schema::{Field, IndexRecordOption};
use crate::termdict::{TermDictionary, TermStreamer};
use crate::TantivyError;
use crate::{DocId, Score};
use common::BitSet;
use std::io;
use std::sync::Arc;
use tantivy_fst::Automaton;
@@ -121,10 +121,7 @@ mod tests {
}
fn is_match(&self, state: &Self::State) -> bool {
match *state {
State::AfterA => true,
_ => false,
}
matches!(*state, State::AfterA)
}
fn accept(&self, state: &Self::State, byte: u8) -> Self::State {

View File

@@ -1,6 +1,6 @@
use crate::common::{BitSet, TinySet};
use crate::docset::{DocSet, TERMINATED};
use crate::DocId;
use common::{BitSet, TinySet};
/// A `BitSetDocSet` makes it possible to iterate through a bitset as if it was a `DocSet`.
///
@@ -96,10 +96,13 @@ impl DocSet for BitSetDocSet {
#[cfg(test)]
mod tests {
use std::collections::BTreeSet;
use super::BitSetDocSet;
use crate::common::BitSet;
use crate::docset::{DocSet, TERMINATED};
use crate::tests::generate_nonunique_unsorted;
use crate::DocId;
use common::BitSet;
fn create_docbitset(docs: &[DocId], max_doc: DocId) -> BitSetDocSet {
let mut docset = BitSet::with_max_value(max_doc);
@@ -109,6 +112,29 @@ mod tests {
BitSetDocSet::from(docset)
}
#[test]
fn test_bitset_large() {
let arr = generate_nonunique_unsorted(100_000, 5_000);
let mut btreeset: BTreeSet<u32> = BTreeSet::new();
let mut bitset = BitSet::with_max_value(100_000);
for el in arr {
btreeset.insert(el);
bitset.insert(el);
}
for i in 0..100_000 {
assert_eq!(btreeset.contains(&i), bitset.contains(i));
}
assert_eq!(btreeset.len(), bitset.len());
let mut bitset_docset = BitSetDocSet::from(bitset);
let mut remaining = true;
for el in btreeset.into_iter() {
assert!(remaining);
assert_eq!(bitset_docset.doc(), el);
remaining = bitset_docset.advance() != TERMINATED;
}
assert!(!remaining);
}
#[test]
fn test_empty() {
let bitset = BitSet::with_max_value(1000);

View File

@@ -310,7 +310,7 @@ mod tests {
));
let query = BooleanQuery::from(vec![(Occur::Should, term_a), (Occur::Should, term_b)]);
let explanation = query.explain(&searcher, DocAddress::new(0, 0u32))?;
assert_nearly_equals!(explanation.value(), 0.6931472);
assert_nearly_equals!(explanation.value(), std::f32::consts::LN_2);
Ok(())
}
}

View File

@@ -1,4 +1,4 @@
use crate::fastfield::DeleteBitSet;
use crate::fastfield::AliveBitSet;
use crate::query::explanation::does_not_match;
use crate::query::{Explanation, Query, Scorer, Weight};
use crate::{DocId, DocSet, Score, Searcher, SegmentReader, Term};
@@ -118,8 +118,8 @@ impl<S: Scorer> DocSet for BoostScorer<S> {
self.underlying.size_hint()
}
fn count(&mut self, delete_bitset: &DeleteBitSet) -> u32 {
self.underlying.count(delete_bitset)
fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 {
self.underlying.count(alive_bitset)
}
fn count_including_deleted(&mut self) -> u32 {

View File

@@ -1,4 +1,3 @@
use crate::common::BitSet;
use crate::core::Searcher;
use crate::core::SegmentReader;
use crate::error::TantivyError;
@@ -10,6 +9,7 @@ use crate::schema::Type;
use crate::schema::{Field, IndexRecordOption, Term};
use crate::termdict::{TermDictionary, TermStreamer};
use crate::{DocId, Score};
use common::BitSet;
use std::io;
use std::ops::{Bound, Range};

View File

@@ -10,6 +10,9 @@ use tantivy_fst::Regex;
/// containing a specific term that matches
/// a regex pattern.
///
/// Wildcard queries (e.g. ho*se) can be achieved
/// by converting them to their regex counterparts.
///
/// ```rust
/// use tantivy::collector::Count;
/// use tantivy::query::RegexQuery;
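The wildcard-to-regex conversion itself is left to the caller. A hypothetical helper (not part of tantivy) built on the regex crate:

// Escape regex metacharacters first, then turn the escaped `*` back into `.*`.
fn wildcard_to_regex(pattern: &str) -> String {
    regex::escape(pattern).replace("\\*", ".*")
}

fn main() {
    assert_eq!(wildcard_to_regex("ho*se"), "ho.*se");
}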

View File

@@ -40,8 +40,8 @@ impl Weight for TermWeight {
}
fn count(&self, reader: &SegmentReader) -> crate::Result<u32> {
if let Some(delete_bitset) = reader.delete_bitset() {
Ok(self.scorer(reader, 1.0)?.count(delete_bitset))
if let Some(alive_bitset) = reader.alive_bitset() {
Ok(self.scorer(reader, 1.0)?.count(alive_bitset))
} else {
let field = self.term.field();
let inv_index = reader.inverted_index(field)?;

View File

@@ -1,9 +1,9 @@
use crate::common::TinySet;
use crate::docset::{DocSet, TERMINATED};
use crate::query::score_combiner::{DoNothingCombiner, ScoreCombiner};
use crate::query::Scorer;
use crate::DocId;
use crate::Score;
use common::TinySet;
const HORIZON_NUM_TINYBITSETS: usize = 64;
const HORIZON: u32 = 64u32 * HORIZON_NUM_TINYBITSETS as u32;
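HORIZON is thus 64 TinySets of 64 bits each: a scoring window of 4096 candidate docs. The core TinySet trick is popping set bits off a u64; a minimal sketch:

// Pop the lowest set bit, returning its position, until the word is empty.
fn pop_lowest(word: &mut u64) -> Option<u32> {
    if *word == 0 {
        return None;
    }
    let bit = word.trailing_zeros();
    *word &= *word - 1; // clear the lowest set bit
    Some(bit)
}

fn main() {
    let mut word: u64 = (1 << 3) | (1 << 17) | (1 << 63);
    let mut hits = vec![];
    while let Some(bit) = pop_lowest(&mut word) {
        hits.push(bit);
    }
    assert_eq!(hits, vec![3, 17, 63]);
}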

View File

@@ -1,8 +1,8 @@
#![allow(dead_code)]
use crate::common::HasLen;
use crate::docset::{DocSet, TERMINATED};
use crate::DocId;
use common::HasLen;
/// Simulate a `Postings` objects from a `VecPostings`.
/// `VecPostings` only exist for testing purposes.

View File

@@ -59,8 +59,8 @@ pub trait Weight: Send + Sync + 'static {
/// Returns the number of documents within the given `SegmentReader`.
fn count(&self, reader: &SegmentReader) -> crate::Result<u32> {
let mut scorer = self.scorer(reader, 1.0)?;
if let Some(delete_bitset) = reader.delete_bitset() {
Ok(scorer.count(delete_bitset))
if let Some(alive_bitset) = reader.alive_bitset() {
Ok(scorer.count(alive_bitset))
} else {
Ok(scorer.count_including_deleted())
}

View File

@@ -1,8 +1,8 @@
use super::*;
use crate::common::BinarySerializable;
use crate::common::VInt;
use crate::tokenizer::PreTokenizedString;
use crate::DateTime;
use common::BinarySerializable;
use common::VInt;
use std::io::{self, Read, Write};
use std::mem;

View File

@@ -1,4 +1,4 @@
use crate::common::BinarySerializable;
use common::BinarySerializable;
use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Deserializer, Serialize, Serializer};

View File

@@ -1,4 +1,4 @@
use crate::common::BinarySerializable;
use common::BinarySerializable;
use std::io;
use std::io::Read;
use std::io::Write;

View File

@@ -109,17 +109,11 @@ impl FieldEntry {
&self.field_type
}
/// Returns true iff the field is indexed
/// Returns true iff the field is indexed.
///
/// An indexed field is searchable.
pub fn is_indexed(&self) -> bool {
match self.field_type {
FieldType::Str(ref options) => options.get_indexing_options().is_some(),
FieldType::U64(ref options)
| FieldType::I64(ref options)
| FieldType::F64(ref options)
| FieldType::Date(ref options) => options.is_indexed(),
FieldType::HierarchicalFacet(ref options) => options.is_indexed(),
FieldType::Bytes(ref options) => options.is_indexed(),
}
self.field_type.is_indexed()
}
/// Returns true iff the field is a int (signed or unsigned) fast field

View File

@@ -1,6 +1,6 @@
use crate::common::BinarySerializable;
use crate::schema::Field;
use crate::schema::Value;
use common::BinarySerializable;
use std::io::{self, Read, Write};
/// `FieldValue` holds together a `Field` and its `Value`.

View File

@@ -20,7 +20,7 @@ pub const STORED: SchemaFlagList<StoredFlag, ()> = SchemaFlagList {
#[derive(Clone)]
pub struct IndexedFlag;
/// Flag to mark the field as indexed.
/// Flag to mark the field as indexed. An indexed field is searchable.
///
/// The `INDEXED` flag can only be used when building `IntOptions` (`u64`, `i64` and `f64` fields)
/// Of course, text fields can also be indexed... But this is expressed by using either the

View File

@@ -29,7 +29,7 @@ impl IntOptions {
self.stored
}
/// Returns true iff the value is indexed.
/// Returns true iff the value is indexed and therefore searchable.
pub fn is_indexed(&self) -> bool {
self.indexed
}
@@ -52,6 +52,8 @@ impl IntOptions {
///
/// Setting an integer as indexed will generate
/// a posting list for each value taken by the integer.
///
/// This is required for the field to be searchable.
pub fn set_indexed(mut self) -> IntOptions {
self.indexed = true;
self

View File

@@ -157,7 +157,7 @@ pub use self::int_options::IntOptions;
/// A field name may contain any character, must have at least one character,
/// and must not start with a `-`.
pub fn is_valid_field_name(field_name: &str) -> bool {
field_name.len() > 0 && !field_name.starts_with('-')
!field_name.is_empty() && !field_name.starts_with('-')
}
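A few illustrative cases for the rule above, as a sketch against the function as written:

fn main_sketch() {
    assert!(is_valid_field_name("title"));
    assert!(is_valid_field_name("日本語")); // any character is allowed
    assert!(!is_valid_field_name("")); // must be non-empty
    assert!(!is_valid_field_name("-hidden")); // must not start with `-`
}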
#[cfg(test)]

View File

@@ -1,9 +1,9 @@
use std::fmt;
use super::Field;
use crate::common;
use crate::schema::Facet;
use crate::DateTime;
use common;
use std::str;
/// Size (in bytes) of the buffer of a int field.

View File

@@ -94,7 +94,7 @@ impl TextFieldIndexing {
}
}
/// The field will be untokenized and indexed
/// The field will be untokenized and indexed.
pub const STRING: TextOptions = TextOptions {
indexing: Some(TextFieldIndexing {
tokenizer: Cow::Borrowed("raw"),
@@ -103,7 +103,7 @@ pub const STRING: TextOptions = TextOptions {
stored: false,
};
/// The field will be tokenized and indexed
/// The field will be tokenized and indexed.
pub const TEXT: TextOptions = TextOptions {
indexing: Some(TextFieldIndexing {
tokenizer: Cow::Borrowed("default"),

View File

@@ -276,10 +276,10 @@ impl From<PreTokenizedString> for Value {
mod binary_serialize {
use super::Value;
use crate::common::{f64_to_u64, u64_to_f64, BinarySerializable};
use crate::schema::Facet;
use crate::tokenizer::PreTokenizedString;
use chrono::{TimeZone, Utc};
use common::{f64_to_u64, u64_to_f64, BinarySerializable};
use std::io::{self, Read, Write};
const TEXT_CODE: u8 = 0;

View File

@@ -12,7 +12,7 @@ pub fn compress(uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()>
unsafe {
compressed.set_len(maximum_ouput_size + 4);
}
let bytes_written = compress_into(uncompressed, compressed, 4)
let bytes_written = compress_into(uncompressed, &mut compressed[4..])
.map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?;
let num_bytes = uncompressed.len() as u32;
compressed[0..4].copy_from_slice(&num_bytes.to_le_bytes());
@@ -35,7 +35,7 @@ pub fn decompress(compressed: &[u8], decompressed: &mut Vec<u8>) -> io::Result<(
unsafe {
decompressed.set_len(uncompressed_size);
}
let bytes_written = decompress_into(&compressed[4..], decompressed, 0)
let bytes_written = decompress_into(&compressed[4..], decompressed)
.map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err.to_string()))?;
if bytes_written != uncompressed_size {
return Err(io::Error::new(
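The code above frames each block by hand: a 4-byte little-endian length prefix followed by the lz4 block. For reference, a sketch of the same framing using lz4_flex's size-prepending helpers:

use lz4_flex::{compress_prepend_size, decompress_size_prepended};

fn main() {
    let uncompressed = b"the quick brown fox".to_vec();
    // compress_prepend_size writes a 4-byte LE length, then the lz4 block.
    let compressed = compress_prepend_size(&uncompressed);
    let roundtrip = decompress_size_prepended(&compressed).unwrap();
    assert_eq!(roundtrip, uncompressed);
}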

View File

@@ -1,8 +1,5 @@
use crate::{
common::{BinarySerializable, FixedSize, HasLen},
directory::FileSlice,
store::Compressor,
};
use crate::{directory::FileSlice, store::Compressor};
use common::{BinarySerializable, FixedSize, HasLen};
use std::io;
#[derive(Debug, Clone, PartialEq)]

View File

@@ -1,6 +1,6 @@
use crate::common::VInt;
use crate::store::index::{Checkpoint, CHECKPOINT_PERIOD};
use crate::DocId;
use common::VInt;
use std::io;
use std::ops::Range;

View File

@@ -1,8 +1,8 @@
use crate::common::{BinarySerializable, VInt};
use crate::directory::OwnedBytes;
use crate::store::index::block::CheckpointBlock;
use crate::store::index::Checkpoint;
use crate::DocId;
use common::{BinarySerializable, VInt};
pub struct LayerCursor<'a> {
remaining: &'a [u8],

View File

@@ -1,6 +1,6 @@
use crate::common::{BinarySerializable, VInt};
use crate::store::index::block::CheckpointBlock;
use crate::store::index::{Checkpoint, CHECKPOINT_PERIOD};
use common::{BinarySerializable, VInt};
use std::io;
use std::io::Write;

View File

@@ -57,7 +57,7 @@ pub mod tests {
use futures::executor::block_on;
use super::*;
use crate::fastfield::DeleteBitSet;
use crate::fastfield::AliveBitSet;
use crate::schema::{self, FieldValue, TextFieldIndexing, STORED, TEXT};
use crate::schema::{Document, TextOptions};
use crate::{
@@ -113,7 +113,8 @@ pub mod tests {
fn test_doc_store_iter_with_delete_bug_1077() -> crate::Result<()> {
// this will cover deletion of the first element in a checkpoint
let deleted_docids = (200..300).collect::<Vec<_>>();
let delete_bitset = DeleteBitSet::for_test(&deleted_docids, NUM_DOCS as u32);
let alive_bitset =
AliveBitSet::for_test_from_deleted_docs(&deleted_docids, NUM_DOCS as u32);
let path = Path::new("store");
let directory = RamDirectory::create();
@@ -134,7 +135,7 @@ pub mod tests {
);
}
for (_, doc) in store.iter(Some(&delete_bitset)).enumerate() {
for (_, doc) in store.iter(Some(&alive_bitset)).enumerate() {
let doc = doc?;
let title_content = doc.get_first(field_title).unwrap().text().unwrap();
if !title_content.starts_with("Doc ") {
@@ -146,7 +147,7 @@ pub mod tests {
.unwrap()
.parse::<u32>()
.unwrap();
if delete_bitset.is_deleted(id) {
if alive_bitset.is_deleted(id) {
panic!("unexpected deleted document {}", id);
}
}
@@ -230,7 +231,7 @@ pub mod tests {
let searcher = index.reader().unwrap().searcher();
let reader = searcher.segment_reader(0);
let store = reader.get_store_reader().unwrap();
for doc in store.iter(reader.delete_bitset()) {
for doc in store.iter(reader.alive_bitset()) {
assert_eq!(
*doc?.get_first(text_field).unwrap().text().unwrap(),
"deletemenot".to_string()
@@ -288,7 +289,7 @@ pub mod tests {
let reader = searcher.segment_readers().iter().last().unwrap();
let store = reader.get_store_reader().unwrap();
for doc in store.iter(reader.delete_bitset()).take(50) {
for doc in store.iter(reader.alive_bitset()).take(50) {
assert_eq!(
*doc?.get_first(text_field).unwrap().text().unwrap(),
LOREM.to_string()

View File

@@ -5,11 +5,8 @@ use crate::schema::Document;
use crate::space_usage::StoreSpaceUsage;
use crate::store::index::Checkpoint;
use crate::DocId;
use crate::{
common::{BinarySerializable, HasLen, VInt},
error::DataCorruption,
fastfield::DeleteBitSet,
};
use crate::{error::DataCorruption, fastfield::AliveBitSet};
use common::{BinarySerializable, HasLen, VInt};
use lru::LruCache;
use std::io;
use std::sync::atomic::{AtomicUsize, Ordering};
@@ -136,12 +133,12 @@ impl StoreReader {
/// Iterator over all Documents in their order as they are stored in the doc store.
/// Use this if you want to extract all Documents from the doc store.
/// The delete_bitset has to be forwarded from the `SegmentReader` or the results maybe wrong.
/// The alive_bitset has to be forwarded from the `SegmentReader`, or the results may be wrong.
pub fn iter<'a: 'b, 'b>(
&'b self,
delete_bitset: Option<&'a DeleteBitSet>,
alive_bitset: Option<&'a AliveBitSet>,
) -> impl Iterator<Item = crate::Result<Document>> + 'b {
self.iter_raw(delete_bitset).map(|doc_bytes_res| {
self.iter_raw(alive_bitset).map(|doc_bytes_res| {
let mut doc_bytes = doc_bytes_res?;
Ok(Document::deserialize(&mut doc_bytes)?)
})
@@ -149,10 +146,10 @@ impl StoreReader {
/// Iterator over all RawDocuments in their order as they are stored in the doc store.
/// Use this if you want to extract all Documents from the doc store.
/// The delete_bitset has to be forwarded from the `SegmentReader` or the results maybe wrong.
/// The alive_bitset has to be forwarded from the `SegmentReader`, or the results may be wrong.
pub(crate) fn iter_raw<'a: 'b, 'b>(
&'b self,
delete_bitset: Option<&'a DeleteBitSet>,
alive_bitset: Option<&'a AliveBitSet>,
) -> impl Iterator<Item = crate::Result<OwnedBytes>> + 'b {
let last_docid = self
.block_checkpoints()
@@ -182,7 +179,7 @@ impl StoreReader {
num_skipped = 0;
}
let alive = delete_bitset.map_or(true, |bitset| bitset.is_alive(doc_id));
let alive = alive_bitset.map_or(true, |bitset| bitset.is_alive(doc_id));
if alive {
let ret = Some((curr_block.clone(), num_skipped, reset_block_pos));
// the map block will move over the num_skipped, so we reset to 0
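The iterator above advances block by block and skips documents the alive bitset rules out. A toy sketch of the filtering idea, with a hypothetical is_alive predicate standing in for tantivy's AliveBitSet:

fn iter_alive<'a, D: 'a>(
    docs: impl Iterator<Item = D> + 'a,
    is_alive: Option<&'a dyn Fn(u32) -> bool>,
) -> impl Iterator<Item = D> + 'a {
    docs.enumerate()
        .filter(move |(doc_id, _)| is_alive.map_or(true, |f| f(*doc_id as u32)))
        .map(|(_, doc)| doc)
}

fn main() {
    let pred = |doc_id: u32| doc_id != 1;
    let alive: Vec<_> = iter_alive(vec!["a", "b", "c"].into_iter(), Some(&pred)).collect();
    assert_eq!(alive, vec!["a", "c"]);
}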

View File

@@ -1,13 +1,13 @@
use super::index::SkipIndexBuilder;
use super::StoreReader;
use super::{compressors::Compressor, footer::DocStoreFooter};
use crate::common::CountingWriter;
use crate::common::{BinarySerializable, VInt};
use crate::directory::TerminatingWrite;
use crate::directory::WritePtr;
use crate::schema::Document;
use crate::store::index::Checkpoint;
use crate::DocId;
use common::CountingWriter;
use common::{BinarySerializable, VInt};
use std::io::{self, Write};
const BLOCK_SIZE: usize = 16_384;

View File

@@ -1,8 +1,8 @@
use crate::common::{BinarySerializable, FixedSize};
use crate::directory::{FileSlice, OwnedBytes};
use crate::postings::TermInfo;
use crate::termdict::TermOrdinal;
use byteorder::{ByteOrder, LittleEndian};
use common::{BinarySerializable, FixedSize};
use std::cmp;
use std::io::{self, Read, Write};
use tantivy_bitpacker::compute_num_bits;
@@ -290,16 +290,16 @@ mod tests {
use super::extract_bits;
use super::TermInfoBlockMeta;
use super::{TermInfoStore, TermInfoStoreWriter};
use crate::common;
use crate::common::BinarySerializable;
use crate::directory::FileSlice;
use crate::postings::TermInfo;
use common;
use common::BinarySerializable;
use tantivy_bitpacker::compute_num_bits;
use tantivy_bitpacker::BitPacker;
#[test]
fn test_term_info_block() {
common::test::fixed_size_test::<TermInfoBlockMeta>();
crate::tests::fixed_size_test::<TermInfoBlockMeta>();
}
#[test]

View File

@@ -1,10 +1,10 @@
use super::term_info_store::{TermInfoStore, TermInfoStoreWriter};
use super::{TermStreamer, TermStreamerBuilder};
use crate::common::{BinarySerializable, CountingWriter};
use crate::directory::{FileSlice, OwnedBytes};
use crate::error::DataCorruption;
use crate::postings::TermInfo;
use crate::termdict::TermOrdinal;
use common::{BinarySerializable, CountingWriter};
use once_cell::sync::Lazy;
use std::io::{self, Write};
use tantivy_fst::raw::Fst;

View File

@@ -61,3 +61,31 @@ impl<'a> TokenStream for AlphaNumOnlyFilterStream<'a> {
self.tail.token_mut()
}
}
#[cfg(test)]
mod tests {
use crate::tokenizer::tests::assert_token;
use crate::tokenizer::{AlphaNumOnlyFilter, SimpleTokenizer, TextAnalyzer, Token};
#[test]
fn test_alphanum_only() {
let tokens = token_stream_helper("I am a cat. 我輩は猫である。(1906)");
assert_eq!(tokens.len(), 5);
assert_token(&tokens[0], 0, "I", 0, 1);
assert_token(&tokens[1], 1, "am", 2, 4);
assert_token(&tokens[2], 2, "a", 5, 6);
assert_token(&tokens[3], 3, "cat", 7, 10);
assert_token(&tokens[4], 5, "1906", 37, 41);
}
fn token_stream_helper(text: &str) -> Vec<Token> {
let a = TextAnalyzer::from(SimpleTokenizer).filter(AlphaNumOnlyFilter);
let mut token_stream = a.token_stream(text);
let mut tokens: Vec<Token> = vec![];
let mut add_token = |token: &Token| {
tokens.push(token.clone());
};
token_stream.process(&mut add_token);
tokens
}
}

View File

@@ -56,31 +56,30 @@ impl<'a> TokenStream for LowerCaserTokenStream<'a> {
#[cfg(test)]
mod tests {
use crate::tokenizer::{LowerCaser, SimpleTokenizer, TextAnalyzer};
use crate::tokenizer::tests::assert_token;
use crate::tokenizer::{LowerCaser, SimpleTokenizer, TextAnalyzer, Token};
#[test]
fn test_to_lower_case() {
assert_eq!(
lowercase_helper("Русский текст"),
vec!["русский".to_string(), "текст".to_string()]
);
let tokens = token_stream_helper("Tree");
assert_eq!(tokens.len(), 1);
assert_token(&tokens[0], 0, "tree", 0, 4);
let tokens = token_stream_helper("Русский текст");
assert_eq!(tokens.len(), 2);
assert_token(&tokens[0], 0, "русский", 0, 14);
assert_token(&tokens[1], 1, "текст", 15, 25);
}
fn lowercase_helper(text: &str) -> Vec<String> {
let mut tokens = vec![];
fn token_stream_helper(text: &str) -> Vec<Token> {
let mut token_stream = TextAnalyzer::from(SimpleTokenizer)
.filter(LowerCaser)
.token_stream(text);
while token_stream.advance() {
let token_text = token_stream.token().text.clone();
tokens.push(token_text);
}
let mut tokens = vec![];
let mut add_token = |token: &Token| {
tokens.push(token.clone());
};
token_stream.process(&mut add_token);
tokens
}
#[test]
fn test_lowercaser() {
assert_eq!(lowercase_helper("Tree"), vec!["tree".to_string()]);
assert_eq!(lowercase_helper("Русский"), vec!["русский".to_string()]);
}
}

View File

@@ -131,6 +131,7 @@ mod token_stream_chain;
mod tokenized_string;
mod tokenizer;
mod tokenizer_manager;
mod whitespace_tokenizer;
pub use self::alphanum_only::AlphaNumOnlyFilter;
pub use self::ascii_folding_filter::AsciiFoldingFilter;
@@ -143,6 +144,7 @@ pub use self::simple_tokenizer::SimpleTokenizer;
pub use self::stemmer::{Language, Stemmer};
pub use self::stop_word_filter::StopWordFilter;
pub(crate) use self::token_stream_chain::TokenStreamChain;
pub use self::whitespace_tokenizer::WhitespaceTokenizer;
pub use self::tokenized_string::{PreTokenizedStream, PreTokenizedString};
pub use self::tokenizer::{
@@ -277,4 +279,25 @@ pub mod tests {
assert!(tokens.is_empty());
}
}
#[test]
fn test_whitespace_tokenizer() {
let tokenizer_manager = TokenizerManager::default();
let ws_tokenizer = tokenizer_manager.get("whitespace").unwrap();
let mut tokens: Vec<Token> = vec![];
{
let mut add_token = |token: &Token| {
tokens.push(token.clone());
};
ws_tokenizer
.token_stream("Hello, happy tax payer!")
.process(&mut add_token);
}
assert_eq!(tokens.len(), 4);
assert_token(&tokens[0], 0, "Hello,", 0, 6);
assert_token(&tokens[1], 1, "happy", 7, 12);
assert_token(&tokens[2], 2, "tax", 13, 16);
assert_token(&tokens[3], 3, "payer!", 17, 23);
}
}

View File

@@ -42,3 +42,27 @@ impl TokenStream for RawTokenStream {
&mut self.token
}
}
#[cfg(test)]
mod tests {
use crate::tokenizer::tests::assert_token;
use crate::tokenizer::{RawTokenizer, TextAnalyzer, Token};
#[test]
fn test_raw_tokenizer() {
let tokens = token_stream_helper("Hello, happy tax payer!");
assert_eq!(tokens.len(), 1);
assert_token(&tokens[0], 0, "Hello, happy tax payer!", 0, 23);
}
fn token_stream_helper(text: &str) -> Vec<Token> {
let a = TextAnalyzer::from(RawTokenizer);
let mut token_stream = a.token_stream(text);
let mut tokens: Vec<Token> = vec![];
let mut add_token = |token: &Token| {
tokens.push(token.clone());
};
token_stream.process(&mut add_token);
tokens
}
}

Some files were not shown because too many files have changed in this diff.