Compare commits

...

147 Commits

Author SHA1 Message Date
Pascal Seitz
9c12860b01 extend proptest to cover bytes field codec bug 2022-02-18 10:50:46 +01:00
Pascal Seitz
886245ad21 Fix opening bytes index with dynamic codec
Fix #1278
2022-02-18 07:08:43 +01:00
Shikhar Bhushan
505e6a440c Remove test assertion sensitive to background segment merging (#1274) 2022-02-17 10:59:46 +09:00
Koichi Akabe
fcd651f6a9 Add Vaporetto tokenizer to README (#1271)
* Add Vaporetto tokenizer to README

* Update README.md
2022-02-14 18:19:57 +09:00
Paul Masurel
e6653228a9 Renamed github workflows (#1269) 2022-02-04 15:10:24 +09:00
Paul Masurel
bdedefe07d Adding an IndexingContext object (#1268) 2022-02-04 15:08:01 +09:00
Paul Masurel
13a4473faa Removing obsolete clippy allow thingy. 2022-02-01 11:54:01 +09:00
Paul Masurel
2069e3e52b Fixing clippy comments 2022-02-01 10:24:05 +09:00
Paul Masurel
0d8263cba1 Using nightly to format 2022-01-31 16:10:11 +09:00
Paul Masurel
65b365b81c Fixing all-features build. 2022-01-31 14:41:14 +09:00
dependabot[bot]
4c1366da87 Update fastdivide requirement from 0.3 to 0.4 (#1265)
Updates the requirements on fastdivide to permit the latest version.

---
updated-dependencies:
- dependency-name: fastdivide
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-01-31 11:26:50 +09:00
Paul Masurel
eca6628b3c Minor refactoring (#1266) 2022-01-28 15:55:55 +09:00
Paul Masurel
9679c5f306 Rename quickwit-inc -> quickwit-oss 2022-01-27 15:37:09 +09:00
Shikhar Bhushan
5a2497b6fd Avoid exposing TrackedObject from Warmer API (#1264) 2022-01-25 10:04:08 +09:00
Shikhar Bhushan
99d4b1a177 Searcher Warming API (#1261)
Adds an API to register Warmers in the IndexReader.

Co-authored-by: Paul Masurel <paul@quickwit.io>
2022-01-20 23:40:25 +09:00
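The toy model below (plain Rust, none of tantivy's actual types or method names) sketches the flow this commit describes: warmers are registered once, and each one is run against a freshly opened searcher generation before that searcher is published to queries.

```rust
// Toy model of the warming flow added by the commit above; the Warmer and
// Reader types here are illustrative stand-ins, not tantivy's API.
trait Warmer {
    fn warm(&self, searcher_generation: u64);
}

struct PrimeCaches;

impl Warmer for PrimeCaches {
    fn warm(&self, searcher_generation: u64) {
        // A real warmer would e.g. pre-build per-segment caches here.
        println!("warming searcher generation {}", searcher_generation);
    }
}

struct Reader {
    warmers: Vec<Box<dyn Warmer>>,
    generation: u64,
}

impl Reader {
    // On reload, run every warmer before the new searcher becomes visible.
    fn reload(&mut self) {
        self.generation += 1;
        for warmer in &self.warmers {
            warmer.warm(self.generation);
        }
        // ...only now would the new searcher be swapped in.
    }
}

fn main() {
    let mut reader = Reader {
        warmers: vec![Box::new(PrimeCaches)],
        generation: 0,
    };
    reader.reload();
}
```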
Paul Masurel
732f6847c0 Field type with codes (#1255)
* Terms are now typed.

This change is backward compatible:
while the Term's byte representation is modified, a Term itself
is a transient object that is not serialized as-is in the index.

Its .field() and .value_bytes() accessors, on the other hand, are unchanged.
This change offers better Debug information for terms.

While not strictly necessary, it will also help with the support for JSON types.

* Renamed Hierarchical Facet -> Facet
2022-01-07 20:49:00 +09:00
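A small usage sketch of the accessors the commit message mentions. It assumes tantivy's usual schema builder and the `Term::from_field_text` constructor from this period; it is illustrative rather than taken from the changeset.

```rust
use tantivy::schema::{Schema, TEXT};
use tantivy::Term;

fn main() {
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT);
    let _schema = schema_builder.build();

    // The Term's byte representation now embeds the value type, but the
    // accessors named in the commit message behave as before.
    let term = Term::from_field_text(title, "warmup");
    println!("field: {:?}", term.field());
    println!("value bytes: {:?}", term.value_bytes());
    // The richer Debug output is one of the motivations for the change.
    println!("{:?}", term);
}
```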
Paul Masurel
1c6d9bdc6a Comparison of Value based on serialization. (#1250) 2022-01-07 20:31:26 +09:00
Paul Masurel
3ea6800ac5 Pleasing clippy (#1253) 2022-01-06 16:41:24 +09:00
Antoine G
395303b644 Collector + directory doc fixes (#1247)
* doc(collector)

* doc(directory)

* doc(misc)

* wording
2022-01-04 09:22:58 +09:00
Daniel Müller
2c200b46cb Use test-log instead of test-env-log (#1248)
The test-env-log crate has been renamed to test-log to better reflect
its intent of not only catering to env_logger specific initialization
but also tracing (and potentially others in the future).
This change updates the crate to use test-log instead of the now
deprecated test-env-log.
2022-01-04 09:20:30 +09:00
Liam Warfield
17e00df112 Change Snippet.fragments -> Snippet.fragment (#1243)
* Change Snippet.fragments -> Snippet.fragment
* Apply suggestions from code review

Co-authored-by: Liam Warfield <lwarfield@arista.com>
2022-01-03 22:23:51 +09:00
Antoine G
3129d86743 doc(termdict) expose structs (#1242)
* doc(termdict) expose structs
also add merger doc + lint
refs #1232
2022-01-03 22:20:31 +09:00
Shikhar Bhushan
e5e252cbc0 LogMergePolicy knob del_docs_percentage_before_merge (#1238)
Add a knob to LogMergePolicy to always merge segments that exceed a threshold of deleted docs

Closes #115
2021-12-20 13:14:56 +09:00
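A toy illustration of the knob's effect (the field and function names here are not tantivy's): a segment becomes a merge candidate on its own once its share of deleted documents crosses the configured threshold.

```rust
// Illustration only: once the ratio of deleted documents in a segment
// reaches the threshold, the policy proposes merging it regardless of the
// usual log-merge size buckets.
struct Segment {
    num_docs: u32,
    num_deleted: u32,
}

fn should_merge_for_deletes(segment: &Segment, del_docs_ratio_before_merge: f64) -> bool {
    if segment.num_docs == 0 {
        return false;
    }
    (segment.num_deleted as f64 / segment.num_docs as f64) >= del_docs_ratio_before_merge
}

fn main() {
    let segment = Segment { num_docs: 1_000, num_deleted: 400 };
    assert!(should_merge_for_deletes(&segment, 0.3));
}
```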
Paul Masurel
b2da82f151 Making MergeCandidate public in order to allow the usage of custom merge (#1237)
policies.

Closes #1235
2021-12-13 09:54:21 +09:00
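A minimal sketch of what a custom policy might look like now that `MergeCandidate` is public. The `tantivy::merge_policy` path, the `compute_merge_candidates` signature, and `MergeCandidate`'s tuple shape are assumptions based on the commit's purpose, not copied from the changeset.

```rust
use tantivy::merge_policy::{MergeCandidate, MergePolicy};
use tantivy::SegmentMeta;

// Toy policy: propose merging all segments into one as soon as there are
// at least two of them.
#[derive(Debug)]
struct MergeEverything;

impl MergePolicy for MergeEverything {
    fn compute_merge_candidates(&self, segments: &[SegmentMeta]) -> Vec<MergeCandidate> {
        if segments.len() < 2 {
            return Vec::new();
        }
        let segment_ids = segments.iter().map(|meta| meta.id()).collect();
        vec![MergeCandidate(segment_ids)]
    }
}
```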
Paul Masurel
c81b3030fa Issue/922b (#1233)
* Add a NORMED option on fields

Make fieldnorm indexing optional:

* for all types except text => added a NORMED option
* for text fields:
** if STRING, the field has no fieldnorm retained
** if TEXT, the field has its fieldnorm computed

* Finalize making fieldnorm optional for all field types.

- Using Option for fieldnorm readers.
2021-12-10 21:12:29 +09:00
Paul Masurel
9e66c75fc6 Using stable in CI as rustc nightly seems broken 2021-12-10 18:45:23 +09:00
Paul Masurel
ebdbb6bd2e Fixing compilation warnings & clippy comments. 2021-12-10 16:47:59 +09:00
Antoine G
c980b19dd9 canonicalize path when opening MmapDirectory (#1231)
* canonicalize path when opening `MmapDirectory`
fixes #1229
2021-12-09 10:19:52 +09:00
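The gist of the fix, illustrated with std only (the actual change lives inside `MmapDirectory::open`): resolve symlinks and relative components up front so that different spellings of the same path map to the same directory.

```rust
use std::fs;
use std::path::PathBuf;

// Canonicalizing means "./idx" and "/abs/path/idx" resolve to one path
// before it is used as a key for locks, watchers, etc.
fn resolved_index_path(path: &str) -> std::io::Result<PathBuf> {
    fs::canonicalize(path)
}

fn main() -> std::io::Result<()> {
    let dir = resolved_index_path(".")?;
    println!("index directory resolved to {}", dir.display());
    Ok(())
}
```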
Paul Masurel
098eea843a Reducing the number of call to fsync on the directory. (#1228)
This works by introducing a new API method in the Directory
trait. The user needs to explicitly call this method
(in particular, once before a commit).

Closes #1225
2021-12-03 03:10:52 +00:00
Paul Masurel
466dc8233c Cargo fmt 2021-12-02 18:46:28 +09:00
Paul Masurel
03c2f6ece2 We are missing 4 bytes in the LZ4 compression buffer. (#1226)
Closes #831
2021-12-02 16:00:29 +09:00
Paul Masurel
1d4e9a29db Cargo fmt 2021-12-02 15:51:44 +09:00
Paul Masurel
f378d9a57b Pleasing clippy 2021-12-02 14:48:33 +09:00
Paul Masurel
dde49ac8e2 Closes #1195 (#1222)
Removes the indexed option for facets.
Facets are now always indexed.

Closes #1195
2021-12-02 14:37:19 +09:00
Paul Masurel
c3cc93406d Bugfix: adds missing fdatasync on atomic_write.
In addition, this PR:
- removes unnecessary flushes and fsyncs on files.
- replaces all fsync calls with fdatasync. The latter triggers
a metadata sync if metadata required to read the file
has changed. It is therefore sufficient for us.

Closes #1224
2021-12-02 13:42:44 +09:00
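In std terms, the distinction this commit relies on is `File::sync_all` (fsync: data plus all metadata) versus `File::sync_data` (fdatasync: data plus only the metadata needed to read it back). A minimal illustration:

```rust
use std::fs::File;
use std::io::Write;

fn main() -> std::io::Result<()> {
    let mut file = File::create("atomic_write.tmp")?;
    file.write_all(b"meta.json contents")?;

    // fsync equivalent: flushes data and all file metadata.
    file.sync_all()?;

    // fdatasync equivalent: flushes data and only the metadata required to
    // read the file back (e.g. its length), which is what the commit above
    // argues is sufficient for durability here.
    file.sync_data()?;
    Ok(())
}
```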
Kanji Yomoda
bd0f9211da Remove unused sort for segment meta list (#1218)
* Remove unused sort for segment meta list
* Fix segment meta order dependent test
2021-12-01 11:18:17 +09:00
PSeitz
c503c6e4fa Switch to non-strict schema (#1216)
Fixes #1211
2021-11-29 10:38:59 +09:00
PSeitz
02174d26af Merge pull request #1209 from quickwit-inc/lz4_flex_version
fix lz4_flex version
2021-11-16 14:12:45 +08:00
PSeitz
cf92be3bd6 fix lz4_flex version 2021-11-16 06:03:04 +00:00
Shikhar Bhushan
72cef12db1 Add none compression (#1208) 2021-11-16 10:50:42 +09:00
Paul Masurel
bbc0a2e233 Fixing the build 2021-11-16 09:37:25 +09:00
François Massot
4fd1a6c84b Merge pull request #1207 from quickwit-inc/fix-chat-links
Remove patron link and change gitter links to discord links.
2021-11-15 19:23:21 +01:00
François Massot
c83d99c414 Remove patron link and change gitter links to discord links. 2021-11-15 19:17:35 +01:00
Paul Masurel
eacf510175 Exchange gitter link for discord 2021-11-15 16:44:13 +09:00
Paul Masurel
8802d125f8 Prepare commit is public again (#1202)
- Simplified some of the prepare commit & segment updater code using
async.
- Made PrepareCommit public again.
2021-11-12 23:25:39 +09:00
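A sketch of the two-phase flow this makes possible, assuming the `prepare_commit`/`PreparedCommit` names these commits refer to and tantivy's usual `Index::create_in_ram`/`writer` entry points; illustrative, not lifted from the docs.

```rust
use tantivy::schema::{Schema, TEXT};
use tantivy::{doc, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let body = schema_builder.add_text_field("body", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    let mut index_writer = index.writer(50_000_000)?;
    let _opstamp = index_writer.add_document(doc!(body => "two-phase commit example"));

    // Two-phase flow: prepare first (e.g. to coordinate with an external
    // transaction), then commit -- or abort -- the prepared state.
    let prepared = index_writer.prepare_commit()?;
    prepared.commit()?;
    Ok(())
}
```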
dependabot[bot]
33301a3eb4 Update fail requirement from 0.4 to 0.5 (#1197)
Updates the requirements on [fail](https://github.com/tikv/fail-rs) to permit the latest version.
- [Release notes](https://github.com/tikv/fail-rs/releases)
- [Changelog](https://github.com/tikv/fail-rs/blob/master/CHANGELOG.md)
- [Commits](https://github.com/tikv/fail-rs/compare/v0.4.0...v0.5.0)

---
updated-dependencies:
- dependency-name: fail
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-11-12 23:21:16 +09:00
Paul Masurel
7234bef0eb Issue/1198 (#1201)
* Unit test reproducing #1198
* Fixing unit test to handle the error from add_document.
* Bump project version
2021-11-11 16:42:19 +09:00
azerowall
fcff91559b Fix the deserialization error of FieldEntry when the 'options' field appears before the 'type' field (#1199)
Co-authored-by: quel <azerowall>
2021-11-10 18:39:58 +09:00
Paul Masurel
b75d4e59d1 Remove the broken panic on drop unit test. (#1200) 2021-11-10 18:39:37 +09:00
Paul Masurel
c6b5ab1dbe Replacing the panic check in the RAM Directory on lack of flush. 2021-11-09 11:04:31 +09:00
PSeitz
c12e07f0ce Merge pull request #1196 from quickwit-inc/dependabot/cargo/measure_time-0.8.0
Update measure_time requirement from 0.7.0 to 0.8.0
2021-11-05 08:47:51 +08:00
dependabot[bot]
8b877a4c26 Update measure_time requirement from 0.7.0 to 0.8.0
Updates the requirements on [measure_time](https://github.com/PSeitz/rust_measure_time) to permit the latest version.
- [Release notes](https://github.com/PSeitz/rust_measure_time/releases)
- [Commits](https://github.com/PSeitz/rust_measure_time/commits)

---
updated-dependencies:
- dependency-name: measure_time
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2021-11-04 20:27:16 +00:00
PSeitz
7dc0dc1c9b extend proptests with adding case (#1191)
This extends the proptest to cover a case where up to 100 documents are added to an index.
2021-11-01 09:27:10 +09:00
François Massot
0462754673 Optimize block wand for one and several TermScorer. (#1190)
* Added optimisation using block wand for single TermScorer.

A proptest was also added.

* Fix block wand algorithm by taking the last doc id of scores until the pivot scorer (included).
* In block wand, when the block max score is lower than the threshold, advance the scorer with the best score.
* Fix wrong condition in block_wand_single_scorer and add debug_assert to have an equality check on doc to break the loop.
2021-11-01 09:18:05 +09:00
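The standalone sketch below illustrates the block-max idea for a single scorer: each block of postings carries the maximum score it can produce, so whole blocks that cannot beat the current threshold are skipped without scoring a single document. It illustrates the technique only; it is not tantivy's `block_wand_single_scorer`.

```rust
// Each block knows its last doc id and the best score it can contribute.
struct Block {
    last_doc: u32,
    max_score: f32,
}

fn next_doc_above_threshold(
    blocks: &[Block],
    postings: &[(u32, f32)], // (doc id, score), sorted by doc id
    threshold: f32,
) -> Option<u32> {
    let mut cursor = 0;
    for block in blocks {
        if block.max_score <= threshold {
            // Cheap skip: no document in this block can be competitive,
            // so move past it without computing any score.
            while cursor < postings.len() && postings[cursor].0 <= block.last_doc {
                cursor += 1;
            }
            continue;
        }
        // The block may contain a competitive document: check doc by doc.
        while cursor < postings.len() && postings[cursor].0 <= block.last_doc {
            let (doc, score) = postings[cursor];
            if score > threshold {
                return Some(doc);
            }
            cursor += 1;
        }
    }
    None
}

fn main() {
    let blocks = vec![
        Block { last_doc: 9, max_score: 1.0 },
        Block { last_doc: 19, max_score: 3.0 },
    ];
    let postings = vec![(2, 0.5), (7, 1.0), (12, 2.5), (18, 3.0)];
    assert_eq!(next_doc_above_threshold(&blocks, &postings, 2.0), Some(12));
}
```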
PSeitz
5916ceda73 Merge pull request #1188 from PSeitz/sort_issue
fix incorrect padding in bitset for multiple of 64
2021-10-29 17:06:38 +08:00
Pascal Seitz
70283dc6c8 fix incorrect padding in bitset for multiple of 64 2021-10-29 16:49:22 +08:00
PSeitz
dbaf4f3623 Merge pull request #1187 from PSeitz/sort_issue
check searcher num docs in proptest
2021-10-29 16:19:24 +08:00
Pascal Seitz
4808648322 check searcher num docs in proptest 2021-10-29 14:38:30 +08:00
Paul Masurel
54afb9b34a Made PrepareCommit private 2021-10-29 14:13:14 +09:00
Paul Masurel
d336c8b938 Fixed logo 2021-10-27 08:54:16 +09:00
Paul Masurel
980d1b2796 Removing Patreon link 2021-10-27 08:53:45 +09:00
Dan Cecile
6317982876 Make indexer::prepared_commit public (#1184)
* Make indexer::prepared_commit public

* Add PreparedCommit to lib
2021-10-26 12:21:24 +09:00
PSeitz
e2fbbc08ca Merge pull request #1182 from PSeitz/remove_directory_generic
use Box<dyn Directory> as parameter to open/create an Index
2021-10-25 12:49:55 +08:00
Pascal Seitz
99cd25beae use <T: Into<Box<dyn Directory>>> as parameter to open/create an Index
This is done in order to support Box<dyn Directory> in addition to generic implementations of the trait Directory.
Remove boxing in ManagedDirectory.
2021-10-25 12:34:40 +08:00
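A self-contained sketch of the signature pattern (the `Directory` trait here is a stand-in, not tantivy's): accepting `T: Into<Box<dyn Directory>>` lets callers pass an already-boxed trait object as well as, given a suitable `From` impl, a concrete directory type.

```rust
// Stand-in trait to keep the sketch self-contained; tantivy's real
// Directory trait is of course much richer.
trait Directory {
    fn describe(&self) -> String;
}

struct RamDirectory;

impl Directory for RamDirectory {
    fn describe(&self) -> String {
        "ram directory".to_string()
    }
}

// The pattern from the commit: accept anything convertible into a boxed
// trait object, so open/create can take both boxed and concrete directories.
fn open_index<T: Into<Box<dyn Directory>>>(directory: T) -> String {
    let directory: Box<dyn Directory> = directory.into();
    directory.describe()
}

fn main() {
    // A Box<dyn Directory> converts into itself via the identity From impl.
    let boxed: Box<dyn Directory> = Box::new(RamDirectory);
    println!("{}", open_index(boxed));
}
```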
Kanji Yomoda
737ecc7015 Fix outdated comment for IndexWriter::new (#1183) 2021-10-25 10:59:18 +09:00
Kanji Yomoda
09668459c8 Update codecov-action to v2 and make it possible to keep it up-to-date with dependabot (#1181)
* Update codecov-action to v2

* Add github-actions to dependabot
2021-10-25 10:58:16 +09:00
Evance Soumaoro
e5fd30f438 Fixed links (#1177) 2021-10-25 10:56:04 +09:00
Tom Parker-Shemilt
c412a46105 Remove travis config (#1180) 2021-10-24 15:40:43 +09:00
PSeitz
3a78402496 update links (#1176) 2021-10-18 20:45:40 +09:00
Paul Masurel
d18ac136c0 Search simplified (#1175) 2021-10-18 12:52:43 +09:00
Paul Masurel
b5b1244857 More functionality in the ownedbytes crate (#1172) 2021-10-07 18:14:49 +09:00
Paul Masurel
27acfa4dea Removing dead file (#1170) 2021-10-07 14:15:21 +09:00
Paul Masurel
02cffa4dea Code simplification. (#1169)
Code simplification and Clippy
2021-10-07 14:11:44 +09:00
Paul Masurel
b52abbc771 Bugfix transposition_cost_one in FuzzyQuery (#1167) 2021-10-07 09:38:39 +09:00
Paul Masurel
894c61867f Fix test compilation (#1168) 2021-10-06 17:50:10 +09:00
PSeitz
352e0cc58d Add demux operation (#1150)
* add merge for DeleteBitSet, allow custom DeleteBitSet on merge
* forward delete bitsets on merge, add tests
* add demux operation and tests
2021-10-06 16:05:16 +09:00
Paul Masurel
ffe4446d90 Minor lint comments (#1166) 2021-10-06 11:27:48 +09:00
dependabot[bot]
4d05b26e7a Update lru requirement from 0.6.5 to 0.7.0 (#1165)
Updates the requirements on [lru](https://github.com/jeromefroe/lru-rs) to permit the latest version.
- [Release notes](https://github.com/jeromefroe/lru-rs/releases)
- [Changelog](https://github.com/jeromefroe/lru-rs/blob/master/CHANGELOG.md)
- [Commits](https://github.com/jeromefroe/lru-rs/compare/0.6.5...0.7.0)

---
updated-dependencies:
- dependency-name: lru
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-10-06 05:50:24 +09:00
Paul Masurel
0855649986 Leaning more on the alive (vs delete) semantics. (#1164) 2021-10-05 18:53:29 +09:00
PSeitz
d828e58903 Merge pull request #1163 from PSeitz/reduce_mem_usage
reduce mem usage
2021-10-01 08:03:41 +02:00
Pascal Seitz
aa0396fe27 fix variable names 2021-10-01 13:48:51 +08:00
Pascal Seitz
8d8315f8d0 prealloc vec in postinglist 2021-09-29 09:02:38 +08:00
Pascal Seitz
078c0a2e2e reserve vec 2021-09-29 08:45:04 +08:00
Pascal Seitz
f21e8dd875 use only segment ordinal in docidmapping 2021-09-29 08:44:56 +08:00
Tomoko Uchida
74e36c7e97 Add unit tests for tokenizers and filters (#1156)
* add unit test for SimpleTokenizer
* add unit tests for tokenizers and filters.
2021-09-27 10:22:01 +09:00
PSeitz
f27ae04282 fix slope calculation in multilinear interpol (#1161)
add test to check for compression
2021-09-27 10:14:03 +09:00
PSeitz
0ce49c9dd4 use lz4_flex 0.9.0 (#1160) 2021-09-27 10:12:20 +09:00
PSeitz
fe8e58e078 Merge pull request #1154 from PSeitz/delete_bitset
add DeleteBitSet iterator
2021-09-24 09:37:39 +02:00
Pascal Seitz
efc0d8341b fix comment 2021-09-24 15:09:21 +08:00
Pascal Seitz
22bcc83d10 fix padding in initialization 2021-09-24 14:43:04 +08:00
Pascal Seitz
5ee5037934 create and use ReadSerializedBitSet 2021-09-24 12:53:33 +08:00
Pascal Seitz
c217bfed1e cargo fmt 2021-09-23 21:02:19 +08:00
Pascal Seitz
c27ccd3e24 improve naming 2021-09-23 21:02:09 +08:00
Paul Masurel
367f5da782 Fixed comment to the index accessor 2021-09-23 21:53:48 +09:00
Mestery
b256df6599 add index accessor for index writer (#1159)
* add index accessor for index writer

* Update src/indexer/index_writer.rs

Co-authored-by: Paul Masurel <paul@quickwit.io>
2021-09-23 21:49:20 +09:00
Pascal Seitz
d7a6a409a1 renames 2021-09-23 20:33:11 +08:00
Pascal Seitz
a1f5cead96 AliveBitSet instead of DeleteBitSet 2021-09-23 20:03:57 +08:00
dependabot[bot]
37c5fe3c86 Update memmap2 requirement from 0.4 to 0.5 (#1157)
Updates the requirements on [memmap2](https://github.com/RazrFalcon/memmap2-rs) to permit the latest version.
- [Release notes](https://github.com/RazrFalcon/memmap2-rs/releases)
- [Changelog](https://github.com/RazrFalcon/memmap2-rs/blob/master/CHANGELOG.md)
- [Commits](https://github.com/RazrFalcon/memmap2-rs/compare/v0.4.0...v0.5.0)

---
updated-dependencies:
- dependency-name: memmap2
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-09-23 20:18:27 +09:00
Pascal Seitz
4583fa270b fixes 2021-09-23 10:39:53 +08:00
Pascal Seitz
beb3a5bd73 fix len 2021-09-18 17:58:15 +08:00
Pascal Seitz
93cbd52bf0 move code to biset, add inline, add benchmark 2021-09-18 17:35:22 +08:00
Pascal Seitz
c22177a005 add iterator 2021-09-17 15:29:27 +08:00
Pascal Seitz
4da71273e1 add de/serialization for bitset
remove len footgun
2021-09-17 10:28:12 +08:00
dependabot[bot]
2c78b31aab Update memmap2 requirement from 0.3 to 0.4 (#1155)
Updates the requirements on [memmap2](https://github.com/RazrFalcon/memmap2-rs) to permit the latest version.
- [Release notes](https://github.com/RazrFalcon/memmap2-rs/releases)
- [Changelog](https://github.com/RazrFalcon/memmap2-rs/blob/master/CHANGELOG.md)
- [Commits](https://github.com/RazrFalcon/memmap2-rs/compare/v.0.3.0...v0.4.0)
2021-09-17 08:52:52 +09:00
Pascal Seitz
4ae1d87632 add DeleteBitSet iterator 2021-09-15 23:10:04 +08:00
Paul Masurel
46b86a7976 Bumped version and edited changelog 2021-09-10 23:05:09 +09:00
PSeitz
3bc177e69d fix #1151 (#1152)
* fix #1151

Fixes an off-by-one error in the stats for the index fast field of the multi-value fast field.
When retrieving the data range for a docid, `get(docid)..get(docid+1)` is requested. On creation
the num_vals statistic was set to docid instead of docid + 1. In the multi-value linear-interpolation fast
field the last value was therefore not serialized (and would return 0 instead in most cases).
So for the last document, get(lastdoc)..get(lastdoc + 1) would return the invalid range `value..0`.

This PR adds a proptest to cover this scenario. It requires a combination of a large number of values (multilinear
interpolation is only active for more than 5_000 values) and a merge.
2021-09-10 23:00:37 +09:00
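A standalone illustration of the layout described in the commit message above: the values are stored back to back, and an index column gives each document's start offset, so doc `d` spans `idx[d]..idx[d + 1]` and the index needs `num_docs + 1` entries. Writing only `num_docs` of them, as in the bug, leaves the last document's range ending at a bogus 0.

```rust
// Toy multi-value fast field: all values concatenated, plus an index of
// start offsets with one extra trailing entry.
struct MultiValued {
    idx: Vec<u64>,    // num_docs + 1 offsets
    values: Vec<u64>, // all values, concatenated
}

impl MultiValued {
    fn get_vals(&self, doc: usize) -> &[u64] {
        let start = self.idx[doc] as usize;
        let end = self.idx[doc + 1] as usize;
        &self.values[start..end]
    }
}

fn main() {
    let field = MultiValued {
        idx: vec![0, 2, 3, 5], // 3 docs => 4 offsets
        values: vec![10, 11, 20, 30, 31],
    };
    assert_eq!(field.get_vals(0), &[10, 11]);
    // Works only because the index keeps the extra (num_docs + 1)-th entry.
    assert_eq!(field.get_vals(2), &[30, 31]);
}
```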
PSeitz
319609e9c1 test cargo-llvm-cov (#1149) 2021-09-03 22:00:43 +09:00
Kanji Yomoda
9d87b89718 Fix incorrect comment for Index::create_in_dir (#1148)
* Fix incorrect comment for Index::create_in_dir
2021-09-03 10:37:16 +09:00
Tomoko Uchida
dd81e38e53 Add WhitespaceTokenizer (#1147)
* Add WhitespaceTokenizer.
2021-08-29 18:20:49 +09:00
Paul Masurel
9f32b22602 Preparing for release. 2021-08-26 09:07:08 +09:00
sigaloid
096ce7488e Resolve some clippys, format (#1144)
* cargo +nightly clippy --fix -Z unstable-options
2021-08-26 08:46:00 +09:00
PSeitz
a1782dd172 Update index_sorting.md 2021-08-25 07:55:50 +01:00
PSeitz
000d76b11a Update index_sorting.md 2021-08-24 19:28:06 +01:00
PSeitz
abd29f6646 Update index_sorting.md 2021-08-24 19:26:19 +01:00
PSeitz
b4ecf0ab2f Merge pull request #1146 from tantivy-search/sorting_doc
add sorting to book
2021-08-23 17:37:54 +01:00
Pascal Seitz
798f7dbf67 add sorting to book 2021-08-23 17:36:41 +01:00
PSeitz
06a2e47c8d Merge pull request #1145 from tantivy-search/blub2
cargo fmt
2021-08-21 18:52:50 +01:00
Pascal Seitz
e0b83eb291 cargo fmt 2021-08-21 18:52:10 +01:00
PSeitz
13401f46ea add wildcard mention 2021-08-21 18:10:33 +01:00
PSeitz
1a45b030dc Merge pull request #1141 from tantivy-search/tantivy_common
dissolve common module
2021-08-20 08:03:37 +01:00
Pascal Seitz
62052bcc2d add missing test function
closes #1139
2021-08-20 07:26:22 +01:00
Pascal Seitz
3265f7bec3 dissolve common module 2021-08-19 23:26:34 +01:00
Pascal Seitz
ee0881712a move bitset to common crate, move composite file to directory 2021-08-19 17:45:09 +01:00
PSeitz
483e0336b6 Merge pull request #1140 from tantivy-search/tantivy_common
rename common to tantivy-common
2021-08-19 13:02:54 +01:00
Pascal Seitz
3e8f267e33 rename common to tantivy-common 2021-08-19 10:27:20 +01:00
Paul Masurel
3b247fd968 Version bump 2021-08-19 10:12:30 +09:00
Paul Masurel
750f6e6479 Removed obsolete unit test (#1138) 2021-08-19 10:07:49 +09:00
Evance Soumaoro
5b475e6603 Checksum validation using active files (#1130)
* checksum validation now uses segment files, not managed files
2021-08-19 10:03:20 +09:00
PSeitz
0ca7f73dc5 add docs badge, fix build badge 2021-08-13 19:40:33 +01:00
PSeitz
47ed18845e Merge pull request #1136 from tantivy-search/minor_fixes
more docs detail
2021-08-13 18:11:47 +01:00
Pascal Seitz
dc141cdb29 more docs detail
remove code duplicate
2021-08-13 17:40:13 +01:00
PSeitz
f6cf6e889b Merge pull request #1133 from tantivy-search/merge_overflow
test doc_freq and term_freq in sorted index
2021-08-05 07:53:46 +01:00
Pascal Seitz
f379a80233 test doc_freq and term_freq in sorted index 2021-08-03 11:38:05 +01:00
PSeitz
4a320fd1ff fix delta position in merge and index sorting (#1132)
fixes #1125
2021-08-03 18:06:36 +09:00
PSeitz
85d23e8e3b Merge pull request #1129 from tantivy-search/merge_overflow
add long running test in ci
2021-08-02 15:54:31 +01:00
Pascal Seitz
022ab9d298 don't run as pr 2021-08-02 15:44:00 +01:00
Pascal Seitz
605e8603dc add positions to long running test 2021-08-02 15:29:49 +01:00
Pascal Seitz
70f160b329 add long running test in ci 2021-08-02 11:35:39 +01:00
PSeitz
6d265e6bed fix gh action name 2021-08-02 10:38:01 +01:00
PSeitz
fdc512391b Merge pull request #1128 from tantivy-search/merge_overflow
add sort to functional test, add env for iterations
2021-08-02 10:29:16 +01:00
Pascal Seitz
108714c934 add sort to functional test, add env for iterations 2021-08-02 10:11:17 +01:00
Paul Masurel
44e8cf98a5 Cargo fmt 2021-07-30 15:30:01 +09:00
Paul Masurel
f0ee69d9e9 Remove the complicated block search logic for a simpler branchless (#1124)
binary search

The code is simpler and faster.

Before
test postings::bench::bench_segment_intersection                                                                         ... bench:   2,093,697 ns/iter (+/- 115,509)
test postings::bench::bench_skip_next_p01                                                                                ... bench:      58,585 ns/iter (+/- 796)
test postings::bench::bench_skip_next_p1                                                                                 ... bench:     160,872 ns/iter (+/- 5,164)
test postings::bench::bench_skip_next_p10                                                                                ... bench:     615,229 ns/iter (+/- 25,108)
test postings::bench::bench_skip_next_p90                                                                                ... bench:   1,120,509 ns/iter (+/- 22,271)

After
test postings::bench::bench_segment_intersection                                                                         ... bench:   1,747,726 ns/iter (+/- 52,867)
test postings::bench::bench_skip_next_p01                                                                                ... bench:      55,205 ns/iter (+/- 714)
test postings::bench::bench_skip_next_p1                                                                                 ... bench:     131,433 ns/iter (+/- 2,814)
test postings::bench::bench_skip_next_p10                                                                                ... bench:     478,830 ns/iter (+/- 12,794)
test postings::bench::bench_skip_next_p90                                                                                ... bench:     931,082 ns/iter (+/- 31,468)
2021-07-30 14:38:42 +09:00
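For reference, a standalone sketch of the branchless lower-bound pattern the commit refers to (not tantivy's actual code): the comparison result is used arithmetically instead of in an `if`, which lets the compiler emit a conditional move rather than a branch.

```rust
// Returns the index of the first element >= target in a sorted slice.
fn lower_bound(sorted: &[u32], target: u32) -> usize {
    let mut base = 0usize;
    let mut size = sorted.len();
    while size > 1 {
        let half = size / 2;
        // The comparison result (0 or 1) conditionally moves `base` into
        // the upper half without a branch in the loop body.
        base += ((sorted[base + half] < target) as usize) * half;
        size -= half;
    }
    // Final fix-up for the single remaining candidate.
    base + (size == 1 && sorted[base] < target) as usize
}

fn main() {
    let postings = [3u32, 7, 7, 12, 20];
    assert_eq!(lower_bound(&postings, 7), 1);
    assert_eq!(lower_bound(&postings, 13), 4);
    assert_eq!(lower_bound(&postings, 25), 5);
}
```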
Evance Soumaoro
b8a10c8406 switched to memmap2-rs (#1120) 2021-07-27 18:40:41 +09:00
PSeitz
ff4813529e add comments on compression (#1119) 2021-07-26 22:54:22 +09:00
PSeitz
470bc18e9b Merge pull request #1118 from tantivy-search/remove_rand
move rand to optional dependencies
2021-07-21 18:01:22 +01:00
245 changed files with 9593 additions and 6618 deletions


@@ -6,3 +6,10 @@ updates:
interval: daily
time: "20:00"
open-pull-requests-limit: 10
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: daily
time: "20:00"
open-pull-requests-limit: 10


@@ -1,27 +1,25 @@
name: coverage
name: Coverage
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
test:
name: coverage
runs-on: ubuntu-latest
container:
image: xd009642/tarpaulin:develop-nightly
options: --security-opt seccomp=unconfined
coverage:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v2
- name: Generate code coverage
run: |
cargo +nightly tarpaulin --verbose --all-features --workspace --timeout 120 --out Xml
- name: Upload to codecov.io
uses: codecov/codecov-action@v1
- uses: actions/checkout@v2
- name: Install Rust
run: rustup toolchain install nightly --component llvm-tools-preview
- name: Install cargo-llvm-cov
run: curl -LsSf https://github.com/taiki-e/cargo-llvm-cov/releases/latest/download/cargo-llvm-cov-x86_64-unknown-linux-gnu.tar.gz | tar xzf - -C ~/.cargo/bin
- name: Generate code coverage
run: cargo llvm-cov --all-features --workspace --lcov --output-path lcov.info
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v2
with:
# token: ${{secrets.CODECOV_TOKEN}} # not required for public repos
fail_ci_if_error: true
token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos
files: lcov.info
fail_ci_if_error: true

.github/workflows/long_running.yml (new file, 24 lines)

@@ -0,0 +1,24 @@
name: Long running tests
on:
push:
branches: [ main ]
env:
CARGO_TERM_COLOR: always
NUM_FUNCTIONAL_TEST_ITERATIONS: 20000
jobs:
functional_test_unsorted:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Run indexing_unsorted
run: cargo test indexing_unsorted -- --ignored
functional_test_sorted:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Run indexing_sorted
run: cargo test indexing_sorted -- --ignored


@@ -1,4 +1,4 @@
name: Rust
name: Unit tests
on:
push:
@@ -10,7 +10,7 @@ env:
CARGO_TERM_COLOR: always
jobs:
build:
test:
runs-on: ubuntu-latest
@@ -24,7 +24,19 @@ jobs:
toolchain: nightly
override: true
components: rustfmt
- name: Install latest nightly to test also against unstable feature flag
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
components: rustfmt, clippy
- name: Run tests
run: cargo test --all-features --verbose --workspace
run: cargo +stable test --features mmap,brotli-compression,lz4-compression,snappy-compression,failpoints --verbose --workspace
- name: Check Formatting
run: cargo fmt --all -- --check
run: cargo +nightly fmt --all -- --check
- uses: actions-rs/clippy-check@v1
with:
toolchain: stable
token: ${{ secrets.GITHUB_TOKEN }}
args: --tests

.gitignore (1 line added)

@@ -1,4 +1,5 @@
tantivy.iml
.cargo
proptest-regressions
*.swp
target


@@ -1,92 +0,0 @@
# Based on the "trust" template v0.1.2
# https://github.com/japaric/trust/tree/v0.1.2
dist: trusty
language: rust
services: docker
sudo: required
env:
global:
- CRATE_NAME=tantivy
- TRAVIS_CARGO_NIGHTLY_FEATURE=""
# - secure: eC8HjTi1wgRVCsMAeXEXt8Ckr0YBSGOEnQkkW4/Nde/OZ9jJjz2nmP1ELQlDE7+czHub2QvYtDMG0parcHZDx/Kus0yvyn08y3g2rhGIiE7y8OCvQm1Mybu2D/p7enm6shXquQ6Z5KRfRq+18mHy80wy9ABMA/ukEZdvnfQ76/Een8/Lb0eHaDoXDXn3PqLVtByvSfQQ7OhS60dEScu8PWZ6/l1057P5NpdWbMExBE7Ro4zYXNhkJeGZx0nP/Bd4Jjdt1XfPzMEybV6NZ5xsTILUBFTmOOt603IsqKGov089NExqxYu5bD3K+S4MzF1Nd6VhomNPJqLDCfhlymJCUj5n5Ku4yidlhQbM4Ej9nGrBalJnhcjBjPua5tmMF2WCxP9muKn/2tIOu1/+wc0vMf9Yd3wKIkf5+FtUxCgs2O+NslWvmOMAMI/yD25m7hb4t1IwE/4Bk+GVcWJRWXbo0/m6ZUHzRzdjUY2a1qvw7C9udzdhg7gcnXwsKrSWi2NjMiIVw86l+Zim0nLpKIN41sxZHLaFRG63Ki8zQ/481LGn32awJ6i3sizKS0WD+N1DfR2qYMrwYHaMN0uR0OFXYTJkFvTFttAeUY3EKmRKAuMhmO2YRdSr4/j/G5E9HMc1gSGJj6PxgpQU7EpvxRsmoVAEJr0mszmOj9icGHep/FM=
addons:
apt:
sources:
- ubuntu-toolchain-r-test
- kalakris-cmake
packages:
- gcc-4.8
- g++-4.8
- libcurl4-openssl-dev
- libelf-dev
- libdw-dev
- binutils-dev
- cmake
matrix:
include:
# Android
- env: TARGET=aarch64-linux-android DISABLE_TESTS=1
#- env: TARGET=arm-linux-androideabi DISABLE_TESTS=1
#- env: TARGET=armv7-linux-androideabi DISABLE_TESTS=1
#- env: TARGET=i686-linux-android DISABLE_TESTS=1
#- env: TARGET=x86_64-linux-android DISABLE_TESTS=1
# Linux
#- env: TARGET=aarch64-unknown-linux-gnu
#- env: TARGET=i686-unknown-linux-gnu
- env: TARGET=x86_64-unknown-linux-gnu CODECOV=1 #UPLOAD_DOCS=1
# - env: TARGET=x86_64-unknown-linux-musl CODECOV=1
# OSX
#- env: TARGET=x86_64-apple-darwin
# os: osx
before_install:
- set -e
- rustup self update
- rustup component add rustfmt
install:
- sh ci/install.sh
- source ~/.cargo/env || true
- env | grep "TRAVIS"
before_script:
- export PATH=$HOME/.cargo/bin:$PATH
- cargo install cargo-update || echo "cargo-update already installed"
- cargo install cargo-travis || echo "cargo-travis already installed"
script:
- bash ci/script.sh
- cargo fmt --all -- --check
before_deploy:
- sh ci/before_deploy.sh
after_success:
# Needs GH_TOKEN env var to be set in travis settings
- if [[ -v GH_TOKEN ]]; then echo "GH TOKEN IS SET"; else echo "GH TOKEN NOT SET"; fi
- if [[ -v UPLOAD_DOCS ]]; then cargo doc; cargo doc-upload; else echo "doc upload disabled."; fi
#cache: cargo
#before_cache:
# # Travis can't cache files that are not readable by "others"
# - chmod -R a+r $HOME/.cargo
# - find ./target/debug -type f -maxdepth 1 -delete
# - rm -f ./target/.rustc_info.json
# - rm -fr ./target/debug/{deps,.fingerprint}/tantivy*
# - rm -r target/debug/examples/
# - ls -1 examples/ | sed -e 's/\.rs$//' | xargs -I "{}" find target/* -name "*{}*" -type f -delete
#branches:
# only:
# # release tags
# - /^v\d+\.\d+\.\d+.*$/
# - master
notifications:
email:
on_success: never


@@ -1,3 +1,27 @@
Tantivy 0.17
================================
- LogMergePolicy now triggers merges if the ratio of deleted documents reaches a threshold (@shikhar @fulmicoton) [#115](https://github.com/quickwit-oss/tantivy/issues/115)
- Adds a searcher Warmer API (@shikhar @fulmicoton)
- Change to non-strict schema. Fields in the data which are not defined in the schema are now ignored. Previously this returned an error. #1211
- Facets are now always indexed. Existing indexes with indexed facets should work out of the box. Indexes with facet fields marked index: false should be broken (but they were already broken in a sense). (@fulmicoton) #1195
- Bugfix that could in theory impact durability on some filesystems [#1224](https://github.com/quickwit-oss/tantivy/issues/1224)
- Schema now offers not indexing fieldnorms (@lpouget) [#922](https://github.com/quickwit-oss/tantivy/issues/922)
- Reduce the number of fsync calls [#1225](https://github.com/quickwit-oss/tantivy/issues/1225)
Tantivy 0.16.2
================================
- Bugfix in FuzzyTermQuery. (transposition_cost_one was not doing anything)
Tantivy 0.16.1
========================
- Major Bugfix on multivalued fastfield. #1151
- Demux operation (@PSeitz)
Tantivy 0.16.0
=========================
- Bugfix in the filesum check. (@evanxg852000) #1127
- Bugfix in positions when the index is sorted by a field. (@appaquet) #1125
Tantivy 0.15.3
=========================
- Major bugfix. Deleting documents was broken when the index was sorted by a field. (@appaquet, @fulmicoton) #1101
@@ -104,7 +128,7 @@ Tantivy 0.12.0
## How to update?
Crates relying on custom tokenizer, or registering tokenizer in the manager will require some
minor changes. Check https://github.com/tantivy-search/tantivy/blob/main/examples/custom_tokenizer.rs
minor changes. Check https://github.com/quickwit-oss/tantivy/blob/main/examples/custom_tokenizer.rs
to check for some code sample.
Tantivy 0.11.3


@@ -1,13 +1,13 @@
[package]
name = "tantivy"
version = "0.16.0-dev"
version = "0.17.0-dev"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = ["database-implementations", "data-structures"]
description = """Search engine library"""
documentation = "https://docs.rs/tantivy/"
homepage = "https://github.com/tantivy-search/tantivy"
repository = "https://github.com/tantivy-search/tantivy"
homepage = "https://github.com/quickwit-oss/tantivy"
repository = "https://github.com/quickwit-oss/tantivy"
readme = "README.md"
keywords = ["search", "information", "retrieval"]
edition = "2018"
@@ -19,8 +19,8 @@ crc32fast = "1.2.1"
once_cell = "1.7.2"
regex ={ version = "1.5.4", default-features = false, features = ["std"] }
tantivy-fst = "0.3"
memmap = {version = "0.7", optional=true}
lz4_flex = { version = "0.8.0", default-features = false, features = ["checked-decode"], optional = true }
memmap2 = {version = "0.5", optional=true}
lz4_flex = { version = "0.9", default-features = false, features = ["checked-decode"], optional = true }
brotli = { version = "3.3", optional = true }
snap = { version = "1.0.5", optional = true }
tempfile = { version = "3.2", optional = true }
@@ -31,13 +31,13 @@ num_cpus = "1.13"
fs2={ version = "0.4.3", optional = true }
levenshtein_automata = "0.2"
uuid = { version = "0.8.2", features = ["v4", "serde"] }
crossbeam = "0.8"
crossbeam = "0.8.1"
futures = { version = "0.3.15", features = ["thread-pool"] }
tantivy-query-grammar = { version="0.15.0", path="./query-grammar" }
tantivy-bitpacker = { version="0.1", path="./bitpacker" }
common = { version="0.1", path="./common" }
common = { version = "0.1", path = "./common/", package = "tantivy-common" }
fastfield_codecs = { version="0.1", path="./fastfield_codecs", default-features = false }
ownedbytes = { version="0.1", path="./ownedbytes" }
ownedbytes = { version="0.2", path="./ownedbytes" }
stable_deref_trait = "1.2"
rust-stemmers = "1.2"
downcast-rs = "1.2"
@@ -46,15 +46,15 @@ census = "0.4"
fnv = "1.0.7"
thiserror = "1.0.24"
htmlescape = "0.3.1"
fail = "0.4"
fail = "0.5"
murmurhash32 = "0.2"
chrono = "0.4.19"
smallvec = "1.6.1"
rayon = "1.5"
lru = "0.6.5"
fastdivide = "0.3"
lru = "0.7.0"
fastdivide = "0.4"
itertools = "0.10.0"
measure_time = "0.7.0"
measure_time = "0.8.0"
[target.'cfg(windows)'.dependencies]
winapi = "0.3.9"
@@ -64,10 +64,12 @@ rand = "0.8.3"
maplit = "1.0.2"
matches = "0.1.8"
proptest = "1.0"
criterion = "0.3.4"
criterion = "0.3.5"
test-log = "0.2.8"
env_logger = "0.9.0"
[dev-dependencies.fail]
version = "0.4"
version = "0.5"
features = ["failpoints"]
[profile.release]
@@ -81,7 +83,7 @@ overflow-checks = true
[features]
default = ["mmap", "lz4-compression" ]
mmap = ["fs2", "tempfile", "memmap"]
mmap = ["fs2", "tempfile", "memmap2"]
brotli-compression = ["brotli"]
lz4-compression = ["lz4_flex"]
@@ -89,14 +91,10 @@ snappy-compression = ["snap"]
failpoints = ["fail/failpoints"]
unstable = [] # useful for benches.
wasm-bindgen = ["uuid/wasm-bindgen"]
[workspace]
members = ["query-grammar", "bitpacker", "common", "fastfield_codecs", "ownedbytes"]
[badges]
travis-ci = { repository = "tantivy-search/tantivy" }
# Following the "fail" crate best practises, we isolate
# tests that define specific behavior in fail check points
# in a different binary.


@@ -1,3 +1,6 @@
test:
echo "Run test only... No examples."
cargo test --tests --lib
fmt:
cargo +nightly fmt --all


@@ -1,25 +1,13 @@
[![Build Status](https://travis-ci.org/tantivy-search/tantivy.svg?branch=main)](https://travis-ci.org/tantivy-search/tantivy)
[![codecov](https://codecov.io/gh/tantivy-search/tantivy/branch/main/graph/badge.svg)](https://codecov.io/gh/tantivy-search/tantivy)
[![Join the chat at https://gitter.im/tantivy-search/tantivy](https://badges.gitter.im/tantivy-search/tantivy.svg)](https://gitter.im/tantivy-search/tantivy?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Docs](https://docs.rs/tantivy/badge.svg)](https://docs.rs/crate/tantivy/)
[![Build Status](https://github.com/quickwit-oss/tantivy/actions/workflows/test.yml/badge.svg)](https://github.com/quickwit-oss/tantivy/actions/workflows/test.yml)
[![codecov](https://codecov.io/gh/quickwit-oss/tantivy/branch/main/graph/badge.svg)](https://codecov.io/gh/quickwit-oss/tantivy)
[![Join the chat at https://discord.gg/MT27AG5EVE](https://shields.io/discord/908281611840282624?label=chat%20on%20discord)](https://discord.gg/MT27AG5EVE)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Build status](https://ci.appveyor.com/api/projects/status/r7nb13kj23u8m9pj/branch/main?svg=true)](https://ci.appveyor.com/project/fulmicoton/tantivy/branch/main)
[![Crates.io](https://img.shields.io/crates/v/tantivy.svg)](https://crates.io/crates/tantivy)
![Tantivy](https://tantivy-search.github.io/logo/tantivy-logo.png)
[![](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/images/0)](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/0)
[![](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/images/1)](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/1)
[![](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/images/2)](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/2)
[![](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/images/3)](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/3)
[![](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/images/4)](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/4)
[![](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/images/5)](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/5)
[![](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/images/6)](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/6)
[![](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/images/7)](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/7)
[![Become a patron](https://c5.patreon.com/external/logo/become_a_patron_button.png)](https://www.patreon.com/fulmicoton)
**Tantivy** is a **full text search engine library** written in Rust.
It is closer to [Apache Lucene](https://lucene.apache.org/) than to [Elasticsearch](https://www.elastic.co/products/elasticsearch) or [Apache Solr](https://lucene.apache.org/solr/) in the sense it is not
@@ -30,7 +18,7 @@ Tantivy is, in fact, strongly inspired by Lucene's design.
# Benchmark
The following [benchmark](https://tantivy-search.github.io/bench/) breaks down
performance for different types of queries / collections.
Your mileage WILL vary depending on the nature of queries and their load.
@@ -38,7 +26,7 @@ Your mileage WILL vary depending on the nature of queries and their load.
# Features
- Full-text search
- Configurable tokenizer (stemming available for 17 Latin languages with third party support for Chinese ([tantivy-jieba](https://crates.io/crates/tantivy-jieba) and [cang-jie](https://crates.io/crates/cang-jie)), Japanese ([lindera](https://github.com/lindera-morphology/lindera-tantivy) and [tantivy-tokenizer-tiny-segmenter](https://crates.io/crates/tantivy-tokenizer-tiny-segmenter)) and Korean ([lindera](https://github.com/lindera-morphology/lindera-tantivy) + [lindera-ko-dic-builder](https://github.com/lindera-morphology/lindera-ko-dic-builder))
- Configurable tokenizer (stemming available for 17 Latin languages with third party support for Chinese ([tantivy-jieba](https://crates.io/crates/tantivy-jieba) and [cang-jie](https://crates.io/crates/cang-jie)), Japanese ([lindera](https://github.com/lindera-morphology/lindera-tantivy), [Vaporetto](https://crates.io/crates/vaporetto_tantivy), and [tantivy-tokenizer-tiny-segmenter](https://crates.io/crates/tantivy-tokenizer-tiny-segmenter)) and Korean ([lindera](https://github.com/lindera-morphology/lindera-tantivy) + [lindera-ko-dic-builder](https://github.com/lindera-morphology/lindera-ko-dic-builder))
- Fast (check out the :racehorse: :sparkles: [benchmark](https://tantivy-search.github.io/bench/) :sparkles: :racehorse:)
- Tiny startup time (<10ms), perfect for command line tools
- BM25 scoring (the same as Lucene)
@@ -60,7 +48,7 @@ Your mileage WILL vary depending on the nature of queries and their load.
## Non-features
- Distributed search is out of the scope of Tantivy. That being said, Tantivy is a
library upon which one could build a distributed search. Serializable/mergeable collector state for instance,
are within the scope of Tantivy.
@@ -69,22 +57,21 @@ are within the scope of Tantivy.
Tantivy works on stable Rust (>= 1.27) and supports Linux, MacOS, and Windows.
- [Tantivy's simple search example](https://tantivy-search.github.io/examples/basic_search.html)
- [tantivy-cli and its tutorial](https://github.com/tantivy-search/tantivy-cli) - `tantivy-cli` is an actual command line interface that makes it easy for you to create a search engine,
- [tantivy-cli and its tutorial](https://github.com/quickwit-oss/tantivy-cli) - `tantivy-cli` is an actual command line interface that makes it easy for you to create a search engine,
index documents, and search via the CLI or a small server with a REST API.
It walks you through getting a wikipedia search engine up and running in a few minutes.
- [Reference doc for the last released version](https://docs.rs/tantivy/)
# How can I support this project?
There are many ways to support this project.
- Use Tantivy and tell us about your experience on [Gitter](https://gitter.im/tantivy-search/tantivy) or by email (paul.masurel@gmail.com)
- Use Tantivy and tell us about your experience on [Discord](https://discord.gg/MT27AG5EVE) or by email (paul.masurel@gmail.com)
- Report bugs
- Write a blog post
- Help with documentation by asking questions or submitting PRs
- Contribute code (you can join [our Gitter](https://gitter.im/tantivy-search/tantivy))
- Contribute code (you can join [our Discord server](https://discord.gg/MT27AG5EVE))
- Talk about Tantivy around you
- [![Become a patron](https://c5.patreon.com/external/logo/become_a_patron_button.png)](https://www.patreon.com/fulmicoton)
# Contributing code
@@ -96,7 +83,7 @@ Tantivy compiles on stable Rust but requires `Rust >= 1.27`.
To check out and run tests, you can simply run:
```bash
git clone https://github.com/tantivy-search/tantivy.git
git clone https://github.com/quickwit-oss/tantivy.git
cd tantivy
cargo build
```


@@ -1,12 +1,12 @@
[package]
name = "tantivy-bitpacker"
version = "0.1.0"
version = "0.1.1"
edition = "2018"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = []
description = """Tantivy-sub crate: bitpacking"""
repository = "https://github.com/tantivy-search/tantivy"
repository = "https://github.com/quickwit-oss/tantivy"
keywords = []


@@ -1,4 +1,5 @@
use std::{convert::TryInto, io};
use std::convert::TryInto;
use std::io;
pub struct BitPacker {
mini_buffer: u64,


@@ -1,12 +1,11 @@
use super::bitpacker::BitPacker;
use super::compute_num_bits;
use crate::{minmax, BitUnpacker};
use super::{bitpacker::BitPacker, compute_num_bits};
const BLOCK_SIZE: usize = 128;
/// `BlockedBitpacker` compresses data in blocks of
/// 128 elements, while keeping an index on it
///
#[derive(Debug, Clone)]
pub struct BlockedBitpacker {
// bitpacked blocks


@@ -1,8 +1,7 @@
mod bitpacker;
mod blocked_bitpacker;
pub use crate::bitpacker::BitPacker;
pub use crate::bitpacker::BitUnpacker;
pub use crate::bitpacker::{BitPacker, BitUnpacker};
pub use crate::blocked_bitpacker::BlockedBitpacker;
/// Computes the number of bits that will be used for bitpacking.
@@ -50,3 +49,32 @@ where
}
None
}
#[test]
fn test_compute_num_bits() {
assert_eq!(compute_num_bits(1), 1u8);
assert_eq!(compute_num_bits(0), 0u8);
assert_eq!(compute_num_bits(2), 2u8);
assert_eq!(compute_num_bits(3), 2u8);
assert_eq!(compute_num_bits(4), 3u8);
assert_eq!(compute_num_bits(255), 8u8);
assert_eq!(compute_num_bits(256), 9u8);
assert_eq!(compute_num_bits(5_000_000_000), 33u8);
}
#[test]
fn test_minmax_empty() {
let vals: Vec<u32> = vec![];
assert_eq!(minmax(vals.into_iter()), None);
}
#[test]
fn test_minmax_one() {
assert_eq!(minmax(vec![1].into_iter()), Some((1, 1)));
}
#[test]
fn test_minmax_two() {
assert_eq!(minmax(vec![1, 2].into_iter()), Some((1, 2)));
assert_eq!(minmax(vec![2, 1].into_iter()), Some((1, 2)));
}


@@ -1,5 +1,5 @@
[package]
name = "common"
name = "tantivy-common"
version = "0.1.0"
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
license = "MIT"
@@ -10,3 +10,8 @@ description = "common traits and utility functions used by multiple tantivy subc
[dependencies]
byteorder = "1.4.3"
ownedbytes = { version="0.2", path="../ownedbytes" }
[dev-dependencies]
proptest = "1.0.0"
rand = "0.8.4"

common/src/bitset.rs (new file, 745 lines)

@@ -0,0 +1,745 @@
use std::convert::TryInto;
use std::io::Write;
use std::{fmt, io, u64};
use ownedbytes::OwnedBytes;
#[derive(Clone, Copy, Eq, PartialEq)]
pub struct TinySet(u64);
impl fmt::Debug for TinySet {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.into_iter().collect::<Vec<u32>>().fmt(f)
}
}
pub struct TinySetIterator(TinySet);
impl Iterator for TinySetIterator {
type Item = u32;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.0.pop_lowest()
}
}
impl IntoIterator for TinySet {
type Item = u32;
type IntoIter = TinySetIterator;
fn into_iter(self) -> Self::IntoIter {
TinySetIterator(self)
}
}
impl TinySet {
pub fn serialize<T: Write>(&self, writer: &mut T) -> io::Result<()> {
writer.write_all(self.0.to_le_bytes().as_ref())
}
pub fn into_bytes(self) -> [u8; 8] {
self.0.to_le_bytes()
}
#[inline]
pub fn deserialize(data: [u8; 8]) -> Self {
let val: u64 = u64::from_le_bytes(data);
TinySet(val)
}
/// Returns an empty `TinySet`.
#[inline]
pub fn empty() -> TinySet {
TinySet(0u64)
}
/// Returns a full `TinySet`.
#[inline]
pub fn full() -> TinySet {
TinySet::empty().complement()
}
pub fn clear(&mut self) {
self.0 = 0u64;
}
/// Returns the complement of the set in `[0, 64[`.
///
/// Careful on making this function public, as it will break the padding handling in the last
/// bucket.
#[inline]
fn complement(self) -> TinySet {
TinySet(!self.0)
}
/// Returns true iff the `TinySet` contains the element `el`.
#[inline]
pub fn contains(self, el: u32) -> bool {
!self.intersect(TinySet::singleton(el)).is_empty()
}
/// Returns the number of elements in the TinySet.
#[inline]
pub fn len(self) -> u32 {
self.0.count_ones()
}
/// Returns the intersection of `self` and `other`
#[inline]
#[must_use]
pub fn intersect(self, other: TinySet) -> TinySet {
TinySet(self.0 & other.0)
}
/// Creates a new `TinySet` containing only one element
/// within `[0; 64[`
#[inline]
pub fn singleton(el: u32) -> TinySet {
TinySet(1u64 << u64::from(el))
}
/// Insert a new element within [0..64)
#[inline]
#[must_use]
pub fn insert(self, el: u32) -> TinySet {
self.union(TinySet::singleton(el))
}
/// Removes an element within [0..64)
#[inline]
#[must_use]
pub fn remove(self, el: u32) -> TinySet {
self.intersect(TinySet::singleton(el).complement())
}
/// Insert a new element within [0..64)
///
/// returns true if the set changed
#[inline]
pub fn insert_mut(&mut self, el: u32) -> bool {
let old = *self;
*self = old.insert(el);
old != *self
}
/// Removes an element within [0..64)
///
/// returns true if the set changed
#[inline]
pub fn remove_mut(&mut self, el: u32) -> bool {
let old = *self;
*self = old.remove(el);
old != *self
}
/// Returns the union of two tinysets
#[inline]
#[must_use]
pub fn union(self, other: TinySet) -> TinySet {
TinySet(self.0 | other.0)
}
/// Returns true iff the `TinySet` is empty.
#[inline]
pub fn is_empty(self) -> bool {
self.0 == 0u64
}
/// Returns the lowest element in the `TinySet`
/// and removes it.
#[inline]
pub fn pop_lowest(&mut self) -> Option<u32> {
if self.is_empty() {
None
} else {
let lowest = self.0.trailing_zeros() as u32;
self.0 ^= TinySet::singleton(lowest).0;
Some(lowest)
}
}
/// Returns a `TinySet` that contains all values up
/// to the limit, excluded.
///
/// The limit is assumed to be strictly lower than 64.
pub fn range_lower(upper_bound: u32) -> TinySet {
TinySet((1u64 << u64::from(upper_bound % 64u32)) - 1u64)
}
/// Returns a `TinySet` that contains all values greater
/// than or equal to the given limit, included (and up to 63).
///
/// The limit is assumed to be strictly lower than 64.
pub fn range_greater_or_equal(from_included: u32) -> TinySet {
TinySet::range_lower(from_included).complement()
}
}
#[derive(Clone)]
pub struct BitSet {
tinysets: Box<[TinySet]>,
len: u64,
max_value: u32,
}
fn num_buckets(max_val: u32) -> u32 {
(max_val + 63u32) / 64u32
}
impl BitSet {
/// serialize a `BitSet`.
pub fn serialize<T: Write>(&self, writer: &mut T) -> io::Result<()> {
writer.write_all(self.max_value.to_le_bytes().as_ref())?;
for tinyset in self.tinysets.iter().cloned() {
writer.write_all(&tinyset.into_bytes())?;
}
writer.flush()?;
Ok(())
}
/// Create a new `BitSet` that may contain elements
/// within `[0, max_val)`.
pub fn with_max_value(max_value: u32) -> BitSet {
let num_buckets = num_buckets(max_value);
let tinybitsets = vec![TinySet::empty(); num_buckets as usize].into_boxed_slice();
BitSet {
tinysets: tinybitsets,
len: 0,
max_value,
}
}
/// Create a new `BitSet` that may contain elements
/// within `[0, max_val)`. Initially all values will be set.
pub fn with_max_value_and_full(max_value: u32) -> BitSet {
let num_buckets = num_buckets(max_value);
let mut tinybitsets = vec![TinySet::full(); num_buckets as usize].into_boxed_slice();
// Fix padding
let lower = max_value % 64u32;
if lower != 0 {
tinybitsets[tinybitsets.len() - 1] = TinySet::range_lower(lower);
}
BitSet {
tinysets: tinybitsets,
len: max_value as u64,
max_value,
}
}
/// Removes all elements from the `BitSet`.
pub fn clear(&mut self) {
for tinyset in self.tinysets.iter_mut() {
*tinyset = TinySet::empty();
}
}
/// Intersect with serialized bitset
pub fn intersect_update(&mut self, other: &ReadOnlyBitSet) {
self.intersect_update_with_iter(other.iter_tinysets());
}
/// Intersect with tinysets
fn intersect_update_with_iter(&mut self, other: impl Iterator<Item = TinySet>) {
self.len = 0;
for (left, right) in self.tinysets.iter_mut().zip(other) {
*left = left.intersect(right);
self.len += left.len() as u64;
}
}
/// Returns the number of elements in the `BitSet`.
#[inline]
pub fn len(&self) -> usize {
self.len as usize
}
/// Inserts an element in the `BitSet`
#[inline]
pub fn insert(&mut self, el: u32) {
// we do not check saturated els.
let higher = el / 64u32;
let lower = el % 64u32;
self.len += if self.tinysets[higher as usize].insert_mut(lower) {
1
} else {
0
};
}
/// Removes an element from the `BitSet`
#[inline]
pub fn remove(&mut self, el: u32) {
// we do not check saturated els.
let higher = el / 64u32;
let lower = el % 64u32;
self.len -= if self.tinysets[higher as usize].remove_mut(lower) {
1
} else {
0
};
}
/// Returns true iff the element is in the `BitSet`.
#[inline]
pub fn contains(&self, el: u32) -> bool {
self.tinyset(el / 64u32).contains(el % 64)
}
/// Returns the index of the first non-empty bucket equal to
/// or greater than `bucket`.
///
/// Reminder: the tiny set with bucket `bucket` represents the
/// elements from `bucket * 64` to `(bucket+1) * 64`.
pub fn first_non_empty_bucket(&self, bucket: u32) -> Option<u32> {
self.tinysets[bucket as usize..]
.iter()
.cloned()
.position(|tinyset| !tinyset.is_empty())
.map(|delta_bucket| bucket + delta_bucket as u32)
}
#[inline]
pub fn max_value(&self) -> u32 {
self.max_value
}
/// Returns the tiny bitset representing the
/// set restricted to the number range from
/// `bucket * 64` to `(bucket + 1) * 64`.
pub fn tinyset(&self, bucket: u32) -> TinySet {
self.tinysets[bucket as usize]
}
}
/// Serialized BitSet.
#[derive(Clone)]
pub struct ReadOnlyBitSet {
data: OwnedBytes,
max_value: u32,
}
pub fn intersect_bitsets(left: &ReadOnlyBitSet, other: &ReadOnlyBitSet) -> ReadOnlyBitSet {
assert_eq!(left.max_value(), other.max_value());
assert_eq!(left.data.len(), other.data.len());
let intersected_tinyset_it = left
.iter_tinysets()
.zip(other.iter_tinysets())
.map(|(left_tinyset, right_tinyset)| left_tinyset.intersect(right_tinyset));
let mut output_dataset: Vec<u8> = Vec::with_capacity(left.data.len());
for tinyset in intersected_tinyset_it {
output_dataset.extend_from_slice(&tinyset.into_bytes());
}
ReadOnlyBitSet {
data: OwnedBytes::new(output_dataset),
max_value: left.max_value(),
}
}
impl ReadOnlyBitSet {
pub fn open(data: OwnedBytes) -> Self {
let (max_value_data, data) = data.split(4);
assert_eq!(data.len() % 8, 0);
let max_value: u32 = u32::from_le_bytes(max_value_data.as_ref().try_into().unwrap());
ReadOnlyBitSet { data, max_value }
}
/// Number of elements in the bitset.
#[inline]
pub fn len(&self) -> usize {
self.iter_tinysets()
.map(|tinyset| tinyset.len() as usize)
.sum()
}
/// Iterate the tinyset on the fly from serialized data.
#[inline]
fn iter_tinysets(&self) -> impl Iterator<Item = TinySet> + '_ {
self.data.chunks_exact(8).map(move |chunk| {
let tinyset: TinySet = TinySet::deserialize(chunk.try_into().unwrap());
tinyset
})
}
/// Iterate over the positions of the elements.
#[inline]
pub fn iter(&self) -> impl Iterator<Item = u32> + '_ {
self.iter_tinysets()
.enumerate()
.flat_map(move |(chunk_num, tinyset)| {
let chunk_base_val = chunk_num as u32 * 64;
tinyset
.into_iter()
.map(move |val| val + chunk_base_val)
.take_while(move |doc| *doc < self.max_value)
})
}
/// Returns true iff the element is in the `BitSet`.
#[inline]
pub fn contains(&self, el: u32) -> bool {
let byte_offset = el / 8u32;
let b: u8 = self.data[byte_offset as usize];
let shift = (el % 8) as u8;
b & (1u8 << shift) != 0
}
/// Maximum value the bitset may contain.
/// (Note this is not the maximum value contained in the set.)
///
/// A bitset has an intrinsic capacity.
/// It only stores elements within [0..max_value).
#[inline]
pub fn max_value(&self) -> u32 {
self.max_value
}
/// Number of bytes used in the bitset representation.
pub fn num_bytes(&self) -> usize {
self.data.len()
}
}
impl<'a> From<&'a BitSet> for ReadOnlyBitSet {
fn from(bitset: &'a BitSet) -> ReadOnlyBitSet {
let mut buffer = Vec::with_capacity(bitset.tinysets.len() * 8 + 4);
bitset
.serialize(&mut buffer)
.expect("serializing into a buffer should never fail");
ReadOnlyBitSet::open(OwnedBytes::new(buffer))
}
}
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use ownedbytes::OwnedBytes;
use rand::distributions::Bernoulli;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use super::{BitSet, ReadOnlyBitSet, TinySet};
#[test]
fn test_read_serialized_bitset_full_multi() {
for i in 0..1000 {
let bitset = BitSet::with_max_value_and_full(i);
let mut out = vec![];
bitset.serialize(&mut out).unwrap();
let bitset = ReadOnlyBitSet::open(OwnedBytes::new(out));
assert_eq!(bitset.len() as usize, i as usize);
}
}
#[test]
fn test_read_serialized_bitset_full_block() {
let bitset = BitSet::with_max_value_and_full(64);
let mut out = vec![];
bitset.serialize(&mut out).unwrap();
let bitset = ReadOnlyBitSet::open(OwnedBytes::new(out));
assert_eq!(bitset.len() as usize, 64);
}
#[test]
fn test_read_serialized_bitset_full() {
let mut bitset = BitSet::with_max_value_and_full(5);
bitset.remove(3);
let mut out = vec![];
bitset.serialize(&mut out).unwrap();
let bitset = ReadOnlyBitSet::open(OwnedBytes::new(out));
assert_eq!(bitset.len(), 4);
}
#[test]
fn test_bitset_intersect() {
let bitset_serialized = {
let mut bitset = BitSet::with_max_value_and_full(5);
bitset.remove(1);
bitset.remove(3);
let mut out = vec![];
bitset.serialize(&mut out).unwrap();
ReadOnlyBitSet::open(OwnedBytes::new(out))
};
let mut bitset = BitSet::with_max_value_and_full(5);
bitset.remove(1);
bitset.intersect_update(&bitset_serialized);
assert!(bitset.contains(0));
assert!(!bitset.contains(1));
assert!(bitset.contains(2));
assert!(!bitset.contains(3));
assert!(bitset.contains(4));
bitset.intersect_update_with_iter(vec![TinySet::singleton(0)].into_iter());
assert!(bitset.contains(0));
assert!(!bitset.contains(1));
assert!(!bitset.contains(2));
assert!(!bitset.contains(3));
assert!(!bitset.contains(4));
assert_eq!(bitset.len(), 1);
bitset.intersect_update_with_iter(vec![TinySet::singleton(1)].into_iter());
assert!(!bitset.contains(0));
assert!(!bitset.contains(1));
assert!(!bitset.contains(2));
assert!(!bitset.contains(3));
assert!(!bitset.contains(4));
assert_eq!(bitset.len(), 0);
}
#[test]
fn test_read_serialized_bitset_empty() {
let mut bitset = BitSet::with_max_value(5);
bitset.insert(3);
let mut out = vec![];
bitset.serialize(&mut out).unwrap();
let bitset = ReadOnlyBitSet::open(OwnedBytes::new(out));
assert_eq!(bitset.len(), 1);
{
let bitset = BitSet::with_max_value(5);
let mut out = vec![];
bitset.serialize(&mut out).unwrap();
let bitset = ReadOnlyBitSet::open(OwnedBytes::new(out));
assert_eq!(bitset.len(), 0);
}
}
#[test]
fn test_tiny_set_remove() {
{
let mut u = TinySet::empty().insert(63u32).insert(5).remove(63u32);
assert_eq!(u.pop_lowest(), Some(5u32));
assert!(u.pop_lowest().is_none());
}
{
let mut u = TinySet::empty()
.insert(63u32)
.insert(1)
.insert(5)
.remove(63u32);
assert_eq!(u.pop_lowest(), Some(1u32));
assert_eq!(u.pop_lowest(), Some(5u32));
assert!(u.pop_lowest().is_none());
}
{
let mut u = TinySet::empty().insert(1).remove(63u32);
assert_eq!(u.pop_lowest(), Some(1u32));
assert!(u.pop_lowest().is_none());
}
{
let mut u = TinySet::empty().insert(1).remove(1u32);
assert!(u.pop_lowest().is_none());
}
}
#[test]
fn test_tiny_set() {
assert!(TinySet::empty().is_empty());
{
let mut u = TinySet::empty().insert(1u32);
assert_eq!(u.pop_lowest(), Some(1u32));
assert!(u.pop_lowest().is_none())
}
{
let mut u = TinySet::empty().insert(1u32).insert(1u32);
assert_eq!(u.pop_lowest(), Some(1u32));
assert!(u.pop_lowest().is_none())
}
{
let mut u = TinySet::empty().insert(2u32);
assert_eq!(u.pop_lowest(), Some(2u32));
u.insert_mut(1u32);
assert_eq!(u.pop_lowest(), Some(1u32));
assert!(u.pop_lowest().is_none());
}
{
let mut u = TinySet::empty().insert(63u32);
assert_eq!(u.pop_lowest(), Some(63u32));
assert!(u.pop_lowest().is_none());
}
{
let mut u = TinySet::empty().insert(63u32).insert(5);
assert_eq!(u.pop_lowest(), Some(5u32));
assert_eq!(u.pop_lowest(), Some(63u32));
assert!(u.pop_lowest().is_none());
}
{
let original = TinySet::empty().insert(63u32).insert(5);
let after_serialize_deserialize = TinySet::deserialize(original.into_bytes());
assert_eq!(original, after_serialize_deserialize);
}
}
#[test]
fn test_bitset() {
let test_against_hashset = |els: &[u32], max_value: u32| {
let mut hashset: HashSet<u32> = HashSet::new();
let mut bitset = BitSet::with_max_value(max_value);
for &el in els {
assert!(el < max_value);
hashset.insert(el);
bitset.insert(el);
}
for el in 0..max_value {
assert_eq!(hashset.contains(&el), bitset.contains(el));
}
assert_eq!(bitset.max_value(), max_value);
// test deser
let mut data = vec![];
bitset.serialize(&mut data).unwrap();
let ro_bitset = ReadOnlyBitSet::open(OwnedBytes::new(data));
for el in 0..max_value {
assert_eq!(hashset.contains(&el), ro_bitset.contains(el));
}
assert_eq!(ro_bitset.max_value(), max_value);
assert_eq!(ro_bitset.len(), els.len());
};
test_against_hashset(&[], 0);
test_against_hashset(&[], 1);
test_against_hashset(&[0u32], 1);
test_against_hashset(&[0u32], 100);
test_against_hashset(&[1u32, 2u32], 4);
test_against_hashset(&[99u32], 100);
test_against_hashset(&[63u32], 64);
test_against_hashset(&[62u32, 63u32], 64);
}
#[test]
fn test_bitset_num_buckets() {
use super::num_buckets;
assert_eq!(num_buckets(0u32), 0);
assert_eq!(num_buckets(1u32), 1);
assert_eq!(num_buckets(64u32), 1);
assert_eq!(num_buckets(65u32), 2);
assert_eq!(num_buckets(128u32), 2);
assert_eq!(num_buckets(129u32), 3);
}
#[test]
fn test_tinyset_range() {
assert_eq!(
TinySet::range_lower(3).into_iter().collect::<Vec<u32>>(),
[0, 1, 2]
);
assert!(TinySet::range_lower(0).is_empty());
assert_eq!(
TinySet::range_lower(63).into_iter().collect::<Vec<u32>>(),
(0u32..63u32).collect::<Vec<_>>()
);
assert_eq!(
TinySet::range_lower(1).into_iter().collect::<Vec<u32>>(),
[0]
);
assert_eq!(
TinySet::range_lower(2).into_iter().collect::<Vec<u32>>(),
[0, 1]
);
assert_eq!(
TinySet::range_greater_or_equal(3)
.into_iter()
.collect::<Vec<u32>>(),
(3u32..64u32).collect::<Vec<_>>()
);
}
#[test]
fn test_bitset_len() {
let mut bitset = BitSet::with_max_value(1_000);
assert_eq!(bitset.len(), 0);
bitset.insert(3u32);
assert_eq!(bitset.len(), 1);
bitset.insert(103u32);
assert_eq!(bitset.len(), 2);
bitset.insert(3u32);
assert_eq!(bitset.len(), 2);
bitset.insert(103u32);
assert_eq!(bitset.len(), 2);
bitset.insert(104u32);
assert_eq!(bitset.len(), 3);
bitset.remove(105u32);
assert_eq!(bitset.len(), 3);
bitset.remove(104u32);
assert_eq!(bitset.len(), 2);
bitset.remove(3u32);
assert_eq!(bitset.len(), 1);
bitset.remove(103u32);
assert_eq!(bitset.len(), 0);
}
pub fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec<u32> {
StdRng::from_seed([seed_val; 32])
.sample_iter(&Bernoulli::new(ratio).unwrap())
.take(n as usize)
.enumerate()
.filter_map(|(val, keep)| if keep { Some(val as u32) } else { None })
.collect()
}
pub fn sample(n: u32, ratio: f64) -> Vec<u32> {
sample_with_seed(n, ratio, 4)
}
#[test]
fn test_bitset_clear() {
let mut bitset = BitSet::with_max_value(1_000);
let els = sample(1_000, 0.01f64);
for &el in &els {
bitset.insert(el);
}
assert!(els.iter().all(|el| bitset.contains(*el)));
bitset.clear();
for el in 0u32..1000u32 {
assert!(!bitset.contains(el));
}
}
}
#[cfg(all(test, feature = "unstable"))]
mod bench {
use test;
use super::{BitSet, TinySet};
#[bench]
fn bench_tinyset_pop(b: &mut test::Bencher) {
b.iter(|| {
let mut tinyset = TinySet::singleton(test::black_box(31u32));
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
});
}
#[bench]
fn bench_tinyset_sum(b: &mut test::Bencher) {
let tiny_set = TinySet::empty().insert(10u32).insert(14u32).insert(21u32);
b.iter(|| {
assert_eq!(test::black_box(tiny_set).into_iter().sum::<u32>(), 45u32);
});
}
#[bench]
fn bench_tinyarr_sum(b: &mut test::Bencher) {
let v = [10u32, 14u32, 21u32];
b.iter(|| test::black_box(v).iter().cloned().sum::<u32>());
}
#[bench]
fn bench_bitset_initialize(b: &mut test::Bencher) {
b.iter(|| BitSet::with_max_value(1_000_000));
}
}

View File

@@ -1,9 +1,170 @@
#![allow(clippy::len_without_is_empty)]
use std::ops::Deref;
pub use byteorder::LittleEndian as Endianness;
mod bitset;
mod serialize;
mod vint;
mod writer;
pub use bitset::*;
pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
pub use vint::{read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt};
pub use writer::{AntiCallToken, CountingWriter, TerminatingWrite};
/// Has length trait
pub trait HasLen {
/// Return length
fn len(&self) -> usize;
/// Returns true iff empty.
fn is_empty(&self) -> bool {
self.len() == 0
}
}
impl<T: Deref<Target = [u8]>> HasLen for T {
fn len(&self) -> usize {
self.deref().len()
}
}
const HIGHEST_BIT: u64 = 1 << 63;
/// Maps an `i64` to `u64`
///
/// For simplicity, tantivy internally handles `i64` as `u64`.
/// The mapping is defined by this function.
///
/// Maps `i64` to `u64` so that
/// `-2^63 .. 2^63-1` is mapped
/// to
/// `0 .. 2^64-1`
/// in that order.
///
/// This is more suited than simply casting (`val as u64`)
/// because of bitpacking.
///
/// Imagine a list of `i64` ranging from -10 to 10.
/// When casting negative values, the negative values are projected
/// to values over 2^63, and all values end up requiring 64 bits.
///
/// # See also
/// The [reverse mapping is `u64_to_i64`](./fn.u64_to_i64.html).
#[inline]
pub fn i64_to_u64(val: i64) -> u64 {
(val as u64) ^ HIGHEST_BIT
}
/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
#[inline]
pub fn u64_to_i64(val: u64) -> i64 {
(val ^ HIGHEST_BIT) as i64
}
/// Maps an `f64` to `u64`
///
/// For simplicity, tantivy internally handles `f64` as `u64`.
/// The mapping is defined by this function.
///
/// Maps `f64` to `u64` in a monotonic manner, so that bytes lexical order is preserved.
///
/// This is more suited than simply casting (`val as u64`)
/// which would truncate the result
///
/// # Reference
///
/// Daniel Lemire's [blog post](https://lemire.me/blog/2020/12/14/converting-floating-point-numbers-to-integers-while-preserving-order/)
/// explains the mapping in a clear manner.
///
/// # See also
/// The [reverse mapping is `u64_to_f64`](./fn.u64_to_f64.html).
#[inline]
pub fn f64_to_u64(val: f64) -> u64 {
let bits = val.to_bits();
if val.is_sign_positive() {
bits ^ HIGHEST_BIT
} else {
!bits
}
}
/// Reverse the mapping given by [`f64_to_u64`](./fn.f64_to_u64.html).
#[inline]
pub fn u64_to_f64(val: u64) -> f64 {
f64::from_bits(if val & HIGHEST_BIT != 0 {
val ^ HIGHEST_BIT
} else {
!val
})
}
#[cfg(test)]
pub mod test {
use std::f64;
use proptest::prelude::*;
use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64, BinarySerializable, FixedSize};
fn test_i64_converter_helper(val: i64) {
assert_eq!(u64_to_i64(i64_to_u64(val)), val);
}
fn test_f64_converter_helper(val: f64) {
assert_eq!(u64_to_f64(f64_to_u64(val)), val);
}
pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
let mut buffer = Vec::new();
O::default().serialize(&mut buffer).unwrap();
assert_eq!(buffer.len(), O::SIZE_IN_BYTES);
}
proptest! {
#[test]
fn test_f64_converter_monotonicity_proptest((left, right) in (proptest::num::f64::NORMAL, proptest::num::f64::NORMAL)) {
let left_u64 = f64_to_u64(left);
let right_u64 = f64_to_u64(right);
assert_eq!(left_u64 < right_u64, left < right);
}
}
#[test]
fn test_i64_converter() {
assert_eq!(i64_to_u64(i64::min_value()), u64::min_value());
assert_eq!(i64_to_u64(i64::max_value()), u64::max_value());
test_i64_converter_helper(0i64);
test_i64_converter_helper(i64::min_value());
test_i64_converter_helper(i64::max_value());
for i in -1000i64..1000i64 {
test_i64_converter_helper(i);
}
}
#[test]
fn test_f64_converter() {
test_f64_converter_helper(f64::INFINITY);
test_f64_converter_helper(f64::NEG_INFINITY);
test_f64_converter_helper(0.0);
test_f64_converter_helper(-0.0);
test_f64_converter_helper(1.0);
test_f64_converter_helper(-1.0);
}
#[test]
fn test_f64_order() {
assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
.contains(&f64_to_u64(f64::NAN))); // nan is not a number
assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); // same exponent, different mantissa
assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); // same mantissa, different exponent
assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); // different exponent and mantissa
assert!(f64_to_u64(1.0) > f64_to_u64(-1.0)); // pos > neg
assert!(f64_to_u64(-1.5) < f64_to_u64(-1.0));
assert!(f64_to_u64(-2.0) < f64_to_u64(1.0));
assert!(f64_to_u64(-2.0) < f64_to_u64(-1.5));
}
}

View File

@@ -1,10 +1,9 @@
use crate::Endianness;
use crate::VInt;
use std::io::{Read, Write};
use std::{fmt, io};
use byteorder::{ReadBytesExt, WriteBytesExt};
use std::fmt;
use std::io;
use std::io::Read;
use std::io::Write;
use crate::{Endianness, VInt};
/// Trait for a simple binary serialization.
pub trait BinarySerializable: fmt::Debug + Sized {
@@ -202,8 +201,7 @@ impl BinarySerializable for String {
#[cfg(test)]
pub mod test {
use super::VInt;
use super::*;
use super::{VInt, *};
use crate::serialize::BinarySerializable;
pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
let mut buffer = Vec::new();

View File

@@ -1,8 +1,9 @@
use super::BinarySerializable;
use byteorder::{ByteOrder, LittleEndian};
use std::io;
use std::io::Read;
use std::io::Write;
use std::io::{Read, Write};
use byteorder::{ByteOrder, LittleEndian};
use super::BinarySerializable;
/// Wrapper over a `u64` that serializes as a variable int.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
@@ -174,9 +175,7 @@ impl BinarySerializable for VInt {
#[cfg(test)]
mod tests {
use super::serialize_vint_u32;
use super::BinarySerializable;
use super::VInt;
use super::{serialize_vint_u32, BinarySerializable, VInt};
fn aux_test_vint(val: u64) {
let mut v = [14u8; 10];

View File

@@ -54,7 +54,8 @@ impl<W: TerminatingWrite> TerminatingWrite for CountingWriter<W> {
}
}
/// Struct used to prevent from calling [`terminate_ref`](trait.TerminatingWrite#method.terminate_ref) directly
/// Struct used to prevent from calling
/// [`terminate_ref`](trait.TerminatingWrite.html#tymethod.terminate_ref) directly
///
/// The point is that while the type is public, it cannot be built by anyone
/// outside of this module.
@@ -64,9 +65,7 @@ pub struct AntiCallToken(());
pub trait TerminatingWrite: Write {
/// Indicate that the writer will no longer be used. Internally call terminate_ref.
fn terminate(mut self) -> io::Result<()>
where
Self: Sized,
{
where Self: Sized {
self.terminate_ref(AntiCallToken(()))
}
@@ -97,9 +96,10 @@ impl<'a> TerminatingWrite for &'a mut Vec<u8> {
#[cfg(test)]
mod test {
use super::CountingWriter;
use std::io::Write;
use super::CountingWriter;
#[test]
fn test_counting_writer() {
let buffer: Vec<u8> = vec![];

View File

@@ -7,6 +7,7 @@
- [Segments](./basis.md)
- [Defining your schema](./schema.md)
- [Facetting](./facetting.md)
- [Index Sorting](./index_sorting.md)
- [Innerworkings](./innerworkings.md)
- [Inverted index](./inverted_index.md)
- [Best practise](./inverted_index.md)

61
doc/src/index_sorting.md Normal file
View File

@@ -0,0 +1,61 @@
- [Index Sorting](#index-sorting)
+ [Why Sorting](#why-sorting)
* [Compression](#compression)
* [Top-N Optimization](#top-n-optimization)
* [Pruning](#pruning)
* [Other](#other)
+ [Usage](#usage)
# Index Sorting
Tantivy allows you to sort the index by a field.
## Why Sorting
Presorting an index has several advantages:
###### Compression
When data is sorted, it is easier to compress. E.g. the number sequence [5, 2, 3, 1, 4] would be sorted to [1, 2, 3, 4, 5].
With delta encoding, the unsorted list becomes [5, -3, 1, -2, 3], while the sorted list becomes [1, 1, 1, 1, 1].
The compression ratio mainly improves for the fast field of the sorted property; everything else is likely unaffected.
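Here is a minimal sketch of the effect (plain Rust, not part of tantivy's API):
```
// Delta encoding replaces each value by its difference to the previous one.
// Sorted input yields small, uniform deltas that compress well.
fn delta_encode(values: &[i64]) -> Vec<i64> {
    let mut prev = 0;
    values
        .iter()
        .map(|&v| {
            let delta = v - prev;
            prev = v;
            delta
        })
        .collect()
}

fn main() {
    assert_eq!(delta_encode(&[5, 2, 3, 1, 4]), vec![5, -3, 1, -2, 3]);
    assert_eq!(delta_encode(&[1, 2, 3, 4, 5]), vec![1, 1, 1, 1, 1]);
}
```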
###### Top-N Optimization
When data is presorted by a field and a search query requests sorting by that same field, we can leverage the natural order of the documents.
E.g. if the data is sorted by timestamp and we want the top-n newest docs containing a term, we can simply leverage the order of the docids.
Note: Tantivy 0.16 does not do this optimization yet.
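A minimal sketch of the idea (plain Rust, not tantivy's API; the doc ids are assumed to already be emitted newest-first):
```
// When doc ids are emitted newest-first, the first n hits matching the term
// are already the n newest documents, so scanning can stop early.
fn top_n_newest(matching_docs: impl Iterator<Item = u32>, n: usize) -> Vec<u32> {
    matching_docs.take(n).collect()
}

fn main() {
    // Hypothetical matching doc ids, already in newest-first order.
    let hits = top_n_newest([0u32, 3, 7, 42, 100].into_iter(), 3);
    assert_eq!(hits, vec![0, 3, 7]);
}
```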
###### Pruning
Let's say we want all documents matching the filter `>= 2010-08-11`. When the data is sorted by that field, we can do a lookup in the fast field to find the docid range and use it as the filter.
Note: Tantivy 0.16 does not do this optimization yet.
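A minimal sketch of such a lookup over a sorted fast-field column (plain Rust, not tantivy's API; the dates are simplified to integers):
```
// The column is sorted ascending, so doc id order matches value order.
// A binary search yields the docid range whose values satisfy `>= threshold`.
fn docid_range_geq(sorted_values: &[u64], threshold: u64) -> std::ops::Range<usize> {
    let start = sorted_values.partition_point(|&v| v < threshold);
    start..sorted_values.len()
}

fn main() {
    let dates = [2010_01_01u64, 2010_06_15, 2010_08_11, 2011_01_01, 2012_01_01];
    assert_eq!(docid_range_geq(&dates, 2010_08_11), 2..5);
}
```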
###### Other?
In principle, many other algorithms could exploit the monotonically increasing order (aggregations, maybe?).
## Usage
Index sorting can be configured by setting [`sort_by_field`](https://github.com/quickwit-oss/tantivy/blob/000d76b11a139a84b16b9b95060a1c93e8b9851c/src/core/index_meta.rs#L238) on `IndexSettings` and passing it to an `IndexBuilder`. As of tantivy 0.16, only fast fields are allowed to be used as the sort field.
```
let settings = IndexSettings {
sort_by_field: Some(IndexSortByField {
field: "intval".to_string(),
order: Order::Desc,
}),
..Default::default()
};
let mut index_builder = Index::builder().schema(schema);
index_builder = index_builder.settings(settings);
let index = index_builder.create_in_ram().unwrap();
```
## Implementation details
Index sorting is applied during the serialization step. In general there are two serialization steps: [finishing a single segment](https://github.com/quickwit-oss/tantivy/blob/000d76b11a139a84b16b9b95060a1c93e8b9851c/src/indexer/segment_writer.rs#L338) and [merging multiple segments](https://github.com/quickwit-oss/tantivy/blob/000d76b11a139a84b16b9b95060a1c93e8b9851c/src/indexer/merger.rs#L1073).
In both cases we generate a docid mapping reflecting the sort. This mapping is used when serializing the different components (doc store, fast fields, posting lists, norm field, facets).
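A rough sketch of that remapping step (illustrative only; the `new_to_old` mapping and function names are assumptions, not tantivy's internal types):
```
// new_to_old[new_doc_id] = old_doc_id. Reordering a fast field column then
// amounts to gathering the old values in the new doc id order.
fn remap_column(old_values: &[u64], new_to_old: &[u32]) -> Vec<u64> {
    new_to_old
        .iter()
        .map(|&old_doc| old_values[old_doc as usize])
        .collect()
}

fn main() {
    let values = [5u64, 2, 3, 1, 4];
    // Sort doc ids by their value, descending, as `Order::Desc` would.
    let mut new_to_old: Vec<u32> = (0u32..values.len() as u32).collect();
    new_to_old.sort_by_key(|&doc| std::cmp::Reverse(values[doc as usize]));
    assert_eq!(remap_column(&values, &new_to_old), vec![5, 4, 3, 2, 1]);
}
```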

View File

@@ -73,7 +73,7 @@ fn main() -> tantivy::Result<()> {
// multithreaded.
//
// Here we give tantivy a budget of `50MB`.
// Using a bigger heap for the indexer may increase
// Using a bigger memory_arena for the indexer may increase
// throughput, but 50 MB is already plenty.
let mut index_writer = index.writer(50_000_000)?;
@@ -91,12 +91,12 @@ fn main() -> tantivy::Result<()> {
old_man_doc.add_text(title, "The Old Man and the Sea");
old_man_doc.add_text(
body,
"He was an old man who fished alone in a skiff in the Gulf Stream and \
he had gone eighty-four days now without taking a fish.",
"He was an old man who fished alone in a skiff in the Gulf Stream and he had gone \
eighty-four days now without taking a fish.",
);
// ... and add it to the `IndexWriter`.
index_writer.add_document(old_man_doc);
index_writer.add_document(old_man_doc)?;
// For convenience, tantivy also comes with a macro to
// reduce the boilerplate above.
@@ -110,7 +110,7 @@ fn main() -> tantivy::Result<()> {
fresh and green with every spring, carrying in their lower leaf junctures the \
debris of the winters flooding; and sycamores with mottled, white, recumbent \
limbs and branches that arch over the pool"
));
))?;
// Multivalued field just need to be repeated.
index_writer.add_document(doc!(
@@ -120,7 +120,7 @@ fn main() -> tantivy::Result<()> {
enterprise which you have regarded with such evil forebodings. I arrived here \
yesterday, and my first task is to assure my dear sister of my welfare and \
increasing confidence in the success of my undertaking."
));
))?;
// This is an example, so we will only index 3 documents
// here. You can check out tantivy's tutorial to index

View File

@@ -12,8 +12,7 @@
use tantivy::collector::{Collector, SegmentCollector};
use tantivy::fastfield::{DynamicFastFieldReader, FastFieldReader};
use tantivy::query::QueryParser;
use tantivy::schema::Field;
use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
use tantivy::schema::{Field, Schema, FAST, INDEXED, TEXT};
use tantivy::{doc, Index, Score, SegmentReader};
#[derive(Default)]
@@ -86,12 +85,10 @@ impl Collector for StatsCollector {
fn merge_fruits(&self, segment_stats: Vec<Option<Stats>>) -> tantivy::Result<Option<Stats>> {
let mut stats = Stats::default();
for segment_stats_opt in segment_stats {
if let Some(segment_stats) = segment_stats_opt {
stats.count += segment_stats.count;
stats.sum += segment_stats.sum;
stats.squared_sum += segment_stats.squared_sum;
}
for segment_stats in segment_stats.into_iter().flatten() {
stats.count += segment_stats.count;
stats.sum += segment_stats.sum;
stats.squared_sum += segment_stats.squared_sum;
}
Ok(stats.non_zero_count())
}
@@ -147,23 +144,23 @@ fn main() -> tantivy::Result<()> {
product_description => "While it is ok for short distance travel, this broom \
was designed quiditch. It will up your game.",
price => 30_200u64
));
))?;
index_writer.add_document(doc!(
product_name => "Turbulobroom",
product_description => "You might have heard of this broom before : it is the sponsor of the Wales team.\
You'll enjoy its sharp turns, and rapid acceleration",
price => 29_240u64
));
))?;
index_writer.add_document(doc!(
product_name => "Broomio",
product_description => "Great value for the price. This broom is a market favorite",
price => 21_240u64
));
))?;
index_writer.add_document(doc!(
product_name => "Whack a Mole",
product_description => "Prime quality bat.",
price => 5_200u64
));
))?;
index_writer.commit()?;
let reader = index.reader()?;

View File

@@ -62,13 +62,13 @@ fn main() -> tantivy::Result<()> {
// multithreaded.
//
// Here we use a buffer of 50MB per thread. Using a bigger
// heap for the indexer can increase its throughput.
// memory arena for the indexer can increase its throughput.
let mut index_writer = index.writer(50_000_000)?;
index_writer.add_document(doc!(
title => "The Old Man and the Sea",
body => "He was an old man who fished alone in a skiff in the Gulf Stream and \
he had gone eighty-four days now without taking a fish."
));
))?;
index_writer.add_document(doc!(
title => "Of Mice and Men",
body => r#"A few miles south of Soledad, the Salinas River drops in close to the hillside
@@ -79,14 +79,14 @@ fn main() -> tantivy::Result<()> {
fresh and green with every spring, carrying in their lower leaf junctures the
debris of the winters flooding; and sycamores with mottled, white, recumbent
limbs and branches that arch over the pool"#
));
))?;
index_writer.add_document(doc!(
title => "Frankenstein",
body => r#"You will rejoice to hear that no disaster has accompanied the commencement of an
enterprise which you have regarded with such evil forebodings. I arrived here
yesterday, and my first task is to assure my dear sister of my welfare and
increasing confidence in the success of my undertaking."#
));
))?;
index_writer.commit()?;
let reader = index.reader()?;

View File

@@ -56,8 +56,9 @@ fn main() -> tantivy::Result<()> {
// If it is `text`, let's make sure to keep it `raw` and let's avoid
// running any text processing on it.
// This is done by associating this field to the tokenizer named `raw`.
// Rather than building our [`TextOptions`](//docs.rs/tantivy/~0/tantivy/schema/struct.TextOptions.html) manually,
// We use the `STRING` shortcut. `STRING` stands for indexed (without term frequency or positions)
// Rather than building our
// [`TextOptions`](//docs.rs/tantivy/~0/tantivy/schema/struct.TextOptions.html) manually, We
// use the `STRING` shortcut. `STRING` stands for indexed (without term frequency or positions)
// and untokenized.
//
// Because we also want to be able to see this `id` in our returned documents,
@@ -76,15 +77,15 @@ fn main() -> tantivy::Result<()> {
index_writer.add_document(doc!(
isbn => "978-0099908401",
title => "The old Man and the see"
));
))?;
index_writer.add_document(doc!(
isbn => "978-0140177398",
title => "Of Mice and Men",
));
))?;
index_writer.add_document(doc!(
title => "Frankentein", //< Oops there is a typo here.
isbn => "978-9176370711",
));
))?;
index_writer.commit()?;
let reader = index.reader()?;
@@ -122,7 +123,7 @@ fn main() -> tantivy::Result<()> {
index_writer.add_document(doc!(
title => "Frankenstein",
isbn => "978-9176370711",
));
))?;
// You are guaranteed that your clients will only observe your index in
// the state it was in after a commit.

View File

@@ -23,7 +23,7 @@ fn main() -> tantivy::Result<()> {
let name = schema_builder.add_text_field("felin_name", TEXT | STORED);
// this is our faceted field: its scientific classification
let classification = schema_builder.add_facet_field("classification", INDEXED);
let classification = schema_builder.add_facet_field("classification", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
@@ -35,35 +35,35 @@ fn main() -> tantivy::Result<()> {
index_writer.add_document(doc!(
name => "Cat",
classification => Facet::from("/Felidae/Felinae/Felis")
));
))?;
index_writer.add_document(doc!(
name => "Canada lynx",
classification => Facet::from("/Felidae/Felinae/Lynx")
));
))?;
index_writer.add_document(doc!(
name => "Cheetah",
classification => Facet::from("/Felidae/Felinae/Acinonyx")
));
))?;
index_writer.add_document(doc!(
name => "Tiger",
classification => Facet::from("/Felidae/Pantherinae/Panthera")
));
))?;
index_writer.add_document(doc!(
name => "Lion",
classification => Facet::from("/Felidae/Pantherinae/Panthera")
));
))?;
index_writer.add_document(doc!(
name => "Jaguar",
classification => Facet::from("/Felidae/Pantherinae/Panthera")
));
))?;
index_writer.add_document(doc!(
name => "Sunda clouded leopard",
classification => Facet::from("/Felidae/Pantherinae/Neofelis")
));
))?;
index_writer.add_document(doc!(
name => "Fossa",
classification => Facet::from("/Eupleridae/Cryptoprocta")
));
))?;
index_writer.commit()?;
let reader = index.reader()?;

View File

@@ -1,15 +1,15 @@
use std::collections::HashSet;
use tantivy::collector::TopDocs;
use tantivy::doc;
use tantivy::query::BooleanQuery;
use tantivy::schema::*;
use tantivy::{DocId, Index, Score, SegmentReader};
use tantivy::{doc, DocId, Index, Score, SegmentReader};
fn main() -> tantivy::Result<()> {
let mut schema_builder = Schema::builder();
let title = schema_builder.add_text_field("title", STORED);
let ingredient = schema_builder.add_facet_field("ingredient", INDEXED);
let ingredient = schema_builder.add_facet_field("ingredient", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
@@ -20,14 +20,14 @@ fn main() -> tantivy::Result<()> {
title => "Fried egg",
ingredient => Facet::from("/ingredient/egg"),
ingredient => Facet::from("/ingredient/oil"),
));
))?;
index_writer.add_document(doc!(
title => "Scrambled egg",
ingredient => Facet::from("/ingredient/egg"),
ingredient => Facet::from("/ingredient/butter"),
ingredient => Facet::from("/ingredient/milk"),
ingredient => Facet::from("/ingredient/salt"),
));
))?;
index_writer.add_document(doc!(
title => "Egg rolls",
ingredient => Facet::from("/ingredient/egg"),
@@ -36,7 +36,7 @@ fn main() -> tantivy::Result<()> {
ingredient => Facet::from("/ingredient/oil"),
ingredient => Facet::from("/ingredient/tortilla-wrap"),
ingredient => Facet::from("/ingredient/mushroom"),
));
))?;
index_writer.commit()?;
let reader = index.reader()?;
@@ -87,7 +87,7 @@ fn main() -> tantivy::Result<()> {
.unwrap()
.get_first(title)
.unwrap()
.text()
.as_text()
.unwrap()
.to_owned()
})

View File

@@ -7,7 +7,7 @@ use tantivy::query::RangeQuery;
use tantivy::schema::{Schema, INDEXED};
use tantivy::{doc, Index, Result};
fn run() -> Result<()> {
fn main() -> Result<()> {
// For the sake of simplicity, this schema will only have 1 field
let mut schema_builder = Schema::builder();
@@ -19,7 +19,7 @@ fn run() -> Result<()> {
{
let mut index_writer = index.writer_with_num_threads(1, 6_000_000)?;
for year in 1950u64..2019u64 {
index_writer.add_document(doc!(year_field => year));
index_writer.add_document(doc!(year_field => year))?;
}
index_writer.commit()?;
// The index will be a range of years
@@ -33,7 +33,3 @@ fn run() -> Result<()> {
assert_eq!(num_60s_books, 10);
Ok(())
}
fn main() {
run().unwrap()
}

View File

@@ -25,9 +25,9 @@ fn main() -> tantivy::Result<()> {
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 50_000_000)?;
index_writer.add_document(doc!(title => "The Old Man and the Sea"));
index_writer.add_document(doc!(title => "Of Mice and Men"));
index_writer.add_document(doc!(title => "The modern Promotheus"));
index_writer.add_document(doc!(title => "The Old Man and the Sea"))?;
index_writer.add_document(doc!(title => "Of Mice and Men"))?;
index_writer.add_document(doc!(title => "The modern Promotheus"))?;
index_writer.commit()?;
let reader = index.reader()?;
@@ -52,11 +52,11 @@ fn main() -> tantivy::Result<()> {
let term_the = Term::from_field_text(title, "the");
// This segment posting object is like a cursor over the documents matching the term.
// The `IndexRecordOption` arguments tells tantivy we will be interested in both term frequencies
// and positions.
// The `IndexRecordOption` arguments tells tantivy we will be interested in both term
// frequencies and positions.
//
// If you don't need all this information, you may get better performance by decompressing less
// information.
// If you don't need all this information, you may get better performance by decompressing
// less information.
if let Some(mut segment_postings) =
inverted_index.read_postings(&term_the, IndexRecordOption::WithFreqsAndPositions)?
{
@@ -109,11 +109,11 @@ fn main() -> tantivy::Result<()> {
let inverted_index = segment_reader.inverted_index(title)?;
// This segment posting object is like a cursor over the documents matching the term.
// The `IndexRecordOption` arguments tells tantivy we will be interested in both term frequencies
// and positions.
// The `IndexRecordOption` arguments tells tantivy we will be interested in both term
// frequencies and positions.
//
// If you don't need all this information, you may get better performance by decompressing less
// information.
// If you don't need all this information, you may get better performance by decompressing
// less information.
if let Some(mut block_segment_postings) =
inverted_index.read_block_postings(&term_the, IndexRecordOption::Basic)?
{

View File

@@ -28,8 +28,9 @@
use std::sync::{Arc, RwLock};
use std::thread;
use std::time::Duration;
use tantivy::schema::{Schema, STORED, TEXT};
use tantivy::{doc, Index, IndexWriter, Opstamp};
use tantivy::{doc, Index, IndexWriter, Opstamp, TantivyError};
fn main() -> tantivy::Result<()> {
// # Defining the schema
@@ -59,10 +60,11 @@ fn main() -> tantivy::Result<()> {
fresh and green with every spring, carrying in their lower leaf junctures the \
debris of the winters flooding; and sycamores with mottled, white, recumbent \
limbs and branches that arch over the pool"
));
))?;
println!("add doc {} from thread 1 - opstamp {}", i, opstamp);
thread::sleep(Duration::from_millis(20));
}
Result::<(), TantivyError>::Ok(())
});
// # Second indexing thread.
@@ -78,19 +80,21 @@ fn main() -> tantivy::Result<()> {
index_writer_rlock.add_document(doc!(
title => "Manufacturing consent",
body => "Some great book description..."
))
))?
};
println!("add doc {} from thread 2 - opstamp {}", i, opstamp);
thread::sleep(Duration::from_millis(10));
}
Result::<(), TantivyError>::Ok(())
});
// # In the main thread, we commit 10 times, once every 500ms.
for _ in 0..10 {
let opstamp: Opstamp = {
// Committing or rollbacking on the other hand requires write lock. This will block other threads.
// Committing or rollbacking on the other hand requires write lock. This will block
// other threads.
let mut index_writer_wlock = index_writer.write().unwrap();
index_writer_wlock.commit().unwrap()
index_writer_wlock.commit()?
};
println!("committed with opstamp {}", opstamp);
thread::sleep(Duration::from_millis(500));

View File

@@ -68,7 +68,7 @@ fn main() -> tantivy::Result<()> {
let old_man_doc = doc!(title => title_tok, body => body_tok);
// ... now let's just add it to the IndexWriter
index_writer.add_document(old_man_doc);
index_writer.add_document(old_man_doc)?;
// Pretokenized text can also be fed as JSON
let short_man_json = r#"{
@@ -84,7 +84,7 @@ fn main() -> tantivy::Result<()> {
let short_man_doc = schema.parse_document(short_man_json)?;
index_writer.add_document(short_man_doc);
index_writer.add_document(short_man_doc)?;
// Let's commit changes
index_writer.commit()?;
@@ -106,9 +106,7 @@ fn main() -> tantivy::Result<()> {
IndexRecordOption::Basic,
);
let (top_docs, count) = searcher
.search(&query, &(TopDocs::with_limit(2), Count))
.unwrap();
let (top_docs, count) = searcher.search(&query, &(TopDocs::with_limit(2), Count))?;
assert_eq!(count, 2);
@@ -129,9 +127,7 @@ fn main() -> tantivy::Result<()> {
IndexRecordOption::Basic,
);
let (_top_docs, count) = searcher
.search(&query, &(TopDocs::with_limit(2), Count))
.unwrap();
let (_top_docs, count) = searcher.search(&query, &(TopDocs::with_limit(2), Count))?;
assert_eq!(count, 0);

View File

@@ -40,7 +40,7 @@ fn main() -> tantivy::Result<()> {
fresh and green with every spring, carrying in their lower leaf junctures the \
debris of the winters flooding; and sycamores with mottled, white, recumbent \
limbs and branches that arch over the pool"
));
))?;
// ...
index_writer.commit()?;
@@ -57,7 +57,10 @@ fn main() -> tantivy::Result<()> {
let doc = searcher.doc(doc_address)?;
let snippet = snippet_generator.snippet_from_doc(&doc);
println!("Document score {}:", score);
println!("title: {}", doc.get_first(title).unwrap().text().unwrap());
println!(
"title: {}",
doc.get_first(title).unwrap().as_text().unwrap()
);
println!("snippet: {}", snippet.to_html());
println!("custom highlighting: {}", highlight(snippet));
}
@@ -70,13 +73,13 @@ fn highlight(snippet: Snippet) -> String {
let mut start_from = 0;
for fragment_range in snippet.highlighted() {
result.push_str(&snippet.fragments()[start_from..fragment_range.start]);
result.push_str(&snippet.fragment()[start_from..fragment_range.start]);
result.push_str(" --> ");
result.push_str(&snippet.fragments()[fragment_range.clone()]);
result.push_str(&snippet.fragment()[fragment_range.clone()]);
result.push_str(" <-- ");
start_from = fragment_range.end;
}
result.push_str(&snippet.fragments()[start_from..]);
result.push_str(&snippet.fragment()[start_from..]);
result
}

View File

@@ -68,7 +68,7 @@ fn main() -> tantivy::Result<()> {
title => "The Old Man and the Sea",
body => "He was an old man who fished alone in a skiff in the Gulf Stream and \
he had gone eighty-four days now without taking a fish."
));
))?;
index_writer.add_document(doc!(
title => "Of Mice and Men",
@@ -80,7 +80,7 @@ fn main() -> tantivy::Result<()> {
fresh and green with every spring, carrying in their lower leaf junctures the \
debris of the winters flooding; and sycamores with mottled, white, recumbent \
limbs and branches that arch over the pool"
));
))?;
index_writer.add_document(doc!(
title => "Frankenstein",
@@ -88,7 +88,7 @@ fn main() -> tantivy::Result<()> {
enterprise which you have regarded with such evil forebodings. I arrived here \
yesterday, and my first task is to assure my dear sister of my welfare and \
increasing confidence in the success of my undertaking."
));
))?;
index_writer.commit()?;

224
examples/warmer.rs Normal file
View File

@@ -0,0 +1,224 @@
use std::cmp::Reverse;
use std::collections::{HashMap, HashSet};
use std::sync::{Arc, RwLock, Weak};
use tantivy::collector::TopDocs;
use tantivy::fastfield::FastFieldReader;
use tantivy::query::QueryParser;
use tantivy::schema::{Field, Schema, FAST, TEXT};
use tantivy::{
doc, DocAddress, DocId, Index, IndexReader, Opstamp, Searcher, SearcherGeneration, SegmentId,
SegmentReader, Warmer,
};
// This example shows how warmers can be used to
// load values from an external source using the Warmer API.
//
// In this example, we assume an e-commerce search engine.
type ProductId = u64;
/// Price
type Price = u32;
pub trait PriceFetcher: Send + Sync + 'static {
fn fetch_prices(&self, product_ids: &[ProductId]) -> Vec<Price>;
}
struct DynamicPriceColumn {
field: Field,
price_cache: RwLock<HashMap<(SegmentId, Option<Opstamp>), Arc<Vec<Price>>>>,
price_fetcher: Box<dyn PriceFetcher>,
}
impl DynamicPriceColumn {
pub fn with_product_id_field<T: PriceFetcher>(field: Field, price_fetcher: T) -> Self {
DynamicPriceColumn {
field,
price_cache: Default::default(),
price_fetcher: Box::new(price_fetcher),
}
}
pub fn price_for_segment(&self, segment_reader: &SegmentReader) -> Option<Arc<Vec<Price>>> {
let segment_key = (segment_reader.segment_id(), segment_reader.delete_opstamp());
self.price_cache.read().unwrap().get(&segment_key).cloned()
}
}
impl Warmer for DynamicPriceColumn {
fn warm(&self, searcher: &Searcher) -> tantivy::Result<()> {
for segment in searcher.segment_readers() {
let key = (segment.segment_id(), segment.delete_opstamp());
let product_id_reader = segment.fast_fields().u64(self.field)?;
let product_ids: Vec<ProductId> = segment
.doc_ids_alive()
.map(|doc| product_id_reader.get(doc))
.collect();
let mut prices_it = self.price_fetcher.fetch_prices(&product_ids).into_iter();
let mut price_vals: Vec<Price> = Vec::new();
for doc in 0..segment.max_doc() {
if segment.is_deleted(doc) {
price_vals.push(0);
} else {
price_vals.push(prices_it.next().unwrap())
}
}
self.price_cache
.write()
.unwrap()
.insert(key, Arc::new(price_vals));
}
Ok(())
}
fn garbage_collect(&self, live_generations: &[&SearcherGeneration]) {
let live_segment_id_and_delete_ops: HashSet<(SegmentId, Option<Opstamp>)> =
live_generations
.iter()
.flat_map(|gen| gen.segments())
.map(|(&segment_id, &opstamp)| (segment_id, opstamp))
.collect();
let mut price_cache_wrt = self.price_cache.write().unwrap();
// let price_cache = std::mem::take(&mut *price_cache_wrt);
// Drain would be nicer here.
*price_cache_wrt = std::mem::take(&mut *price_cache_wrt)
.into_iter()
.filter(|(seg_id_and_op, _)| !live_segment_id_and_delete_ops.contains(seg_id_and_op))
.collect();
}
}
/// For the sake of this example, the table is just an editable HashMap behind a RwLock.
/// It represents a mapping (ProductId -> Price).
///
/// In practice, it could fetch the prices from an external service, like a SQL table.
#[derive(Default, Clone)]
pub struct ExternalPriceTable {
prices: Arc<RwLock<HashMap<ProductId, Price>>>,
}
impl ExternalPriceTable {
pub fn update_price(&self, product_id: ProductId, price: Price) {
let mut prices_wrt = self.prices.write().unwrap();
prices_wrt.insert(product_id, price);
}
}
impl PriceFetcher for ExternalPriceTable {
fn fetch_prices(&self, product_ids: &[ProductId]) -> Vec<Price> {
let prices_read = self.prices.read().unwrap();
product_ids
.iter()
.map(|product_id| prices_read.get(product_id).cloned().unwrap_or(0))
.collect()
}
}
fn main() -> tantivy::Result<()> {
// Declaring our schema.
let mut schema_builder = Schema::builder();
// The product id is assumed to be a primary id for our external price source.
let product_id = schema_builder.add_u64_field("product_id", FAST);
let text = schema_builder.add_text_field("text", TEXT);
let schema: Schema = schema_builder.build();
let price_table = ExternalPriceTable::default();
let price_dynamic_column = Arc::new(DynamicPriceColumn::with_product_id_field(
product_id,
price_table.clone(),
));
price_table.update_price(OLIVE_OIL, 12);
price_table.update_price(GLOVES, 13);
price_table.update_price(SNEAKERS, 80);
const OLIVE_OIL: ProductId = 323423;
const GLOVES: ProductId = 3966623;
const SNEAKERS: ProductId = 23222;
let index = Index::create_in_ram(schema);
let mut writer = index.writer_with_num_threads(1, 10_000_000)?;
writer.add_document(doc!(product_id=>OLIVE_OIL, text=>"cooking olive oil from greece"))?;
writer.add_document(doc!(product_id=>GLOVES, text=>"kitchen gloves, perfect for cooking"))?;
writer.add_document(doc!(product_id=>SNEAKERS, text=>"uber sweet sneakers"))?;
writer.commit()?;
let warmers: Vec<Weak<dyn Warmer>> = vec![Arc::downgrade(
&(price_dynamic_column.clone() as Arc<dyn Warmer>),
)];
let reader: IndexReader = index
.reader_builder()
.warmers(warmers)
.num_searchers(1)
.try_into()?;
reader.reload()?;
let query_parser = QueryParser::for_index(&index, vec![text]);
let query = query_parser.parse_query("cooking")?;
let searcher = reader.searcher();
let score_by_price = move |segment_reader: &SegmentReader| {
let price = price_dynamic_column
.price_for_segment(segment_reader)
.unwrap();
move |doc_id: DocId| Reverse(price[doc_id as usize])
};
let most_expensive_first = TopDocs::with_limit(10).custom_score(score_by_price);
let hits = searcher.search(&query, &most_expensive_first)?;
assert_eq!(
&hits,
&[
(
Reverse(12u32),
DocAddress {
segment_ord: 0,
doc_id: 0u32
}
),
(
Reverse(13u32),
DocAddress {
segment_ord: 0,
doc_id: 1u32
}
),
]
);
// Olive oil just got more expensive!
price_table.update_price(OLIVE_OIL, 15);
// The price updates are directly reflected on `reload`.
//
// Be careful here though!...
// You may have spotted that we are still using the same `Searcher`.
//
// It is up to the `Warmer` implementer to decide how
// to control this behavior.
reader.reload()?;
let hits_with_new_prices = searcher.search(&query, &most_expensive_first)?;
assert_eq!(
&hits_with_new_prices,
&[
(
Reverse(13u32),
DocAddress {
segment_ord: 0,
doc_id: 1u32
}
),
(
Reverse(15u32),
DocAddress {
segment_ord: 0,
doc_id: 0u32
}
),
]
);
Ok(())
}

View File

@@ -9,8 +9,8 @@ description = "Fast field codecs used by tantivy"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
common = { path = "../common/" }
tantivy-bitpacker = { path = "../bitpacker/" }
common = { version = "0.1", path = "../common/", package = "tantivy-common" }
tantivy-bitpacker = { version="0.1.1", path = "../bitpacker/" }
prettytable-rs = {version="0.8.0", optional= true}
rand = {version="0.8.3", optional= true}

View File

@@ -4,14 +4,14 @@ extern crate test;
#[cfg(test)]
mod tests {
use fastfield_codecs::{
bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer},
linearinterpol::{LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer},
multilinearinterpol::{
MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
},
*,
use fastfield_codecs::bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer};
use fastfield_codecs::linearinterpol::{
LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer,
};
use fastfield_codecs::multilinearinterpol::{
MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
};
use fastfield_codecs::*;
fn get_data() -> Vec<u64> {
let mut data: Vec<_> = (100..55000_u64)

View File

@@ -1,13 +1,9 @@
use crate::FastFieldCodecReader;
use crate::FastFieldCodecSerializer;
use crate::FastFieldDataAccess;
use crate::FastFieldStats;
use common::BinarySerializable;
use std::io::{self, Write};
use tantivy_bitpacker::compute_num_bits;
use tantivy_bitpacker::BitPacker;
use tantivy_bitpacker::BitUnpacker;
use common::BinarySerializable;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
/// Depending on the field type, a different
/// fast field is required.

View File

@@ -53,7 +53,8 @@ pub trait FastFieldCodecSerializer {
pub trait FastFieldDataAccess {
/// Return the value associated to the given position.
///
/// Whenever possible use the Iterator passed to the fastfield creation instead, for performance reasons.
/// Whenever possible use the Iterator passed to the fastfield creation instead, for performance
/// reasons.
///
/// # Panics
///
@@ -82,12 +83,10 @@ impl FastFieldDataAccess for Vec<u64> {
#[cfg(test)]
mod tests {
use crate::{
bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer},
linearinterpol::{LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer},
multilinearinterpol::{
MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
},
use crate::bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer};
use crate::linearinterpol::{LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer};
use crate::multilinearinterpol::{
MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
};
pub fn create_and_validate<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
@@ -118,7 +117,7 @@ mod tests {
);
}
}
let actual_compression = data.len() as f32 / out.len() as f32;
let actual_compression = out.len() as f32 / (data.len() as f32 * 8.0);
(estimation, actual_compression)
}
pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {

View File

@@ -1,15 +1,10 @@
use crate::FastFieldCodecReader;
use crate::FastFieldCodecSerializer;
use crate::FastFieldDataAccess;
use crate::FastFieldStats;
use std::io::{self, Read, Write};
use std::ops::Sub;
use tantivy_bitpacker::compute_num_bits;
use tantivy_bitpacker::BitPacker;
use common::BinarySerializable;
use common::FixedSize;
use tantivy_bitpacker::BitUnpacker;
use common::{BinarySerializable, FixedSize};
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
/// Depending on the field type, a different
/// fast field is required.
@@ -137,7 +132,7 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
// will be offset to 0
offset = offset.max(calculated_value - actual_value);
} else {
//positive value no offset reuqired
// positive value no offset reuqired
rel_positive_max = rel_positive_max.max(actual_value - calculated_value);
}
}
@@ -171,7 +166,7 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
stats: FastFieldStats,
) -> bool {
if stats.num_vals < 3 {
return false; //disable compressor for this case
return false; // disable compressor for this case
}
// On serialisation the offset is added to the actual value.
// We need to make sure this won't run into overflow calculation issues.
@@ -211,8 +206,8 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
.max()
.unwrap_or(0);
// the theory would be that we don't have the actual max_distance, but we are close within 50%
// threshold.
// the theory would be that we don't have the actual max_distance, but we are close within
// 50% threshold.
// It is multiplied by 2 because in a log case scenario the line would be as much above as
// below. So the offset would = max_distance
//
@@ -239,11 +234,21 @@ mod tests {
use super::*;
use crate::tests::get_codec_test_data_sets;
fn create_and_validate(data: &[u64], name: &str) {
fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
crate::tests::create_and_validate::<
LinearInterpolFastFieldSerializer,
LinearInterpolFastFieldReader,
>(data, name);
>(data, name)
}
#[test]
fn test_compression() {
let data = (10..=6_000_u64).collect::<Vec<_>>();
let (estimate, actual_compression) =
create_and_validate(&data, "simple monotonically large");
assert!(actual_compression < 0.01);
assert!(estimate < 0.01);
}
#[test]

View File

@@ -1,10 +1,8 @@
#[macro_use]
extern crate prettytable;
use fastfield_codecs::{
linearinterpol::LinearInterpolFastFieldSerializer,
multilinearinterpol::MultiLinearInterpolFastFieldSerializer, FastFieldCodecSerializer,
FastFieldStats,
};
use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
use fastfield_codecs::{FastFieldCodecSerializer, FastFieldStats};
use prettytable::{Cell, Row, Table};
fn main() {
@@ -24,7 +22,7 @@ fn main() {
);
results.push(res);
//let best_estimation_codec = results
// let best_estimation_codec = results
//.iter()
//.min_by(|res1, res2| res1.partial_cmp(&res2).unwrap())
//.unwrap();
@@ -41,7 +39,6 @@ fn main() {
} else {
(est.to_string(), comp.to_string())
};
#[allow(clippy::all)]
let style = if comp == best_compression_ratio_codec.1 {
"Fb"
} else {
@@ -49,7 +46,7 @@ fn main() {
};
table.add_row(Row::new(vec![
Cell::new(&name.to_string()).style_spec("bFg"),
Cell::new(name).style_spec("bFg"),
Cell::new(&ratio_cell).style_spec(style),
Cell::new(&est_cell).style_spec(""),
]));
@@ -73,7 +70,7 @@ pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {
current_cumulative
})
.collect::<Vec<_>>();
//let data = (1..=200000_u64).map(|num| num + num).collect::<Vec<_>>();
// let data = (1..=200000_u64).map(|num| num + num).collect::<Vec<_>>();
data_and_names.push((data, "Monotonically increasing concave"));
let mut current_cumulative = 0;

View File

@@ -1,16 +1,22 @@
use crate::FastFieldCodecReader;
use crate::FastFieldCodecSerializer;
use crate::FastFieldDataAccess;
use crate::FastFieldStats;
use common::CountingWriter;
//! The MultiLinearInterpol compressor uses linear interpolation to guess a value and stores the
//! offset from that guess, but in blocks of 512.
//!
//! With a CHUNK_SIZE of 512 and 29 bytes of metadata per block, we get a metadata overhead of
//! 232 / 512 = 0.45 bits per element. The additional space required per element in a block is
//! the maximum deviation of the linear interpolation estimation function.
//!
//! E.g. if the maximum deviation of an element is 12, all elements cost 4 bits.
//!
//! Size per block:
//! Num Elements * Bits for Maximum Deviation from Interpolation + 29 Bytes Metadata
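//!
//! For example (illustrative arithmetic, assuming the constants above): a maximum deviation
//! of 12 needs 4 bits per element (2^4 = 16 >= 13), so a full block costs
//! 512 * 4 bits + 29 bytes = 256 bytes + 29 bytes = 285 bytes.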
use std::io::{self, Read, Write};
use std::ops::Sub;
use tantivy_bitpacker::compute_num_bits;
use tantivy_bitpacker::BitPacker;
use common::BinarySerializable;
use common::DeserializeFrom;
use tantivy_bitpacker::BitUnpacker;
use common::{BinarySerializable, CountingWriter, DeserializeFrom};
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
const CHUNK_SIZE: u64 = 512;
@@ -43,7 +49,7 @@ struct Function {
impl Function {
fn calc_slope(&mut self) {
let num_vals = self.end_pos - self.start_pos;
get_slope(self.value_start_pos, self.value_end_pos, num_vals);
self.slope = get_slope(self.value_start_pos, self.value_end_pos, num_vals);
}
// split the interpolation into two function, change self and return the second split
fn split(&mut self, split_pos: u64, split_pos_value: u64) -> Function {
@@ -238,11 +244,11 @@ impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
);
if calculated_value > actual_value {
// negative value we need to apply an offset
// we ignore negative values in the max value calculation, because negative values
// will be offset to 0
// we ignore negative values in the max value calculation, because negative
// values will be offset to 0
offset = offset.max(calculated_value - actual_value);
} else {
//positive value no offset reuqired
// positive value no offset reuqired
rel_positive_max = rel_positive_max.max(actual_value - calculated_value);
}
}
@@ -336,8 +342,8 @@ impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
.unwrap();
// Estimate one block and extrapolate the cost to all blocks.
// the theory would be that we don't have the actual max_distance, but we are close within 50%
// threshold.
// the theory would be that we don't have the actual max_distance, but we are close within
// 50% threshold.
// It is multiplied by 2 because in a log case scenario the line would be as much above as
// below. So the offset would = max_distance
//
@@ -364,11 +370,22 @@ mod tests {
use super::*;
use crate::tests::get_codec_test_data_sets;
fn create_and_validate(data: &[u64], name: &str) {
fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
crate::tests::create_and_validate::<
MultiLinearInterpolFastFieldSerializer,
MultiLinearInterpolFastFieldReader,
>(data, name);
>(data, name)
}
#[test]
fn test_compression() {
let data = (10..=6_000_u64).collect::<Vec<_>>();
let (estimate, actual_compression) =
create_and_validate(&data, "simple monotonically large");
assert!(actual_compression < 0.2);
assert!(estimate < 0.20);
assert!(estimate > 0.15);
assert!(actual_compression > 0.01);
}
#[test]
@@ -400,9 +417,11 @@ mod tests {
fn rand() {
for _ in 0..10 {
let mut data = (5_000..20_000)
.map(|_| rand::random::<u64>() as u64)
.map(|_| rand::random::<u32>() as u64)
.collect::<Vec<_>>();
create_and_validate(&data, "random");
let (estimate, actual_compression) = create_and_validate(&data, "random");
dbg!(estimate);
dbg!(actual_compression);
data.reverse();
create_and_validate(&data, "random");

View File

@@ -1,9 +1,10 @@
[package]
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
name = "ownedbytes"
version = "0.1.0"
version = "0.2.0"
edition = "2018"
description = "Expose data as static slice"
license = "MIT"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

View File

@@ -1,9 +1,9 @@
use stable_deref_trait::StableDeref;
use std::convert::TryInto;
use std::mem;
use std::ops::{Deref, Range};
use std::sync::Arc;
use std::{fmt, io};
use std::{fmt, io, mem};
use stable_deref_trait::StableDeref;
/// An OwnedBytes simply wraps an object that owns a slice of data and exposes
/// this data as a static slice.
@@ -35,6 +35,8 @@ impl OwnedBytes {
}
/// creates a fileslice that is just a view over a slice of the data.
#[must_use]
#[inline]
pub fn slice(&self, range: Range<usize>) -> Self {
OwnedBytes {
data: &self.data[range],
@@ -63,6 +65,8 @@ impl OwnedBytes {
/// On the other hand, both `left` and `right` retain a handle over
/// the entire slice of memory. In other words, the memory will only
/// be released when both left and right are dropped.
#[inline]
#[must_use]
pub fn split(self, split_len: usize) -> (OwnedBytes, OwnedBytes) {
let right_box_stable_deref = self.box_stable_deref.clone();
let left = OwnedBytes {
@@ -76,6 +80,19 @@ impl OwnedBytes {
(left, right)
}
/// Splits the right part of the `OwnedBytes` at the given offset.
///
/// `self` is truncated to `split_len`, left with the remaining bytes.
pub fn split_off(&mut self, split_len: usize) -> OwnedBytes {
let right_box_stable_deref = self.box_stable_deref.clone();
let right_piece = OwnedBytes {
data: &self.data[split_len..],
box_stable_deref: right_box_stable_deref,
};
self.data = &self.data[..split_len];
right_piece
}
/// Returns true iff this `OwnedBytes` is empty.
#[inline]
pub fn is_empty(&self) -> bool {
@@ -83,8 +100,6 @@ impl OwnedBytes {
}
/// Drops the left most `advance_len` bytes.
///
/// See also [.clip(clip_len: usize))](#method.clip).
#[inline]
pub fn advance(&mut self, advance_len: usize) {
self.data = &self.data[advance_len..]
@@ -124,6 +139,34 @@ impl fmt::Debug for OwnedBytes {
}
}
impl PartialEq for OwnedBytes {
fn eq(&self, other: &OwnedBytes) -> bool {
self.as_slice() == other.as_slice()
}
}
impl Eq for OwnedBytes {}
impl PartialEq<[u8]> for OwnedBytes {
fn eq(&self, other: &[u8]) -> bool {
self.as_slice() == other
}
}
impl PartialEq<str> for OwnedBytes {
fn eq(&self, other: &str) -> bool {
self.as_slice() == other.as_bytes()
}
}
impl<'a, T: ?Sized> PartialEq<&'a T> for OwnedBytes
where OwnedBytes: PartialEq<T>
{
fn eq(&self, other: &&'a T) -> bool {
*self == **other
}
}
impl Deref for OwnedBytes {
type Target = [u8];
@@ -287,4 +330,14 @@ mod tests {
assert_eq!(right.as_slice(), b"");
}
}
#[test]
fn test_split_off() {
let mut data = OwnedBytes::new(b"abcdef".as_ref());
assert_eq!(data, "abcdef");
assert_eq!(data.split_off(2), "cdef");
assert_eq!(data, "ab");
assert_eq!(data.split_off(1), "b");
assert_eq!(data, "a");
}
}

View File

@@ -5,9 +5,8 @@ authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = ["database-implementations", "data-structures"]
description = """Search engine library"""
documentation = "https://tantivy-search.github.io/tantivy/tantivy/index.html"
homepage = "https://github.com/tantivy-search/tantivy"
repository = "https://github.com/tantivy-search/tantivy"
homepage = "https://github.com/quickwit-oss/tantivy"
repository = "https://github.com/quickwit-oss/tantivy"
readme = "README.md"
keywords = ["search", "information", "retrieval"]
edition = "2018"

View File

@@ -1,17 +1,20 @@
use super::user_input_ast::{UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral};
use crate::Occur;
use combine::error::StringStreamError;
use combine::parser::char::{char, digit, space, spaces, string};
use combine::parser::combinator::recognize;
use combine::parser::range::{take_while, take_while1};
use combine::parser::repeat::escaped;
use combine::parser::Parser;
use combine::{
attempt, choice, eof, many, many1, one_of, optional, parser, satisfy, skip_many1, value,
};
use combine::{error::StringStreamError, parser::combinator::recognize};
use once_cell::sync::Lazy;
use regex::Regex;
// Note: '-' char is only forbidden at the beginning of a field name, would be clearer to add it to special characters.
use super::user_input_ast::{UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral};
use crate::Occur;
// Note: '-' char is only forbidden at the beginning of a field name, would be clearer to add it to
// special characters.
const SPECIAL_CHARS: &[char] = &[
'+', '^', '`', ':', '{', '}', '"', '[', ']', '(', ')', '~', '!', '\\', '*', ' ',
];
@@ -363,9 +366,10 @@ mod test {
type TestParseResult = Result<(), StringStreamError>;
use super::*;
use combine::parser::Parser;
use super::*;
pub fn nearly_equals(a: f64, b: f64) -> bool {
(a - b).abs() < 0.0005 * (a + b).abs()
}

View File

@@ -91,6 +91,7 @@ pub enum UserInputAst {
}
impl UserInputAst {
#[must_use]
pub fn unary(self, occur: Occur) -> UserInputAst {
UserInputAst::Clause(vec![(Some(occur), self)])
}

View File

@@ -1 +1,7 @@
use_try_shorthand = true
comment_width = 120
format_strings = true
group_imports = "StdExternalCrate"
imports_granularity = "Module"
normalize_comments = true
where_single_line = true
wrap_comments = true

View File

@@ -1,9 +1,6 @@
use super::Collector;
use crate::collector::SegmentCollector;
use crate::DocId;
use crate::Score;
use crate::SegmentOrdinal;
use crate::SegmentReader;
use crate::{DocId, Score, SegmentOrdinal, SegmentReader};
/// `CountCollector` collector only counts how many
/// documents match the query.
@@ -20,10 +17,10 @@ use crate::SegmentReader;
/// let index = Index::create_in_ram(schema);
///
/// let mut index_writer = index.writer(3_000_000).unwrap();
/// index_writer.add_document(doc!(title => "The Name of the Wind"));
/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
/// index_writer.add_document(doc!(title => "A Dairy Cow"));
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
/// index_writer.add_document(doc!(title => "The Name of the Wind")).unwrap();
/// index_writer.add_document(doc!(title => "The Diary of Muadib")).unwrap();
/// index_writer.add_document(doc!(title => "A Dairy Cow")).unwrap();
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl")).unwrap();
/// assert!(index_writer.commit().is_ok());
///
/// let reader = index.reader().unwrap();
@@ -80,8 +77,7 @@ impl SegmentCollector for SegmentCountCollector {
#[cfg(test)]
mod tests {
use super::{Count, SegmentCountCollector};
use crate::collector::Collector;
use crate::collector::SegmentCollector;
use crate::collector::{Collector, SegmentCollector};
#[test]
fn test_count_collect_does_not_requires_scoring() {

View File

@@ -8,8 +8,7 @@ pub(crate) struct CustomScoreTopCollector<TCustomScorer, TScore = Score> {
}
impl<TCustomScorer, TScore> CustomScoreTopCollector<TCustomScorer, TScore>
where
TScore: Clone + PartialOrd,
where TScore: Clone + PartialOrd
{
pub(crate) fn new(
custom_scorer: TCustomScorer,
@@ -114,8 +113,7 @@ where
}
impl<F, TScore> CustomSegmentScorer<TScore> for F
where
F: 'static + FnMut(DocId) -> TScore,
where F: 'static + FnMut(DocId) -> TScore
{
fn score(&mut self, doc: DocId) -> TScore {
(self)(doc)

View File

@@ -1,8 +1,7 @@
use std::collections::HashSet;
use crate::{DocAddress, DocId, Score};
use super::{Collector, SegmentCollector};
use crate::{DocAddress, DocId, Score};
/// Collectors that returns the set of DocAddress that matches the query.
///

View File

@@ -1,21 +1,14 @@
use crate::collector::Collector;
use crate::collector::SegmentCollector;
use crate::fastfield::FacetReader;
use crate::schema::Facet;
use crate::schema::Field;
use crate::DocId;
use crate::Score;
use crate::SegmentOrdinal;
use crate::SegmentReader;
use std::cmp::Ordering;
use std::collections::btree_map;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::collections::BinaryHeap;
use std::collections::{btree_map, BTreeMap, BTreeSet, BinaryHeap};
use std::iter::Peekable;
use std::ops::Bound;
use std::{u64, usize};
use crate::collector::{Collector, SegmentCollector};
use crate::fastfield::FacetReader;
use crate::schema::{Facet, Field};
use crate::{DocId, Score, SegmentOrdinal, SegmentReader};
struct Hit<'a> {
count: u64,
facet: &'a Facet,
@@ -83,7 +76,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
/// ```rust
/// use tantivy::collector::FacetCollector;
/// use tantivy::query::AllQuery;
/// use tantivy::schema::{Facet, Schema, INDEXED, TEXT};
/// use tantivy::schema::{Facet, Schema, FacetOptions, TEXT};
/// use tantivy::{doc, Index};
///
/// fn example() -> tantivy::Result<()> {
@@ -92,7 +85,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
/// // Facet have their own specific type.
/// // It is not a bad practise to put all of your
/// // facet information in the same field.
/// let facet = schema_builder.add_facet_field("facet", INDEXED);
/// let facet = schema_builder.add_facet_field("facet", FacetOptions::default());
/// let title = schema_builder.add_text_field("title", TEXT);
/// let schema = schema_builder.build();
/// let index = Index::create_in_ram(schema);
@@ -103,23 +96,23 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
/// title => "The Name of the Wind",
/// facet => Facet::from("/lang/en"),
/// facet => Facet::from("/category/fiction/fantasy")
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "Dune",
/// facet => Facet::from("/lang/en"),
/// facet => Facet::from("/category/fiction/sci-fi")
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "La Vénus d'Ille",
/// facet => Facet::from("/lang/fr"),
/// facet => Facet::from("/category/fiction/fantasy"),
/// facet => Facet::from("/category/fiction/horror")
/// ));
/// ))?;
/// index_writer.add_document(doc!(
/// title => "The Diary of a Young Girl",
/// facet => Facet::from("/lang/en"),
/// facet => Facet::from("/category/biography")
/// ));
/// ))?;
/// index_writer.commit()?;
/// }
/// let reader = index.reader()?;
@@ -240,9 +233,7 @@ impl FacetCollector {
/// If you need the correct number of unique documents for two such facets,
/// just add them in separate `FacetCollector`.
pub fn add_facet<T>(&mut self, facet_from: T)
where
Facet: From<T>,
{
where Facet: From<T> {
let facet = Facet::from(facet_from);
for old_facet in &self.facets {
assert!(
@@ -400,11 +391,9 @@ impl<'a> Iterator for FacetChildIterator<'a> {
impl FacetCounts {
/// Returns an iterator over all of the facet count pairs inside this result.
/// See the documentation for `FacetCollector` for a usage example.
/// See the documentation for [FacetCollector] for a usage example.
pub fn get<T>(&self, facet_from: T) -> FacetChildIterator<'_>
where
Facet: From<T>,
{
where Facet: From<T> {
let facet = Facet::from(facet_from);
let left_bound = Bound::Excluded(facet.clone());
let right_bound = if facet.is_root() {
@@ -421,11 +410,9 @@ impl FacetCounts {
}
/// Returns a vector of top `k` facets with their counts, sorted highest-to-lowest by counts.
/// See the documentation for `FacetCollector` for a usage example.
/// See the documentation for [FacetCollector] for a usage example.
pub fn top_k<T>(&self, facet: T, k: usize) -> Vec<(&Facet, u64)>
where
Facet: From<T>,
{
where Facet: From<T> {
let mut heap = BinaryHeap::with_capacity(k);
let mut it = self.get(facet);
@@ -458,25 +445,27 @@ impl FacetCounts {
#[cfg(test)]
mod tests {
use std::iter;
use rand::distributions::Uniform;
use rand::prelude::SliceRandom;
use rand::{thread_rng, Rng};
use super::{FacetCollector, FacetCounts};
use crate::collector::Count;
use crate::core::Index;
use crate::query::{AllQuery, QueryParser, TermQuery};
use crate::schema::{Document, Facet, Field, IndexRecordOption, Schema, INDEXED};
use crate::schema::{Document, Facet, FacetOptions, Field, IndexRecordOption, Schema};
use crate::Term;
use rand::distributions::Uniform;
use rand::prelude::SliceRandom;
use rand::{thread_rng, Rng};
use std::iter;
#[test]
fn test_facet_collector_drilldown() {
fn test_facet_collector_drilldown() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
let num_facets: usize = 3 * 4 * 5;
let facets: Vec<Facet> = (0..num_facets)
.map(|mut n| {
@@ -491,14 +480,14 @@ mod tests {
for i in 0..num_facets * 10 {
let mut doc = Document::new();
doc.add_facet(facet_field, facets[i % num_facets].clone());
index_writer.add_document(doc);
index_writer.add_document(doc)?;
}
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
let mut facet_collector = FacetCollector::for_field(facet_field);
facet_collector.add_facet(Facet::from("/top1"));
let counts = searcher.search(&AllQuery, &facet_collector).unwrap();
let counts = searcher.search(&AllQuery, &facet_collector)?;
{
let facets: Vec<(String, u64)> = counts
@@ -518,11 +507,13 @@ mod tests {
.collect::<Vec<_>>()
);
}
Ok(())
}
#[test]
#[should_panic(expected = "Tried to add a facet which is a descendant of \
an already added facet.")]
#[should_panic(
expected = "Tried to add a facet which is a descendant of an already added facet."
)]
fn test_misused_facet_collector() {
let mut facet_collector = FacetCollector::for_field(Field::from_field_id(0));
facet_collector.add_facet(Facet::from("/country"));
@@ -530,48 +521,49 @@ mod tests {
}
#[test]
fn test_doc_unsorted_multifacet() {
fn test_doc_unsorted_multifacet() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facets", INDEXED);
let facet_field = schema_builder.add_facet_field("facets", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(
facet_field => Facet::from_text(&"/subjects/A/a").unwrap(),
facet_field => Facet::from_text(&"/subjects/B/a").unwrap(),
facet_field => Facet::from_text(&"/subjects/A/b").unwrap(),
facet_field => Facet::from_text(&"/subjects/B/b").unwrap(),
));
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
))?;
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
assert_eq!(searcher.num_docs(), 1);
let mut facet_collector = FacetCollector::for_field(facet_field);
facet_collector.add_facet("/subjects");
let counts = searcher.search(&AllQuery, &facet_collector).unwrap();
let counts = searcher.search(&AllQuery, &facet_collector)?;
let facets: Vec<(&Facet, u64)> = counts.get("/subjects").collect();
assert_eq!(facets[0].1, 1);
Ok(())
}
#[test]
fn test_doc_search_by_facet() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(
facet_field => Facet::from_text(&"/A/A").unwrap(),
));
))?;
index_writer.add_document(doc!(
facet_field => Facet::from_text(&"/A/B").unwrap(),
));
))?;
index_writer.add_document(doc!(
facet_field => Facet::from_text(&"/A/C/A").unwrap(),
));
))?;
index_writer.add_document(doc!(
facet_field => Facet::from_text(&"/D/C/A").unwrap(),
));
))?;
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
@@ -613,7 +605,7 @@ mod tests {
#[test]
fn test_facet_collector_topk() {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
@@ -637,7 +629,7 @@ mod tests {
let mut index_writer = index.writer_for_tests().unwrap();
for doc in docs {
index_writer.add_document(doc);
index_writer.add_document(doc).unwrap();
}
index_writer.commit().unwrap();
let searcher = index.reader().unwrap().searcher();
@@ -662,7 +654,7 @@ mod tests {
#[test]
fn test_facet_collector_topk_tie_break() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
@@ -677,7 +669,7 @@ mod tests {
let mut index_writer = index.writer_for_tests()?;
for doc in docs {
index_writer.add_document(doc);
index_writer.add_document(doc)?;
}
index_writer.commit()?;
@@ -698,13 +690,14 @@ mod tests {
#[cfg(all(test, feature = "unstable"))]
mod bench {
use rand::seq::SliceRandom;
use rand::thread_rng;
use test::Bencher;
use crate::collector::FacetCollector;
use crate::query::AllQuery;
use crate::schema::{Facet, Schema, INDEXED};
use crate::Index;
use rand::seq::SliceRandom;
use rand::thread_rng;
use test::Bencher;
#[bench]
fn bench_facet_collector(b: &mut Bencher) {
@@ -725,7 +718,7 @@ mod bench {
let mut index_writer = index.writer_for_tests().unwrap();
for doc in docs {
index_writer.add_document(doc);
index_writer.add_document(doc).unwrap();
}
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
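Both the `get` and `top_k` hunks above defer to the `FacetCollector` docs for a usage example, which this comparison truncates; a minimal sketch of how the two calls line up, assuming a `searcher`, the `AllQuery` import, and a `facet_field` whose documents carry `/category/...` facets:

```rust
// Count the facets under "/category" for all matching documents.
let mut facet_collector = FacetCollector::for_field(facet_field);
facet_collector.add_facet("/category");
let counts = searcher.search(&AllQuery, &facet_collector)?;

// Either walk every direct child of "/category" with its count...
let children: Vec<(&Facet, u64)> = counts.get("/category").collect();
// ...or keep only the two most frequent children, sorted highest-to-lowest by count.
let top_children: Vec<(&Facet, u64)> = counts.top_k("/category", 2);
```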

View File

@@ -16,8 +16,9 @@ use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue};
use crate::schema::Field;
use crate::{Score, SegmentReader, TantivyError};
/// The `FilterCollector` collector filters docs using a fast field value and a predicate.
/// Only the documents for which the predicate returned "true" will be passed on to the next collector.
/// The `FilterCollector` filters docs using a fast field value and a predicate.
/// Only the documents for which the predicate returned "true" will be passed on to the next
/// collector.
///
/// ```rust
/// use tantivy::collector::{TopDocs, FilterCollector};
@@ -25,38 +26,40 @@ use crate::{Score, SegmentReader, TantivyError};
/// use tantivy::schema::{Schema, TEXT, INDEXED, FAST};
/// use tantivy::{doc, DocAddress, Index};
///
/// # fn main() -> tantivy::Result<()> {
/// let mut schema_builder = Schema::builder();
/// let title = schema_builder.add_text_field("title", TEXT);
/// let price = schema_builder.add_u64_field("price", INDEXED | FAST);
/// let schema = schema_builder.build();
/// let index = Index::create_in_ram(schema);
///
/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
/// index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64));
/// index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64));
/// index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64));
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl", price => 20_120u64));
/// assert!(index_writer.commit().is_ok());
/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
/// index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64))?;
/// index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64))?;
/// index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64))?;
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl", price => 20_120u64))?;
/// index_writer.commit()?;
///
/// let reader = index.reader().unwrap();
/// let reader = index.reader()?;
/// let searcher = reader.searcher();
///
/// let query_parser = QueryParser::for_index(&index, vec![title]);
/// let query = query_parser.parse_query("diary").unwrap();
/// let query = query_parser.parse_query("diary")?;
/// let no_filter_collector = FilterCollector::new(price, &|value: u64| value > 20_120u64, TopDocs::with_limit(2));
/// let top_docs = searcher.search(&query, &no_filter_collector).unwrap();
/// let top_docs = searcher.search(&query, &no_filter_collector)?;
///
/// assert_eq!(top_docs.len(), 1);
/// assert_eq!(top_docs[0].1, DocAddress::new(0, 1));
///
/// let filter_all_collector: FilterCollector<_, _, u64> = FilterCollector::new(price, &|value| value < 5u64, TopDocs::with_limit(2));
/// let filtered_top_docs = searcher.search(&query, &filter_all_collector).unwrap();
/// let filtered_top_docs = searcher.search(&query, &filter_all_collector)?;
///
/// assert_eq!(filtered_top_docs.len(), 0);
/// # Ok(())
/// # }
/// ```
pub struct FilterCollector<TCollector, TPredicate, TPredicateValue: FastValue>
where
TPredicate: 'static + Clone,
where TPredicate: 'static + Clone
{
field: Field,
collector: TCollector,

View File

@@ -1,8 +1,9 @@
use fastdivide::DividerU64;
use crate::collector::{Collector, SegmentCollector};
use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue};
use crate::schema::{Field, Type};
use crate::{DocId, Score};
use fastdivide::DividerU64;
/// Histogram builds a histogram of the values of a fast field for the
/// collected DocSet.
@@ -36,8 +37,8 @@ impl HistogramCollector {
/// - `bucket_width`: the length of the interval associated with each bucket.
/// - `num_buckets`: The overall number of buckets.
///
/// Together, this parameters define a partition of `[min_value, min_value + num_buckets * bucket_width)`
/// into `num_buckets` intervals of width bucket that we call `bucket`.
/// Together, these parameters define a partition of `[min_value, min_value + num_buckets *
/// bucket_width)` into `num_buckets` intervals of width `bucket_width`, each called a `bucket`.
///
/// # Disclaimer
/// This function panics if the field given is of type f64.
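A small worked example of that partition, using plain arithmetic rather than the collector itself (all values here are made up for illustration): with `min_value = 10`, `bucket_width = 5` and `num_buckets = 4`, the covered range is `[10, 30)` and a value lands in bucket `(value - min_value) / bucket_width`.

```rust
/// Bucket index of `value`, or `None` if it falls outside
/// `[min_value, min_value + num_buckets * bucket_width)`.
fn bucket_of(value: u64, min_value: u64, bucket_width: u64, num_buckets: u64) -> Option<u64> {
    if value < min_value {
        return None;
    }
    let bucket = (value - min_value) / bucket_width;
    (bucket < num_buckets).then(|| bucket)
}

fn main() {
    // min_value = 10, bucket_width = 5, num_buckets = 4  =>  range [10, 30)
    assert_eq!(bucket_of(12, 10, 5, 4), Some(0)); // falls in [10, 15)
    assert_eq!(bucket_of(19, 10, 5, 4), Some(1)); // falls in [15, 20)
    assert_eq!(bucket_of(29, 10, 5, 4), Some(3)); // falls in [25, 30)
    assert_eq!(bucket_of(30, 10, 5, 4), None);    // outside the partition
}
```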
@@ -147,12 +148,13 @@ fn add_vecs(mut vals_list: Vec<Vec<u64>>, len: usize) -> Vec<u64> {
#[cfg(test)]
mod tests {
use fastdivide::DividerU64;
use query::AllQuery;
use super::{add_vecs, HistogramCollector, HistogramComputer};
use crate::chrono::{TimeZone, Utc};
use crate::schema::{Schema, FAST};
use crate::{doc, query, Index};
use fastdivide::DividerU64;
use query::AllQuery;
#[test]
fn test_add_histograms_simple() {
@@ -226,10 +228,10 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut writer = index.writer_with_num_threads(1, 4_000_000)?;
writer.add_document(doc!(val_field=>12i64));
writer.add_document(doc!(val_field=>-30i64));
writer.add_document(doc!(val_field=>-12i64));
writer.add_document(doc!(val_field=>-10i64));
writer.add_document(doc!(val_field=>12i64))?;
writer.add_document(doc!(val_field=>-30i64))?;
writer.add_document(doc!(val_field=>-12i64))?;
writer.add_document(doc!(val_field=>-10i64))?;
writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
@@ -247,13 +249,13 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut writer = index.writer_with_num_threads(1, 4_000_000)?;
writer.add_document(doc!(val_field=>12i64));
writer.add_document(doc!(val_field=>12i64))?;
writer.commit()?;
writer.add_document(doc!(val_field=>-30i64));
writer.add_document(doc!(val_field=>-30i64))?;
writer.commit()?;
writer.add_document(doc!(val_field=>-12i64));
writer.add_document(doc!(val_field=>-12i64))?;
writer.commit()?;
writer.add_document(doc!(val_field=>-10i64));
writer.add_document(doc!(val_field=>-10i64))?;
writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
@@ -271,9 +273,9 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut writer = index.writer_with_num_threads(1, 4_000_000)?;
writer.add_document(doc!(date_field=>Utc.ymd(1982, 9, 17).and_hms(0, 0,0)));
writer.add_document(doc!(date_field=>Utc.ymd(1986, 3, 9).and_hms(0, 0, 0)));
writer.add_document(doc!(date_field=>Utc.ymd(1983, 9, 27).and_hms(0, 0, 0)));
writer.add_document(doc!(date_field=>Utc.ymd(1982, 9, 17).and_hms(0, 0,0)))?;
writer.add_document(doc!(date_field=>Utc.ymd(1986, 3, 9).and_hms(0, 0, 0)))?;
writer.add_document(doc!(date_field=>Utc.ymd(1983, 9, 27).and_hms(0, 0, 0)))?;
writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();

View File

@@ -1,95 +1,90 @@
/*!
//! # Collectors
//!
//! Collectors define the information you want to extract from the documents matching the queries.
//! In tantivy jargon, we call this information your search "fruit".
//!
//! Your fruit could for instance be :
//! - [the count of matching documents](./struct.Count.html)
//! - [the top 10 documents, by relevancy or by a fast field](./struct.TopDocs.html)
//! - [facet counts](./struct.FacetCollector.html)
//!
//! At one point in your code, you will trigger the actual search operation by calling
//! [the `search(...)` method of your `Searcher` object](../struct.Searcher.html#method.search).
//! This call will look like this.
//!
//! ```verbatim
//! let fruit = searcher.search(&query, &collector)?;
//! ```
//!
//! Here the type of fruit is actually determined as an associated type of the collector
//! (`Collector::Fruit`).
//!
//!
//! # Combining several collectors
//!
//! A rich search experience often requires to run several collectors on your search query.
//! For instance,
//! - selecting the top-K products matching your query
//! - counting the matching documents
//! - computing several facets
//! - computing statistics about the matching product prices
//!
//! A simple and efficient way to do that is to pass your collectors as one tuple.
//! The resulting `Fruit` will then be a typed tuple with each collector's original fruits
//! in their respective position.
//!
//! ```rust
//! # use tantivy::schema::*;
//! # use tantivy::*;
//! # use tantivy::query::*;
//! use tantivy::collector::{Count, TopDocs};
//! #
//! # fn main() -> tantivy::Result<()> {
//! # let mut schema_builder = Schema::builder();
//! # let title = schema_builder.add_text_field("title", TEXT);
//! # let schema = schema_builder.build();
//! # let index = Index::create_in_ram(schema);
//! # let mut index_writer = index.writer(3_000_000)?;
//! # index_writer.add_document(doc!(
//! # title => "The Name of the Wind",
//! # ))?;
//! # index_writer.add_document(doc!(
//! # title => "The Diary of Muadib",
//! # ))?;
//! # index_writer.commit()?;
//! # let reader = index.reader()?;
//! # let searcher = reader.searcher();
//! # let query_parser = QueryParser::for_index(&index, vec![title]);
//! # let query = query_parser.parse_query("diary")?;
//! let (doc_count, top_docs): (usize, Vec<(Score, DocAddress)>) =
//! searcher.search(&query, &(Count, TopDocs::with_limit(2)))?;
//! # Ok(())
//! # }
//! ```
//!
//! The `Collector` trait is implemented for up to 4 collectors.
//! If you have more than 4 collectors, you can either group them into
//! tuples of tuples `(a,(b,(c,d)))`, or rely on [`MultiCollector`](./struct.MultiCollector.html).
//!
//! # Combining several collectors dynamically
//!
//! Combining collectors into a tuple is a zero-cost abstraction: everything
//! happens as if you had manually implemented a single collector
//! combining all of our features.
//!
//! Unfortunately it requires you to know at compile time your collector types.
//! If on the other hand, the collectors depend on some query parameter,
//! you can rely on `MultiCollector`'s.
//!
//!
//! # Implementing your own collectors.
//!
//! See the `custom_collector` example.
# Collectors
Collectors define the information you want to extract from the documents matching the queries.
In tantivy jargon, we call this information your search "fruit".
Your fruit could for instance be :
- [the count of matching documents](./struct.Count.html)
- [the top 10 documents, by relevancy or by a fast field](./struct.TopDocs.html)
- [facet counts](./struct.FacetCollector.html)
At one point in your code, you will trigger the actual search operation by calling
[the `search(...)` method of your `Searcher` object](../struct.Searcher.html#method.search).
This call will look like this.
```verbatim
let fruit = searcher.search(&query, &collector)?;
```
Here the type of fruit is actually determined as an associated type of the collector (`Collector::Fruit`).
# Combining several collectors
A rich search experience often requires running several collectors on your search query.
For instance,
- selecting the top-K products matching your query
- counting the matching documents
- computing several facets
- computing statistics about the matching product prices
A simple and efficient way to do that is to pass your collectors as one tuple.
The resulting `Fruit` will then be a typed tuple with each collector's original fruits
in their respective position.
```rust
# use tantivy::schema::*;
# use tantivy::*;
# use tantivy::query::*;
use tantivy::collector::{Count, TopDocs};
#
# fn main() -> tantivy::Result<()> {
# let mut schema_builder = Schema::builder();
# let title = schema_builder.add_text_field("title", TEXT);
# let schema = schema_builder.build();
# let index = Index::create_in_ram(schema);
# let mut index_writer = index.writer(3_000_000)?;
# index_writer.add_document(doc!(
# title => "The Name of the Wind",
# ));
# index_writer.add_document(doc!(
# title => "The Diary of Muadib",
# ));
# index_writer.commit()?;
# let reader = index.reader()?;
# let searcher = reader.searcher();
# let query_parser = QueryParser::for_index(&index, vec![title]);
# let query = query_parser.parse_query("diary")?;
let (doc_count, top_docs): (usize, Vec<(Score, DocAddress)>) =
searcher.search(&query, &(Count, TopDocs::with_limit(2)))?;
# Ok(())
# }
```
The `Collector` trait is implemented for up to 4 collectors.
If you have more than 4 collectors, you can either group them into
tuples of tuples `(a,(b,(c,d)))`, or rely on [`MultiCollector`](./struct.MultiCollector.html).
# Combining several collectors dynamically
Combining collectors into a tuple is a zero-cost abstraction: everything
happens as if you had manually implemented a single collector
combining all of our features.
Unfortunately, it requires you to know your collector types at compile time.
If, on the other hand, the collectors depend on some query parameter,
you can rely on `MultiCollector`.
# Implementing your own collectors.
See the `custom_collector` example.
*/
use crate::DocId;
use crate::Score;
use crate::SegmentOrdinal;
use crate::SegmentReader;
use downcast_rs::impl_downcast;
use crate::{DocId, Score, SegmentOrdinal, SegmentReader};
mod count_collector;
pub use self::count_collector::Count;
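The module docs above stop at naming the tuples-of-tuples workaround for more than four collectors; a minimal sketch of what such a call looks like (the `facet_collector`, `docset_collector` and `histogram_collector` values, like the `query` and `searcher`, are assumed to be built beforehand):

```rust
// Five collectors grouped as (a, (b, (c, d, e))); the fruit nests the same way.
let (count, (top_docs, (facet_counts, doc_addresses, histogram))) = searcher.search(
    &query,
    &(
        Count,
        (
            TopDocs::with_limit(10),
            (facet_collector, docset_collector, histogram_collector),
        ),
    ),
)?;
```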
@@ -111,8 +106,7 @@ mod tweak_score_top_collector;
pub use self::tweak_score_top_collector::{ScoreSegmentTweaker, ScoreTweaker};
mod facet_collector;
pub use self::facet_collector::FacetCollector;
pub use self::facet_collector::FacetCounts;
pub use self::facet_collector::{FacetCollector, FacetCounts};
use crate::query::Weight;
mod docset_collector;
@@ -178,9 +172,9 @@ pub trait Collector: Sync + Send {
) -> crate::Result<<Self::Child as SegmentCollector>::Fruit> {
let mut segment_collector = self.for_segment(segment_ord as u32, reader)?;
if let Some(delete_bitset) = reader.delete_bitset() {
if let Some(alive_bitset) = reader.alive_bitset() {
weight.for_each(reader, &mut |doc, score| {
if delete_bitset.is_alive(doc) {
if alive_bitset.is_alive(doc) {
segment_collector.collect(doc, score);
}
})?;

View File

@@ -1,14 +1,10 @@
use super::Collector;
use super::SegmentCollector;
use crate::collector::Fruit;
use crate::DocId;
use crate::Score;
use crate::SegmentOrdinal;
use crate::SegmentReader;
use crate::TantivyError;
use std::marker::PhantomData;
use std::ops::Deref;
use super::{Collector, SegmentCollector};
use crate::collector::Fruit;
use crate::{DocId, Score, SegmentOrdinal, SegmentReader, TantivyError};
pub struct MultiFruit {
sub_fruits: Vec<Option<Box<dyn Fruit>>>,
}
@@ -104,7 +100,8 @@ impl<TFruit: Fruit> FruitHandle<TFruit> {
///
/// If the type of the collectors is known, you can just group your collectors
/// in a tuple. See the
/// [Combining several collectors section of the collector documentation](./index.html#combining-several-collectors).
/// [Combining several collectors section of the collector
/// documentation](./index.html#combining-several-collectors).
///
/// ```rust
/// use tantivy::collector::{Count, TopDocs, MultiCollector};
@@ -112,19 +109,19 @@ impl<TFruit: Fruit> FruitHandle<TFruit> {
/// use tantivy::schema::{Schema, TEXT};
/// use tantivy::{doc, Index};
///
/// # fn main() -> tantivy::Result<()> {
/// let mut schema_builder = Schema::builder();
/// let title = schema_builder.add_text_field("title", TEXT);
/// let schema = schema_builder.build();
/// let index = Index::create_in_ram(schema);
/// let mut index_writer = index.writer(3_000_000)?;
/// index_writer.add_document(doc!(title => "The Name of the Wind"))?;
/// index_writer.add_document(doc!(title => "The Diary of Muadib"))?;
/// index_writer.add_document(doc!(title => "A Dairy Cow"))?;
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"))?;
/// index_writer.commit()?;
///
/// let mut index_writer = index.writer(3_000_000).unwrap();
/// index_writer.add_document(doc!(title => "The Name of the Wind"));
/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
/// index_writer.add_document(doc!(title => "A Dairy Cow"));
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
/// assert!(index_writer.commit().is_ok());
///
/// let reader = index.reader().unwrap();
/// let reader = index.reader()?;
/// let searcher = reader.searcher();
///
/// let mut collectors = MultiCollector::new();
@@ -139,6 +136,8 @@ impl<TFruit: Fruit> FruitHandle<TFruit> {
///
/// assert_eq!(count, 2);
/// assert_eq!(top_docs.len(), 2);
/// # Ok(())
/// # }
/// ```
#[allow(clippy::type_complexity)]
#[derive(Default)]
@@ -246,30 +245,28 @@ mod tests {
use super::*;
use crate::collector::{Count, TopDocs};
use crate::query::TermQuery;
use crate::schema::IndexRecordOption;
use crate::schema::{Schema, TEXT};
use crate::Index;
use crate::Term;
use crate::schema::{IndexRecordOption, Schema, TEXT};
use crate::{Index, Term};
#[test]
fn test_multi_collector() {
fn test_multi_collector() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(text=>"abc"));
index_writer.add_document(doc!(text=>"abc abc abc"));
index_writer.add_document(doc!(text=>"abc abc"));
index_writer.commit().unwrap();
index_writer.add_document(doc!(text=>""));
index_writer.add_document(doc!(text=>"abc abc abc abc"));
index_writer.add_document(doc!(text=>"abc"));
index_writer.commit().unwrap();
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text=>"abc"))?;
index_writer.add_document(doc!(text=>"abc abc abc"))?;
index_writer.add_document(doc!(text=>"abc abc"))?;
index_writer.commit()?;
index_writer.add_document(doc!(text=>""))?;
index_writer.add_document(doc!(text=>"abc abc abc abc"))?;
index_writer.add_document(doc!(text=>"abc"))?;
index_writer.commit()?;
}
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
let term = Term::from_field_text(text, "abc");
let query = TermQuery::new(term, IndexRecordOption::Basic);
@@ -280,5 +277,6 @@ mod tests {
assert_eq!(count_handler.extract(&mut multifruits), 5);
assert_eq!(topdocs_handler.extract(&mut multifruits).len(), 2);
Ok(())
}
}

View File

@@ -1,21 +1,13 @@
use super::*;
use crate::core::SegmentReader;
use crate::fastfield::BytesFastFieldReader;
use crate::fastfield::DynamicFastFieldReader;
use crate::fastfield::FastFieldReader;
use crate::schema::Field;
use crate::DocId;
use crate::Score;
use crate::SegmentOrdinal;
use crate::{DocAddress, Document, Searcher};
use crate::collector::{Count, FilterCollector, TopDocs};
use crate::query::{AllQuery, QueryParser};
use crate::schema::{Schema, FAST, TEXT};
use crate::DateTime;
use crate::{doc, Index};
use std::str::FromStr;
use super::*;
use crate::collector::{Count, FilterCollector, TopDocs};
use crate::core::SegmentReader;
use crate::fastfield::{BytesFastFieldReader, DynamicFastFieldReader, FastFieldReader};
use crate::query::{AllQuery, QueryParser};
use crate::schema::{Field, Schema, FAST, TEXT};
use crate::{doc, DateTime, DocAddress, DocId, Document, Index, Score, Searcher, SegmentOrdinal};
pub const TEST_COLLECTOR_WITH_SCORE: TestCollector = TestCollector {
compute_score: true,
};
@@ -25,7 +17,7 @@ pub const TEST_COLLECTOR_WITHOUT_SCORE: TestCollector = TestCollector {
};
#[test]
pub fn test_filter_collector() {
pub fn test_filter_collector() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let title = schema_builder.add_text_field("title", TEXT);
let price = schema_builder.add_u64_field("price", FAST);
@@ -33,25 +25,25 @@ pub fn test_filter_collector() {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64, date => DateTime::from_str("1898-04-09T00:00:00+00:00").unwrap()));
index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64, date => DateTime::from_str("2020-04-09T00:00:00+00:00").unwrap()));
index_writer.add_document(doc!(title => "The Diary of Anne Frank", price => 18_240u64, date => DateTime::from_str("2019-04-20T00:00:00+00:00").unwrap()));
index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64, date => DateTime::from_str("2019-04-09T00:00:00+00:00").unwrap()));
index_writer.add_document(doc!(title => "The Diary of a Young Girl", price => 20_120u64, date => DateTime::from_str("2018-04-09T00:00:00+00:00").unwrap()));
assert!(index_writer.commit().is_ok());
let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64, date => DateTime::from_str("1898-04-09T00:00:00+00:00").unwrap()))?;
index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64, date => DateTime::from_str("2020-04-09T00:00:00+00:00").unwrap()))?;
index_writer.add_document(doc!(title => "The Diary of Anne Frank", price => 18_240u64, date => DateTime::from_str("2019-04-20T00:00:00+00:00").unwrap()))?;
index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64, date => DateTime::from_str("2019-04-09T00:00:00+00:00").unwrap()))?;
index_writer.add_document(doc!(title => "The Diary of a Young Girl", price => 20_120u64, date => DateTime::from_str("2018-04-09T00:00:00+00:00").unwrap()))?;
index_writer.commit()?;
let reader = index.reader().unwrap();
let reader = index.reader()?;
let searcher = reader.searcher();
let query_parser = QueryParser::for_index(&index, vec![title]);
let query = query_parser.parse_query("diary").unwrap();
let query = query_parser.parse_query("diary")?;
let filter_some_collector = FilterCollector::new(
price,
&|value: u64| value > 20_120u64,
TopDocs::with_limit(2),
);
let top_docs = searcher.search(&query, &filter_some_collector).unwrap();
let top_docs = searcher.search(&query, &filter_some_collector)?;
assert_eq!(top_docs.len(), 1);
assert_eq!(top_docs[0].1, DocAddress::new(0, 1));
@@ -67,9 +59,10 @@ pub fn test_filter_collector() {
}
let filter_dates_collector = FilterCollector::new(date, &date_filter, TopDocs::with_limit(5));
let filtered_date_docs = searcher.search(&query, &filter_dates_collector).unwrap();
let filtered_date_docs = searcher.search(&query, &filter_dates_collector)?;
assert_eq!(filtered_date_docs.len(), 2);
Ok(())
}
/// Stores all of the doc ids.
@@ -274,8 +267,8 @@ fn make_test_searcher() -> crate::Result<crate::LeasedItem<Searcher>> {
let schema = Schema::builder().build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(Document::default());
index_writer.add_document(Document::default());
index_writer.add_document(Document::default())?;
index_writer.add_document(Document::default())?;
index_writer.commit()?;
Ok(index.reader()?.searcher())
}

View File

@@ -1,11 +1,9 @@
use crate::DocAddress;
use crate::DocId;
use crate::SegmentOrdinal;
use crate::SegmentReader;
use std::cmp::Ordering;
use std::collections::BinaryHeap;
use std::marker::PhantomData;
use crate::{DocAddress, DocId, SegmentOrdinal, SegmentReader};
/// Contains a feature (field, score, etc.) of a document along with the document address.
///
/// It has a custom implementation of `PartialOrd` that reverses the order. This is because the
@@ -62,17 +60,14 @@ pub(crate) struct TopCollector<T> {
}
impl<T> TopCollector<T>
where
T: PartialOrd + Clone,
where T: PartialOrd + Clone
{
/// Creates a top collector, with a number of documents equal to "limit".
///
/// # Panics
/// The method panics if limit is 0
pub fn with_limit(limit: usize) -> TopCollector<T> {
if limit < 1 {
panic!("Limit must be strictly greater than 0.");
}
assert!(limit >= 1, "Limit must be strictly greater than 0.");
Self {
limit,
offset: 0,
@@ -255,7 +250,7 @@ mod tests {
// when harvesting we have to guarantee stable sorting in case of a tie
// on the score
let doc_ids_collection = [4, 5, 6];
let score = 3.14;
let score = 3.3f32;
let mut top_collector_limit_2 = TopSegmentCollector::new(0, 2);
for id in &doc_ids_collection {
@@ -324,9 +319,10 @@ mod tests {
#[cfg(all(test, feature = "unstable"))]
mod bench {
use super::TopSegmentCollector;
use test::Bencher;
use super::TopSegmentCollector;
#[bench]
fn bench_top_segment_collector_collect_not_at_capacity(b: &mut Bencher) {
let mut top_collector = TopSegmentCollector::new(0, 400);
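The first hunk of this file notes that `ComparableDoc` reverses its `PartialOrd`; the reason is the usual one for top-K collection with `std::collections::BinaryHeap`, which is a max-heap: to keep the K *largest* entries, the heap must surface its current *minimum* so that it can be evicted first. A generic sketch of the trick using `Reverse`, not tantivy's own types:

```rust
use std::cmp::Reverse;
use std::collections::BinaryHeap;

/// Keep the `k` largest scores seen so far.
fn top_k(scores: impl IntoIterator<Item = u32>, k: usize) -> Vec<u32> {
    let mut heap: BinaryHeap<Reverse<u32>> = BinaryHeap::with_capacity(k);
    for score in scores {
        if heap.len() < k {
            heap.push(Reverse(score));
        } else if heap.peek().map_or(false, |Reverse(min)| score > *min) {
            // The reversed ordering keeps the current minimum on top,
            // so it is the entry evicted when a better score shows up.
            heap.pop();
            heap.push(Reverse(score));
        }
    }
    let mut out: Vec<u32> = heap.into_iter().map(|Reverse(s)| s).collect();
    out.sort_unstable_by(|a, b| b.cmp(a));
    out
}

fn main() {
    assert_eq!(top_k([5, 1, 9, 3, 7], 3), vec![9, 7, 5]);
}
```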

View File

@@ -1,21 +1,18 @@
use std::collections::BinaryHeap;
use std::fmt;
use std::marker::PhantomData;
use super::Collector;
use crate::collector::top_collector::{ComparableDoc, TopCollector};
use crate::collector::custom_score_top_collector::CustomScoreTopCollector;
use crate::collector::top_collector::{ComparableDoc, TopCollector, TopSegmentCollector};
use crate::collector::tweak_score_top_collector::TweakedScoreTopCollector;
use crate::collector::{
CustomScorer, CustomSegmentScorer, ScoreSegmentTweaker, ScoreTweaker, SegmentCollector,
};
use crate::fastfield::{DynamicFastFieldReader, FastFieldReader};
use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue};
use crate::query::Weight;
use crate::schema::Field;
use crate::DocAddress;
use crate::DocId;
use crate::Score;
use crate::SegmentOrdinal;
use crate::SegmentReader;
use crate::{collector::custom_score_top_collector::CustomScoreTopCollector, fastfield::FastValue};
use crate::{collector::top_collector::TopSegmentCollector, TantivyError};
use std::fmt;
use std::{collections::BinaryHeap, marker::PhantomData};
use crate::{DocAddress, DocId, Score, SegmentOrdinal, SegmentReader, TantivyError};
struct FastFieldConvertCollector<
TCollector: Collector<Fruit = Vec<(u64, DocAddress)>>,
@@ -94,27 +91,30 @@ where
/// use tantivy::schema::{Schema, TEXT};
/// use tantivy::{doc, DocAddress, Index};
///
/// # fn main() -> tantivy::Result<()> {
/// let mut schema_builder = Schema::builder();
/// let title = schema_builder.add_text_field("title", TEXT);
/// let schema = schema_builder.build();
/// let index = Index::create_in_ram(schema);
///
/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
/// index_writer.add_document(doc!(title => "The Name of the Wind"));
/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
/// index_writer.add_document(doc!(title => "A Dairy Cow"));
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
/// assert!(index_writer.commit().is_ok());
/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
/// index_writer.add_document(doc!(title => "The Name of the Wind"))?;
/// index_writer.add_document(doc!(title => "The Diary of Muadib"))?;
/// index_writer.add_document(doc!(title => "A Dairy Cow"))?;
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"))?;
/// index_writer.commit()?;
///
/// let reader = index.reader().unwrap();
/// let reader = index.reader()?;
/// let searcher = reader.searcher();
///
/// let query_parser = QueryParser::for_index(&index, vec![title]);
/// let query = query_parser.parse_query("diary").unwrap();
/// let top_docs = searcher.search(&query, &TopDocs::with_limit(2)).unwrap();
/// let query = query_parser.parse_query("diary")?;
/// let top_docs = searcher.search(&query, &TopDocs::with_limit(2))?;
///
/// assert_eq!(top_docs[0].1, DocAddress::new(0, 1));
/// assert_eq!(top_docs[1].1, DocAddress::new(0, 3));
/// # Ok(())
/// # }
/// ```
pub struct TopDocs(TopCollector<Score>);
@@ -180,41 +180,46 @@ impl TopDocs {
/// use tantivy::schema::{Schema, TEXT};
/// use tantivy::{doc, DocAddress, Index};
///
/// # fn main() -> tantivy::Result<()> {
/// let mut schema_builder = Schema::builder();
/// let title = schema_builder.add_text_field("title", TEXT);
/// let schema = schema_builder.build();
/// let index = Index::create_in_ram(schema);
///
/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
/// index_writer.add_document(doc!(title => "The Name of the Wind"));
/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
/// index_writer.add_document(doc!(title => "A Dairy Cow"));
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
/// index_writer.add_document(doc!(title => "The Diary of Lena Mukhina"));
/// assert!(index_writer.commit().is_ok());
/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
/// index_writer.add_document(doc!(title => "The Name of the Wind"))?;
/// index_writer.add_document(doc!(title => "The Diary of Muadib"))?;
/// index_writer.add_document(doc!(title => "A Dairy Cow"))?;
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"))?;
/// index_writer.add_document(doc!(title => "The Diary of Lena Mukhina"))?;
/// index_writer.commit()?;
///
/// let reader = index.reader().unwrap();
/// let reader = index.reader()?;
/// let searcher = reader.searcher();
///
/// let query_parser = QueryParser::for_index(&index, vec![title]);
/// let query = query_parser.parse_query("diary").unwrap();
/// let top_docs = searcher.search(&query, &TopDocs::with_limit(2).and_offset(1)).unwrap();
/// let query = query_parser.parse_query("diary")?;
/// let top_docs = searcher.search(&query, &TopDocs::with_limit(2).and_offset(1))?;
///
/// assert_eq!(top_docs.len(), 2);
/// assert_eq!(top_docs[0].1, DocAddress::new(0, 4));
/// assert_eq!(top_docs[1].1, DocAddress::new(0, 3));
/// Ok(())
/// # }
/// ```
#[must_use]
pub fn and_offset(self, offset: usize) -> TopDocs {
TopDocs(self.0.and_offset(offset))
}
/// Set top-K to rank documents by a given fast field.
///
/// If the field is not a fast or does not exist, this method returns successfully (it is not aware of any schema).
/// An error will be returned at the moment of search.
/// If the field is not a fast field or does not exist, this method returns successfully (it is
/// not aware of any schema). An error will be returned at the moment of search.
///
/// If the field is a FAST field but not a u64 field, search will return successfully but it will return
/// returns a monotonic u64-representation (ie. the order is still correct) of the requested field type.
/// If the field is a FAST field but not a u64 field, search will return successfully but it
/// will return a monotonic u64-representation (i.e. the order is still correct) of
/// the requested field type.
///
/// # Example
///
@@ -234,11 +239,11 @@ impl TopDocs {
/// #
/// # let index = Index::create_in_ram(schema);
/// # let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
/// # index_writer.add_document(doc!(title => "The Name of the Wind", rating => 92u64));
/// # index_writer.add_document(doc!(title => "The Diary of Muadib", rating => 97u64));
/// # index_writer.add_document(doc!(title => "A Dairy Cow", rating => 63u64));
/// # index_writer.add_document(doc!(title => "The Diary of a Young Girl", rating => 80u64));
/// # assert!(index_writer.commit().is_ok());
/// # index_writer.add_document(doc!(title => "The Name of the Wind", rating => 92u64))?;
/// # index_writer.add_document(doc!(title => "The Diary of Muadib", rating => 97u64))?;
/// # index_writer.add_document(doc!(title => "A Dairy Cow", rating => 63u64))?;
/// # index_writer.add_document(doc!(title => "The Diary of a Young Girl", rating => 80u64))?;
/// # index_writer.commit()?;
/// # let reader = index.reader()?;
/// # let query = QueryParser::for_index(&index, vec![title]).parse_query("diary")?;
/// # let top_docs = docs_sorted_by_rating(&reader.searcher(), &query, rating)?;
@@ -289,14 +294,15 @@ impl TopDocs {
/// Set top-K to rank documents by a given fast field.
///
/// If the field is not a fast field, or its field type does not match the generic type, this method does not panic,
/// but an explicit error will be returned at the moment of collection.
/// If the field is not a fast field, or its field type does not match the generic type, this
/// method does not panic, but an explicit error will be returned at the moment of
/// collection.
///
/// Note that this method is a generic. The requested fast field type will be often
/// inferred in your code by the rust compiler.
///
/// Implementation-wise, for performance reason, tantivy will manipulate the u64 representation of your fast
/// field until the last moment.
/// Implementation-wise, for performance reasons, tantivy will manipulate the u64 representation
/// of your fast field until the last moment.
///
/// # Example
///
@@ -316,9 +322,9 @@ impl TopDocs {
/// #
/// # let index = Index::create_in_ram(schema);
/// # let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
/// # index_writer.add_document(doc!(title => "MadCow Inc.", rating => 92_000_000i64));
/// # index_writer.add_document(doc!(title => "Zozo Cow KKK", rating => 119_000_000i64));
/// # index_writer.add_document(doc!(title => "Declining Cow", rating => -63_000_000i64));
/// # index_writer.add_document(doc!(title => "MadCow Inc.", rating => 92_000_000i64))?;
/// # index_writer.add_document(doc!(title => "Zozo Cow KKK", rating => 119_000_000i64))?;
/// # index_writer.add_document(doc!(title => "Declining Cow", rating => -63_000_000i64))?;
/// # assert!(index_writer.commit().is_ok());
/// # let reader = index.reader()?;
/// # let top_docs = docs_sorted_by_revenue(&reader.searcher(), &AllQuery, rating)?;
@@ -417,9 +423,9 @@ impl TopDocs {
/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
/// let product_name = index.schema().get_field("product_name").unwrap();
/// let popularity: Field = index.schema().get_field("popularity").unwrap();
/// index_writer.add_document(doc!(product_name => "The Diary of Muadib", popularity => 1u64));
/// index_writer.add_document(doc!(product_name => "A Dairy Cow", popularity => 10u64));
/// index_writer.add_document(doc!(product_name => "The Diary of a Young Girl", popularity => 15u64));
/// index_writer.add_document(doc!(product_name => "The Diary of Muadib", popularity => 1u64))?;
/// index_writer.add_document(doc!(product_name => "A Dairy Cow", popularity => 10u64))?;
/// index_writer.add_document(doc!(product_name => "The Diary of a Young Girl", popularity => 15u64))?;
/// index_writer.commit()?;
/// Ok(index)
/// }
@@ -527,9 +533,9 @@ impl TopDocs {
/// #
/// let popularity: Field = index.schema().get_field("popularity").unwrap();
/// let boosted: Field = index.schema().get_field("boosted").unwrap();
/// # index_writer.add_document(doc!(boosted=>1u64, product_name => "The Diary of Muadib", popularity => 1u64));
/// # index_writer.add_document(doc!(boosted=>0u64, product_name => "A Dairy Cow", popularity => 10u64));
/// # index_writer.add_document(doc!(boosted=>0u64, product_name => "The Diary of a Young Girl", popularity => 15u64));
/// # index_writer.add_document(doc!(boosted=>1u64, product_name => "The Diary of Muadib", popularity => 1u64))?;
/// # index_writer.add_document(doc!(boosted=>0u64, product_name => "A Dairy Cow", popularity => 10u64))?;
/// # index_writer.add_document(doc!(boosted=>0u64, product_name => "The Diary of a Young Girl", popularity => 15u64))?;
/// # index_writer.commit()?;
/// // ...
/// # let user_query = "diary";
@@ -629,10 +635,10 @@ impl Collector for TopDocs {
let heap_len = self.0.limit + self.0.offset;
let mut heap: BinaryHeap<ComparableDoc<Score, DocId>> = BinaryHeap::with_capacity(heap_len);
if let Some(delete_bitset) = reader.delete_bitset() {
if let Some(alive_bitset) = reader.alive_bitset() {
let mut threshold = Score::MIN;
weight.for_each_pruning(threshold, reader, &mut |doc, score| {
if delete_bitset.is_deleted(doc) {
if alive_bitset.is_deleted(doc) {
return threshold;
}
let heap_item = ComparableDoc {
@@ -708,25 +714,20 @@ mod tests {
use crate::collector::Collector;
use crate::query::{AllQuery, Query, QueryParser};
use crate::schema::{Field, Schema, FAST, STORED, TEXT};
use crate::Index;
use crate::IndexWriter;
use crate::Score;
use crate::{DocAddress, DocId, SegmentReader};
use crate::{DocAddress, DocId, Index, IndexWriter, Score, SegmentReader};
fn make_index() -> Index {
fn make_index() -> crate::Result<Index> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
// writing the segment
let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
index_writer.add_document(doc!(text_field=>"Hello happy tax payer."));
index_writer.add_document(doc!(text_field=>"Droopy says hello happy tax payer"));
index_writer.add_document(doc!(text_field=>"I like Droopy"));
assert!(index_writer.commit().is_ok());
}
index
// writing the segment
let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
index_writer.add_document(doc!(text_field=>"Hello happy tax payer."))?;
index_writer.add_document(doc!(text_field=>"Droopy says hello happy tax payer"))?;
index_writer.add_document(doc!(text_field=>"I like Droopy"))?;
index_writer.commit()?;
Ok(index)
}
fn assert_results_equals(results: &[(Score, DocAddress)], expected: &[(Score, DocAddress)]) {
@@ -737,17 +738,15 @@ mod tests {
}
#[test]
fn test_top_collector_not_at_capacity_without_offset() {
let index = make_index();
fn test_top_collector_not_at_capacity_without_offset() -> crate::Result<()> {
let index = make_index()?;
let field = index.schema().get_field("text").unwrap();
let query_parser = QueryParser::for_index(&index, vec![field]);
let text_query = query_parser.parse_query("droopy tax").unwrap();
let text_query = query_parser.parse_query("droopy tax")?;
let score_docs: Vec<(Score, DocAddress)> = index
.reader()
.unwrap()
.reader()?
.searcher()
.search(&text_query, &TopDocs::with_limit(4))
.unwrap();
.search(&text_query, &TopDocs::with_limit(4))?;
assert_results_equals(
&score_docs,
&[
@@ -756,11 +755,12 @@ mod tests {
(0.48527452, DocAddress::new(0, 0)),
],
);
Ok(())
}
#[test]
fn test_top_collector_not_at_capacity_with_offset() {
let index = make_index();
let index = make_index().unwrap();
let field = index.schema().get_field("text").unwrap();
let query_parser = QueryParser::for_index(&index, vec![field]);
let text_query = query_parser.parse_query("droopy tax").unwrap();
@@ -775,7 +775,7 @@ mod tests {
#[test]
fn test_top_collector_at_capacity() {
let index = make_index();
let index = make_index().unwrap();
let field = index.schema().get_field("text").unwrap();
let query_parser = QueryParser::for_index(&index, vec![field]);
let text_query = query_parser.parse_query("droopy tax").unwrap();
@@ -796,7 +796,7 @@ mod tests {
#[test]
fn test_top_collector_at_capacity_with_offset() {
let index = make_index();
let index = make_index().unwrap();
let field = index.schema().get_field("text").unwrap();
let query_parser = QueryParser::for_index(&index, vec![field]);
let text_query = query_parser.parse_query("droopy tax").unwrap();
@@ -817,7 +817,7 @@ mod tests {
#[test]
fn test_top_collector_stable_sorting() {
let index = make_index();
let index = make_index().unwrap();
// using AllQuery to get a constant score
let searcher = index.reader().unwrap().searcher();
@@ -848,29 +848,35 @@ mod tests {
const SIZE: &str = "size";
#[test]
fn test_top_field_collector_not_at_capacity() {
fn test_top_field_collector_not_at_capacity() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let title = schema_builder.add_text_field(TITLE, TEXT);
let size = schema_builder.add_u64_field(SIZE, FAST);
let schema = schema_builder.build();
let (index, query) = index("beer", title, schema, |index_writer| {
index_writer.add_document(doc!(
title => "bottle of beer",
size => 12u64,
));
index_writer.add_document(doc!(
title => "growler of beer",
size => 64u64,
));
index_writer.add_document(doc!(
title => "pint of beer",
size => 16u64,
));
index_writer
.add_document(doc!(
title => "bottle of beer",
size => 12u64,
))
.unwrap();
index_writer
.add_document(doc!(
title => "growler of beer",
size => 64u64,
))
.unwrap();
index_writer
.add_document(doc!(
title => "pint of beer",
size => 16u64,
))
.unwrap();
});
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
let top_collector = TopDocs::with_limit(4).order_by_u64_field(size);
let top_docs: Vec<(u64, DocAddress)> = searcher.search(&query, &top_collector).unwrap();
let top_docs: Vec<(u64, DocAddress)> = searcher.search(&query, &top_collector)?;
assert_eq!(
&top_docs[..],
&[
@@ -879,6 +885,7 @@ mod tests {
(12, DocAddress::new(0, 0))
]
);
Ok(())
}
#[test]
@@ -894,12 +901,12 @@ mod tests {
index_writer.add_document(doc!(
name => "Paul Robeson",
birthday => pr_birthday
));
))?;
let mr_birthday = crate::DateTime::from_str("1947-11-08T00:00:00+00:00")?;
index_writer.add_document(doc!(
name => "Minnie Riperton",
birthday => mr_birthday
));
))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let top_collector = TopDocs::with_limit(3).order_by_fast_field(birthday);
@@ -926,11 +933,11 @@ mod tests {
index_writer.add_document(doc!(
city => "georgetown",
altitude => -1i64,
));
))?;
index_writer.add_document(doc!(
city => "tokyo",
altitude => 40i64,
));
))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let top_collector = TopDocs::with_limit(3).order_by_fast_field(altitude);
@@ -956,11 +963,11 @@ mod tests {
index_writer.add_document(doc!(
city => "georgetown",
altitude => -1.0f64,
));
))?;
index_writer.add_document(doc!(
city => "tokyo",
altitude => 40f64,
));
))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let top_collector = TopDocs::with_limit(3).order_by_fast_field(altitude);
@@ -983,10 +990,12 @@ mod tests {
let size = schema_builder.add_u64_field(SIZE, FAST);
let schema = schema_builder.build();
let (index, _) = index("beer", title, schema, |index_writer| {
index_writer.add_document(doc!(
title => "bottle of beer",
size => 12u64,
));
index_writer
.add_document(doc!(
title => "bottle of beer",
size => 12u64,
))
.unwrap();
});
let searcher = index.reader().unwrap().searcher();
let top_collector = TopDocs::with_limit(4).order_by_u64_field(Field::from_field_id(2));
@@ -1003,7 +1012,7 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(size=>1u64));
index_writer.add_document(doc!(size=>1u64))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let segment = searcher.segment_reader(0);
@@ -1020,7 +1029,7 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(size=>1u64));
index_writer.add_document(doc!(size=>1u64))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let segment = searcher.segment_reader(0);
@@ -1033,30 +1042,26 @@ mod tests {
}
#[test]
fn test_tweak_score_top_collector_with_offset() {
let index = make_index();
fn test_tweak_score_top_collector_with_offset() -> crate::Result<()> {
let index = make_index()?;
let field = index.schema().get_field("text").unwrap();
let query_parser = QueryParser::for_index(&index, vec![field]);
let text_query = query_parser.parse_query("droopy tax").unwrap();
let text_query = query_parser.parse_query("droopy tax")?;
let collector = TopDocs::with_limit(2).and_offset(1).tweak_score(
move |_segment_reader: &SegmentReader| move |doc: DocId, _original_score: Score| doc,
);
let score_docs: Vec<(u32, DocAddress)> = index
.reader()
.unwrap()
.searcher()
.search(&text_query, &collector)
.unwrap();
let score_docs: Vec<(u32, DocAddress)> =
index.reader()?.searcher().search(&text_query, &collector)?;
assert_eq!(
score_docs,
vec![(1, DocAddress::new(0, 1)), (0, DocAddress::new(0, 0)),]
);
Ok(())
}
#[test]
fn test_custom_score_top_collector_with_offset() {
let index = make_index();
let index = make_index().unwrap();
let field = index.schema().get_field("text").unwrap();
let query_parser = QueryParser::for_index(&index, vec![field]);
let text_query = query_parser.parse_query("droopy tax").unwrap();

View File

@@ -1,7 +1,6 @@
use crate::collector::top_collector::{TopCollector, TopSegmentCollector};
use crate::collector::{Collector, SegmentCollector};
use crate::DocAddress;
use crate::{DocId, Result, Score, SegmentReader};
use crate::{DocAddress, DocId, Result, Score, SegmentReader};
pub(crate) struct TweakedScoreTopCollector<TScoreTweaker, TScore = Score> {
score_tweaker: TScoreTweaker,
@@ -9,8 +8,7 @@ pub(crate) struct TweakedScoreTopCollector<TScoreTweaker, TScore = Score> {
}
impl<TScoreTweaker, TScore> TweakedScoreTopCollector<TScoreTweaker, TScore>
where
TScore: Clone + PartialOrd,
where TScore: Clone + PartialOrd
{
pub fn new(
score_tweaker: TScoreTweaker,
@@ -118,8 +116,7 @@ where
}
impl<F, TScore> ScoreSegmentTweaker<TScore> for F
where
F: 'static + FnMut(DocId, Score) -> TScore,
where F: 'static + FnMut(DocId, Score) -> TScore
{
fn score(&mut self, doc: DocId, score: Score) -> TScore {
(self)(doc, score)

View File

@@ -1,396 +0,0 @@
use std::fmt;
use std::u64;
#[derive(Clone, Copy, Eq, PartialEq)]
pub(crate) struct TinySet(u64);
impl fmt::Debug for TinySet {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.into_iter().collect::<Vec<u32>>().fmt(f)
}
}
pub struct TinySetIterator(TinySet);
impl Iterator for TinySetIterator {
type Item = u32;
fn next(&mut self) -> Option<Self::Item> {
self.0.pop_lowest()
}
}
impl IntoIterator for TinySet {
type Item = u32;
type IntoIter = TinySetIterator;
fn into_iter(self) -> Self::IntoIter {
TinySetIterator(self)
}
}
impl TinySet {
/// Returns an empty `TinySet`.
pub fn empty() -> TinySet {
TinySet(0u64)
}
pub fn clear(&mut self) {
self.0 = 0u64;
}
/// Returns the complement of the set in `[0, 64[`.
fn complement(self) -> TinySet {
TinySet(!self.0)
}
/// Returns true iff the `TinySet` contains the element `el`.
pub fn contains(self, el: u32) -> bool {
!self.intersect(TinySet::singleton(el)).is_empty()
}
/// Returns the number of elements in the TinySet.
pub fn len(self) -> u32 {
self.0.count_ones()
}
/// Returns the intersection of `self` and `other`
pub fn intersect(self, other: TinySet) -> TinySet {
TinySet(self.0 & other.0)
}
/// Creates a new `TinySet` containing only one element
/// within `[0; 64[`
#[inline]
pub fn singleton(el: u32) -> TinySet {
TinySet(1u64 << u64::from(el))
}
/// Insert a new element within [0..64[
#[inline]
pub fn insert(self, el: u32) -> TinySet {
self.union(TinySet::singleton(el))
}
/// Insert a new element within [0..64[
#[inline]
pub fn insert_mut(&mut self, el: u32) -> bool {
let old = *self;
*self = old.insert(el);
old != *self
}
/// Returns the union of two tinysets
#[inline]
pub fn union(self, other: TinySet) -> TinySet {
TinySet(self.0 | other.0)
}
/// Returns true iff the `TinySet` is empty.
#[inline]
pub fn is_empty(self) -> bool {
self.0 == 0u64
}
/// Returns the lowest element in the `TinySet`
/// and removes it.
#[inline]
pub fn pop_lowest(&mut self) -> Option<u32> {
if self.is_empty() {
None
} else {
let lowest = self.0.trailing_zeros() as u32;
self.0 ^= TinySet::singleton(lowest).0;
Some(lowest)
}
}
/// Returns a `TinySet` than contains all values up
/// to limit excluded.
///
/// The limit is assumed to be strictly lower than 64.
pub fn range_lower(upper_bound: u32) -> TinySet {
TinySet((1u64 << u64::from(upper_bound % 64u32)) - 1u64)
}
/// Returns a `TinySet` that contains all values greater than
/// or equal to the given limit, included (and up to 63).
///
/// The limit is assumed to be strictly lower than 64.
pub fn range_greater_or_equal(from_included: u32) -> TinySet {
TinySet::range_lower(from_included).complement()
}
}
#[derive(Clone)]
pub struct BitSet {
tinysets: Box<[TinySet]>,
len: usize,
max_value: u32,
}
fn num_buckets(max_val: u32) -> u32 {
(max_val + 63u32) / 64u32
}
impl BitSet {
/// Create a new `BitSet` that may contain elements
/// within `[0, max_val[`.
pub fn with_max_value(max_value: u32) -> BitSet {
let num_buckets = num_buckets(max_value);
let tinybisets = vec![TinySet::empty(); num_buckets as usize].into_boxed_slice();
BitSet {
tinysets: tinybisets,
len: 0,
max_value,
}
}
/// Removes all elements from the `BitSet`.
pub fn clear(&mut self) {
for tinyset in self.tinysets.iter_mut() {
*tinyset = TinySet::empty();
}
}
/// Returns the number of elements in the `BitSet`.
pub fn len(&self) -> usize {
self.len
}
/// Inserts an element in the `BitSet`
pub fn insert(&mut self, el: u32) {
// we do not check saturated els.
let higher = el / 64u32;
let lower = el % 64u32;
self.len += if self.tinysets[higher as usize].insert_mut(lower) {
1
} else {
0
};
}
/// Returns true iff the element is in the `BitSet`.
pub fn contains(&self, el: u32) -> bool {
self.tinyset(el / 64u32).contains(el % 64)
}
/// Returns the first non-empty `TinySet` associated with a bucket equal to
/// or greater than `bucket`.
///
/// Reminder: the tiny set with the bucket `bucket`, represents the
/// elements from `bucket * 64` to `(bucket+1) * 64`.
pub(crate) fn first_non_empty_bucket(&self, bucket: u32) -> Option<u32> {
self.tinysets[bucket as usize..]
.iter()
.cloned()
.position(|tinyset| !tinyset.is_empty())
.map(|delta_bucket| bucket + delta_bucket as u32)
}
pub fn max_value(&self) -> u32 {
self.max_value
}
/// Returns the tiny bitset representing the
/// set restricted to the number range from
/// `bucket * 64` to `(bucket + 1) * 64`.
pub(crate) fn tinyset(&self, bucket: u32) -> TinySet {
self.tinysets[bucket as usize]
}
}
#[cfg(test)]
mod tests {
use super::BitSet;
use super::TinySet;
use crate::docset::{DocSet, TERMINATED};
use crate::query::BitSetDocSet;
use crate::tests;
use crate::tests::generate_nonunique_unsorted;
use std::collections::BTreeSet;
use std::collections::HashSet;
#[test]
fn test_tiny_set() {
assert!(TinySet::empty().is_empty());
{
let mut u = TinySet::empty().insert(1u32);
assert_eq!(u.pop_lowest(), Some(1u32));
assert!(u.pop_lowest().is_none())
}
{
let mut u = TinySet::empty().insert(1u32).insert(1u32);
assert_eq!(u.pop_lowest(), Some(1u32));
assert!(u.pop_lowest().is_none())
}
{
let mut u = TinySet::empty().insert(2u32);
assert_eq!(u.pop_lowest(), Some(2u32));
u.insert_mut(1u32);
assert_eq!(u.pop_lowest(), Some(1u32));
assert!(u.pop_lowest().is_none());
}
{
let mut u = TinySet::empty().insert(63u32);
assert_eq!(u.pop_lowest(), Some(63u32));
assert!(u.pop_lowest().is_none());
}
}
#[test]
fn test_bitset() {
let test_against_hashset = |els: &[u32], max_value: u32| {
let mut hashset: HashSet<u32> = HashSet::new();
let mut bitset = BitSet::with_max_value(max_value);
for &el in els {
assert!(el < max_value);
hashset.insert(el);
bitset.insert(el);
}
for el in 0..max_value {
assert_eq!(hashset.contains(&el), bitset.contains(el));
}
assert_eq!(bitset.max_value(), max_value);
};
test_against_hashset(&[], 0);
test_against_hashset(&[], 1);
test_against_hashset(&[0u32], 1);
test_against_hashset(&[0u32], 100);
test_against_hashset(&[1u32, 2u32], 4);
test_against_hashset(&[99u32], 100);
test_against_hashset(&[63u32], 64);
test_against_hashset(&[62u32, 63u32], 64);
}
#[test]
fn test_bitset_large() {
let arr = generate_nonunique_unsorted(100_000, 5_000);
let mut btreeset: BTreeSet<u32> = BTreeSet::new();
let mut bitset = BitSet::with_max_value(100_000);
for el in arr {
btreeset.insert(el);
bitset.insert(el);
}
for i in 0..100_000 {
assert_eq!(btreeset.contains(&i), bitset.contains(i));
}
assert_eq!(btreeset.len(), bitset.len());
let mut bitset_docset = BitSetDocSet::from(bitset);
let mut remaining = true;
for el in btreeset.into_iter() {
assert!(remaining);
assert_eq!(bitset_docset.doc(), el);
remaining = bitset_docset.advance() != TERMINATED;
}
assert!(!remaining);
}
#[test]
fn test_bitset_num_buckets() {
use super::num_buckets;
assert_eq!(num_buckets(0u32), 0);
assert_eq!(num_buckets(1u32), 1);
assert_eq!(num_buckets(64u32), 1);
assert_eq!(num_buckets(65u32), 2);
assert_eq!(num_buckets(128u32), 2);
assert_eq!(num_buckets(129u32), 3);
}
#[test]
fn test_tinyset_range() {
assert_eq!(
TinySet::range_lower(3).into_iter().collect::<Vec<u32>>(),
[0, 1, 2]
);
assert!(TinySet::range_lower(0).is_empty());
assert_eq!(
TinySet::range_lower(63).into_iter().collect::<Vec<u32>>(),
(0u32..63u32).collect::<Vec<_>>()
);
assert_eq!(
TinySet::range_lower(1).into_iter().collect::<Vec<u32>>(),
[0]
);
assert_eq!(
TinySet::range_lower(2).into_iter().collect::<Vec<u32>>(),
[0, 1]
);
assert_eq!(
TinySet::range_greater_or_equal(3)
.into_iter()
.collect::<Vec<u32>>(),
(3u32..64u32).collect::<Vec<_>>()
);
}
#[test]
fn test_bitset_len() {
let mut bitset = BitSet::with_max_value(1_000);
assert_eq!(bitset.len(), 0);
bitset.insert(3u32);
assert_eq!(bitset.len(), 1);
bitset.insert(103u32);
assert_eq!(bitset.len(), 2);
bitset.insert(3u32);
assert_eq!(bitset.len(), 2);
bitset.insert(103u32);
assert_eq!(bitset.len(), 2);
bitset.insert(104u32);
assert_eq!(bitset.len(), 3);
}
#[test]
fn test_bitset_clear() {
let mut bitset = BitSet::with_max_value(1_000);
let els = tests::sample(1_000, 0.01f64);
for &el in &els {
bitset.insert(el);
}
assert!(els.iter().all(|el| bitset.contains(*el)));
bitset.clear();
for el in 0u32..1000u32 {
assert!(!bitset.contains(el));
}
}
}
#[cfg(all(test, feature = "unstable"))]
mod bench {
use super::BitSet;
use super::TinySet;
use test;
#[bench]
fn bench_tinyset_pop(b: &mut test::Bencher) {
b.iter(|| {
let mut tinyset = TinySet::singleton(test::black_box(31u32));
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
tinyset.pop_lowest();
});
}
#[bench]
fn bench_tinyset_sum(b: &mut test::Bencher) {
let tiny_set = TinySet::empty().insert(10u32).insert(14u32).insert(21u32);
b.iter(|| {
assert_eq!(test::black_box(tiny_set).into_iter().sum::<u32>(), 45u32);
});
}
#[bench]
fn bench_tinyarr_sum(b: &mut test::Bencher) {
let v = [10u32, 14u32, 21u32];
b.iter(|| test::black_box(v).iter().cloned().sum::<u32>());
}
#[bench]
fn bench_bitset_initialize(b: &mut test::Bencher) {
b.iter(|| BitSet::with_max_value(1_000_000));
}
}
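
The removed `bitset.rs` above stores a set of `u32` values as one 64-bit `TinySet` word per bucket: element `el` lives in bucket `el / 64`, at bit `el % 64`, and `pop_lowest` is a `trailing_zeros` followed by clearing that bit. A small standalone sketch of that layout, using plain `u64` words rather than tantivy's types:

// Illustrative sketch of the bucket layout used by the removed BitSet/TinySet.
fn bucket_and_bit(el: u32) -> (usize, u32) {
    ((el / 64) as usize, el % 64)
}

fn main() {
    let max_value = 200u32;
    // Mirrors num_buckets(): enough 64-bit words to cover [0, max_value[.
    let num_buckets = ((max_value + 63) / 64) as usize;
    let mut buckets = vec![0u64; num_buckets];

    // insert(130) sets bit 2 of bucket 2.
    let (bucket, bit) = bucket_and_bit(130);
    buckets[bucket] |= 1u64 << bit;

    // contains(130) is true, contains(131) is false.
    assert!(buckets[bucket] & (1u64 << bit) != 0);
    assert!(buckets[bucket] & (1u64 << (bit + 1)) == 0);

    // pop_lowest() is trailing_zeros(), then the bit is cleared.
    let lowest = buckets[bucket].trailing_zeros();
    assert_eq!(lowest, 2);
    buckets[bucket] ^= 1u64 << lowest;
    assert_eq!(buckets[bucket], 0);
}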

View File

@@ -1,203 +0,0 @@
mod bitset;
mod composite_file;
pub use self::bitset::BitSet;
pub(crate) use self::bitset::TinySet;
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
pub use byteorder::LittleEndian as Endianness;
pub use common::CountingWriter;
pub use common::{
read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt,
};
pub use common::{BinarySerializable, DeserializeFrom, FixedSize};
/// Segment's max doc must be `< MAX_DOC_LIMIT`.
///
/// We do not allow segments with more than `2^31` documents.
pub const MAX_DOC_LIMIT: u32 = 1 << 31;
/// Has length trait
pub trait HasLen {
/// Return length
fn len(&self) -> usize;
/// Returns true iff empty.
fn is_empty(&self) -> bool {
self.len() == 0
}
}
const HIGHEST_BIT: u64 = 1 << 63;
/// Maps a `i64` to `u64`
///
/// For simplicity, tantivy internally handles `i64` as `u64`.
/// The mapping is defined by this function.
///
/// Maps `i64` to `u64` so that
/// `-2^63 .. 2^63-1` is mapped
/// to
/// `0 .. 2^64-1`
/// in that order.
///
/// This is more suited than simply casting (`val as u64`)
/// because of bitpacking.
///
/// Imagine a list of `i64` ranging from -10 to 10.
/// When casting negative values, the negative values are projected
/// to values over 2^63, and all values end up requiring 64 bits.
///
/// # See also
/// The [reverse mapping is `u64_to_i64`](./fn.u64_to_i64.html).
#[inline]
pub fn i64_to_u64(val: i64) -> u64 {
(val as u64) ^ HIGHEST_BIT
}
/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
#[inline]
pub fn u64_to_i64(val: u64) -> i64 {
(val ^ HIGHEST_BIT) as i64
}
/// Maps a `f64` to `u64`
///
/// For simplicity, tantivy internally handles `f64` as `u64`.
/// The mapping is defined by this function.
///
/// Maps `f64` to `u64` in a monotonic manner, so that bytes lexical order is preserved.
///
/// This is more suited than simply casting (`val as u64`)
/// which would truncate the result
///
/// # Reference
///
/// Daniel Lemire's [blog post](https://lemire.me/blog/2020/12/14/converting-floating-point-numbers-to-integers-while-preserving-order/)
/// explains the mapping in a clear manner.
///
/// # See also
/// The [reverse mapping is `u64_to_f64`](./fn.u64_to_f64.html).
#[inline]
pub fn f64_to_u64(val: f64) -> u64 {
let bits = val.to_bits();
if val.is_sign_positive() {
bits ^ HIGHEST_BIT
} else {
!bits
}
}
/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
#[inline]
pub fn u64_to_f64(val: u64) -> f64 {
f64::from_bits(if val & HIGHEST_BIT != 0 {
val ^ HIGHEST_BIT
} else {
!val
})
}
#[cfg(test)]
pub(crate) mod test {
use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
use common::{BinarySerializable, FixedSize};
use proptest::prelude::*;
use std::f64;
use tantivy_bitpacker::compute_num_bits;
pub use tantivy_bitpacker::minmax;
fn test_i64_converter_helper(val: i64) {
assert_eq!(u64_to_i64(i64_to_u64(val)), val);
}
fn test_f64_converter_helper(val: f64) {
assert_eq!(u64_to_f64(f64_to_u64(val)), val);
}
pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
let mut buffer = Vec::new();
O::default().serialize(&mut buffer).unwrap();
assert_eq!(buffer.len(), O::SIZE_IN_BYTES);
}
proptest! {
#[test]
fn test_f64_converter_monotonicity_proptest((left, right) in (proptest::num::f64::NORMAL, proptest::num::f64::NORMAL)) {
let left_u64 = f64_to_u64(left);
let right_u64 = f64_to_u64(right);
assert_eq!(left_u64 < right_u64, left < right);
}
}
#[test]
fn test_i64_converter() {
assert_eq!(i64_to_u64(i64::min_value()), u64::min_value());
assert_eq!(i64_to_u64(i64::max_value()), u64::max_value());
test_i64_converter_helper(0i64);
test_i64_converter_helper(i64::min_value());
test_i64_converter_helper(i64::max_value());
for i in -1000i64..1000i64 {
test_i64_converter_helper(i);
}
}
#[test]
fn test_f64_converter() {
test_f64_converter_helper(f64::INFINITY);
test_f64_converter_helper(f64::NEG_INFINITY);
test_f64_converter_helper(0.0);
test_f64_converter_helper(-0.0);
test_f64_converter_helper(1.0);
test_f64_converter_helper(-1.0);
}
#[test]
fn test_f64_order() {
assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
.contains(&f64_to_u64(f64::NAN))); //nan is not a number
assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); //same exponent, different mantissa
assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); //same mantissa, different exponent
assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); //different exponent and mantissa
assert!(f64_to_u64(1.0) > f64_to_u64(-1.0)); // pos > neg
assert!(f64_to_u64(-1.5) < f64_to_u64(-1.0));
assert!(f64_to_u64(-2.0) < f64_to_u64(1.0));
assert!(f64_to_u64(-2.0) < f64_to_u64(-1.5));
}
#[test]
fn test_compute_num_bits() {
assert_eq!(compute_num_bits(1), 1u8);
assert_eq!(compute_num_bits(0), 0u8);
assert_eq!(compute_num_bits(2), 2u8);
assert_eq!(compute_num_bits(3), 2u8);
assert_eq!(compute_num_bits(4), 3u8);
assert_eq!(compute_num_bits(255), 8u8);
assert_eq!(compute_num_bits(256), 9u8);
assert_eq!(compute_num_bits(5_000_000_000), 33u8);
}
#[test]
fn test_max_doc() {
// this is the first time I write a unit test for a constant.
assert!(((super::MAX_DOC_LIMIT - 1) as i32) >= 0);
assert!((super::MAX_DOC_LIMIT as i32) < 0);
}
#[test]
fn test_minmax_empty() {
let vals: Vec<u32> = vec![];
assert_eq!(minmax(vals.into_iter()), None);
}
#[test]
fn test_minmax_one() {
assert_eq!(minmax(vec![1].into_iter()), Some((1, 1)));
}
#[test]
fn test_minmax_two() {
assert_eq!(minmax(vec![1, 2].into_iter()), Some((1, 2)));
assert_eq!(minmax(vec![2, 1].into_iter()), Some((1, 2)));
}
}
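
Both mapping functions above are shown in full, so their key property — monotonicity, which is what keeps bitpacked fast-field values ordered — can be checked directly. A short sketch restating them outside the crate:

const HIGHEST_BIT: u64 = 1 << 63;

fn i64_to_u64(val: i64) -> u64 {
    (val as u64) ^ HIGHEST_BIT
}

fn f64_to_u64(val: f64) -> u64 {
    let bits = val.to_bits();
    if val.is_sign_positive() { bits ^ HIGHEST_BIT } else { !bits }
}

fn main() {
    // The i64 mapping sends -2^63..=2^63-1 to 0..=2^64-1, in order.
    assert_eq!(i64_to_u64(i64::MIN), u64::MIN);
    assert_eq!(i64_to_u64(i64::MAX), u64::MAX);
    assert!(i64_to_u64(-1) < i64_to_u64(0));

    // The f64 mapping preserves order, including across the sign change.
    assert!(f64_to_u64(-2.0) < f64_to_u64(-1.5));
    assert!(f64_to_u64(-1.5) < f64_to_u64(1.0));
    assert!(f64_to_u64(1.0) < f64_to_u64(1.5));
}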

View File

@@ -57,7 +57,11 @@ impl Executor {
let (idx, arg) = arg_with_idx;
let fruit = f(arg);
if let Err(err) = fruit_sender.send((idx, fruit)) {
error!("Failed to send search task. It probably means all search threads have panicked. {:?}", err);
error!(
"Failed to send search task. It probably means all search \
threads have panicked. {:?}",
err
);
}
});
}

View File

@@ -1,35 +1,27 @@
use super::{segment::Segment, IndexSettings};
use crate::core::Executor;
use crate::core::IndexMeta;
use crate::core::SegmentId;
use crate::core::SegmentMeta;
use crate::core::SegmentMetaInventory;
use crate::core::META_FILEPATH;
use crate::directory::error::OpenReadError;
use crate::directory::ManagedDirectory;
#[cfg(feature = "mmap")]
use crate::directory::MmapDirectory;
use crate::directory::INDEX_WRITER_LOCK;
use crate::directory::{Directory, RamDirectory};
use crate::error::DataCorruption;
use crate::error::TantivyError;
use crate::indexer::index_writer::{HEAP_SIZE_MIN, MAX_NUM_THREAD};
use crate::indexer::segment_updater::save_new_metas;
use crate::reader::IndexReader;
use crate::reader::IndexReaderBuilder;
use crate::schema::Field;
use crate::schema::FieldType;
use crate::schema::Schema;
use crate::tokenizer::{TextAnalyzer, TokenizerManager};
use crate::IndexWriter;
use std::collections::HashSet;
use std::fmt;
#[cfg(feature = "mmap")]
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use super::segment::Segment;
use super::IndexSettings;
use crate::core::{
Executor, IndexMeta, SegmentId, SegmentMeta, SegmentMetaInventory, META_FILEPATH,
};
use crate::directory::error::OpenReadError;
#[cfg(feature = "mmap")]
use crate::directory::MmapDirectory;
use crate::directory::{Directory, ManagedDirectory, RamDirectory, INDEX_WRITER_LOCK};
use crate::error::{DataCorruption, TantivyError};
use crate::indexer::index_writer::{MAX_NUM_THREAD, MEMORY_ARENA_NUM_BYTES_MIN};
use crate::indexer::segment_updater::save_new_metas;
use crate::reader::{IndexReader, IndexReaderBuilder};
use crate::schema::{Field, FieldType, Schema};
use crate::tokenizer::{TextAnalyzer, TokenizerManager};
use crate::IndexWriter;
fn load_metas(
directory: &dyn Directory,
inventory: &SegmentMetaInventory,
@@ -78,7 +70,6 @@ fn load_metas(
/// let schema = schema_builder.build();
/// let settings = IndexSettings{sort_by_field: Some(IndexSortByField{field:"number".to_string(), order:Order::Asc}), ..Default::default()};
/// let index = Index::builder().schema(schema).settings(settings).create_in_ram();
///
/// ```
pub struct IndexBuilder {
schema: Option<Schema>,
@@ -97,16 +88,21 @@ impl IndexBuilder {
index_settings: IndexSettings::default(),
}
}
/// Set the settings
#[must_use]
pub fn settings(mut self, settings: IndexSettings) -> Self {
self.index_settings = settings;
self
}
/// Set the schema
#[must_use]
pub fn schema(mut self, schema: Schema) -> Self {
self.schema = Some(schema);
self
}
/// Creates a new index using the `RAMDirectory`.
///
/// The index will be allocated in anonymous memory.
@@ -117,18 +113,20 @@ impl IndexBuilder {
.create(ram_directory)
.expect("Creating a RAMDirectory should never fail"))
}
/// Creates a new index in a given filepath.
/// The index will use the `MMapDirectory`.
///
/// If a previous index was in this directory, then its meta file will be destroyed.
/// If a previous index was in this directory, it returns an `IndexAlreadyExists` error.
#[cfg(feature = "mmap")]
pub fn create_in_dir<P: AsRef<Path>>(self, directory_path: P) -> crate::Result<Index> {
let mmap_directory = MmapDirectory::open(directory_path)?;
if Index::exists(&mmap_directory)? {
let mmap_directory: Box<dyn Directory> = Box::new(MmapDirectory::open(directory_path)?);
if Index::exists(&*mmap_directory)? {
return Err(TantivyError::IndexAlreadyExists);
}
self.create(mmap_directory)
}
/// Creates a new index in a temp directory.
///
/// The index will use the `MMapDirectory` in a newly created directory.
@@ -139,18 +137,21 @@ impl IndexBuilder {
/// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`.
#[cfg(feature = "mmap")]
pub fn create_from_tempdir(self) -> crate::Result<Index> {
let mmap_directory = MmapDirectory::create_from_tempdir()?;
let mmap_directory: Box<dyn Directory> = Box::new(MmapDirectory::create_from_tempdir()?);
self.create(mmap_directory)
}
fn get_expect_schema(&self) -> crate::Result<Schema> {
self.schema
.as_ref()
.cloned()
.ok_or(TantivyError::IndexBuilderMissingArgument("schema"))
}
/// Opens or creates a new index in the provided directory
pub fn open_or_create<Dir: Directory>(self, dir: Dir) -> crate::Result<Index> {
if !Index::exists(&dir)? {
pub fn open_or_create<T: Into<Box<dyn Directory>>>(self, dir: T) -> crate::Result<Index> {
let dir = dir.into();
if !Index::exists(&*dir)? {
return self.create(dir);
}
let index = Index::open(dir)?;
@@ -165,7 +166,8 @@ impl IndexBuilder {
/// Creates a new index given an implementation of the trait `Directory`.
///
/// If a directory previously existed, it will be erased.
fn create<Dir: Directory>(self, dir: Dir) -> crate::Result<Index> {
fn create<T: Into<Box<dyn Directory>>>(self, dir: T) -> crate::Result<Index> {
let dir = dir.into();
let directory = ManagedDirectory::wrap(dir)?;
save_new_metas(
self.get_expect_schema()?,
@@ -198,7 +200,7 @@ impl Index {
/// Examines the directory to see if it contains an index.
///
/// Effectively, it only checks for the presence of the `meta.json` file.
pub fn exists<Dir: Directory>(dir: &Dir) -> Result<bool, OpenReadError> {
pub fn exists(dir: &dyn Directory) -> Result<bool, OpenReadError> {
dir.exists(&META_FILEPATH)
}
@@ -215,7 +217,7 @@ impl Index {
/// Replace the default single thread search executor pool
/// by a thread pool with a given number of threads.
pub fn set_multithread_executor(&mut self, num_threads: usize) -> crate::Result<()> {
self.executor = Arc::new(Executor::multi_thread(num_threads, "thrd-tantivy-search-")?);
self.executor = Arc::new(Executor::multi_thread(num_threads, "tantivy-search-")?);
Ok(())
}
@@ -229,7 +231,8 @@ impl Index {
/// Creates a new index using the `RamDirectory`.
///
/// The index will be allocated in anonymous memory.
/// This should only be used for unit tests.
/// This is useful for indexing a small set of documents,
/// for instance in unit tests or for a temporary in-memory index.
pub fn create_in_ram(schema: Schema) -> Index {
IndexBuilder::new().schema(schema).create_in_ram().unwrap()
}
@@ -237,7 +240,7 @@ impl Index {
/// Creates a new index in a given filepath.
/// The index will use the `MMapDirectory`.
///
/// If a previous index was in this directory, then its meta file will be destroyed.
/// If a previous index was in this directory, then it returns an `IndexAlreadyExists` error.
#[cfg(feature = "mmap")]
pub fn create_in_dir<P: AsRef<Path>>(
directory_path: P,
@@ -249,7 +252,11 @@ impl Index {
}
/// Opens or creates a new index in the provided directory
pub fn open_or_create<Dir: Directory>(dir: Dir, schema: Schema) -> crate::Result<Index> {
pub fn open_or_create<T: Into<Box<dyn Directory>>>(
dir: T,
schema: Schema,
) -> crate::Result<Index> {
let dir = dir.into();
IndexBuilder::new().schema(schema).open_or_create(dir)
}
@@ -269,11 +276,12 @@ impl Index {
/// Creates a new index given an implementation of the trait `Directory`.
///
/// If a directory previously existed, it will be erased.
pub fn create<Dir: Directory>(
dir: Dir,
pub fn create<T: Into<Box<dyn Directory>>>(
dir: T,
schema: Schema,
settings: IndexSettings,
) -> crate::Result<Index> {
let dir: Box<dyn Directory> = dir.into();
let mut builder = IndexBuilder::new().schema(schema);
builder = builder.settings(settings);
builder.create(dir)
@@ -364,7 +372,8 @@ impl Index {
}
/// Open the index using the provided directory
pub fn open<D: Directory>(directory: D) -> crate::Result<Index> {
pub fn open<T: Into<Box<dyn Directory>>>(directory: T) -> crate::Result<Index> {
let directory = directory.into();
let directory = ManagedDirectory::wrap(directory)?;
let inventory = SegmentMetaInventory::default();
let metas = load_metas(&directory, &inventory)?;
@@ -388,19 +397,18 @@ impl Index {
/// - `num_threads` defines the number of indexing workers that
/// should work at the same time.
///
/// - `overall_heap_size_in_bytes` sets the amount of memory
/// - `overall_memory_arena_in_bytes` sets the amount of memory
/// allocated for all indexing threads.
/// Each thread will receive a budget of `overall_heap_size_in_bytes / num_threads`.
/// Each thread will receive a budget of `overall_memory_arena_in_bytes / num_threads`.
///
/// # Errors
/// If the lockfile already exists, returns `Error::DirectoryLockBusy` or an `Error::IoError`.
///
/// # Panics
/// If the heap size per thread is too small, panics.
/// If the memory arena per thread is too small or too big, returns
/// `TantivyError::InvalidArgument`
pub fn writer_with_num_threads(
&self,
num_threads: usize,
overall_heap_size_in_bytes: usize,
overall_memory_arena_in_bytes: usize,
) -> crate::Result<IndexWriter> {
let directory_lock = self
.directory
@@ -409,26 +417,25 @@ impl Index {
TantivyError::LockFailure(
err,
Some(
"Failed to acquire index lock. If you are using \
a regular directory, this means there is already an \
`IndexWriter` working on this `Directory`, in this process \
or in a different process."
"Failed to acquire index lock. If you are using a regular directory, this \
means there is already an `IndexWriter` working on this `Directory`, in \
this process or in a different process."
.to_string(),
),
)
})?;
let heap_size_in_bytes_per_thread = overall_heap_size_in_bytes / num_threads;
let memory_arena_in_bytes_per_thread = overall_memory_arena_in_bytes / num_threads;
IndexWriter::new(
self,
num_threads,
heap_size_in_bytes_per_thread,
memory_arena_in_bytes_per_thread,
directory_lock,
)
}
/// Helper to create an index writer for tests.
///
/// That index writer simply has a single thread and a heap of 10 MB.
/// That index writer simply has a single thread and a memory arena of 10 MB.
/// Using a single thread gives us a deterministic allocation of DocId.
#[cfg(test)]
pub fn writer_for_tests(&self) -> crate::Result<IndexWriter> {
@@ -438,31 +445,29 @@ impl Index {
/// Creates a multithreaded writer
///
/// Tantivy will automatically define the number of threads to use, but
/// no more than [`MAX_NUM_THREAD`] threads.
/// `overall_heap_size_in_bytes` is the total target memory usage that will be split
/// no more than 8 threads.
/// `overall_memory_arena_in_bytes` is the total target memory usage that will be split
/// between a given number of threads.
///
/// # Errors
/// If the lockfile already exists, returns `Error::FileAlreadyExists`.
/// # Panics
/// If the heap size per thread is too small, panics.
pub fn writer(&self, overall_heap_size_in_bytes: usize) -> crate::Result<IndexWriter> {
/// If the memory arena per thread is too small or too big, returns
/// `TantivyError::InvalidArgument`
pub fn writer(&self, memory_arena_num_bytes: usize) -> crate::Result<IndexWriter> {
let mut num_threads = std::cmp::min(num_cpus::get(), MAX_NUM_THREAD);
let heap_size_in_bytes_per_thread = overall_heap_size_in_bytes / num_threads;
if heap_size_in_bytes_per_thread < HEAP_SIZE_MIN {
num_threads = (overall_heap_size_in_bytes / HEAP_SIZE_MIN).max(1);
let memory_arena_num_bytes_per_thread = memory_arena_num_bytes / num_threads;
if memory_arena_num_bytes_per_thread < MEMORY_ARENA_NUM_BYTES_MIN {
num_threads = (memory_arena_num_bytes / MEMORY_ARENA_NUM_BYTES_MIN).max(1);
}
self.writer_with_num_threads(num_threads, overall_heap_size_in_bytes)
self.writer_with_num_threads(num_threads, memory_arena_num_bytes)
}
/// Accessor to the index settings
///
pub fn settings(&self) -> &IndexSettings {
&self.settings
}
/// Accessor to the index settings
///
pub fn settings_mut(&mut self) -> &mut IndexSettings {
&mut self.settings
}
@@ -523,7 +528,22 @@ impl Index {
/// Returns the set of corrupted files
pub fn validate_checksum(&self) -> crate::Result<HashSet<PathBuf>> {
self.directory.list_damaged().map_err(Into::into)
let managed_files = self.directory.list_managed_files();
let active_segments_files: HashSet<PathBuf> = self
.searchable_segment_metas()?
.iter()
.flat_map(|segment_meta| segment_meta.list_files())
.collect();
let active_existing_files: HashSet<&PathBuf> =
active_segments_files.intersection(&managed_files).collect();
let mut damaged_files = HashSet::new();
for path in active_existing_files {
if !self.directory.validate_checksum(path)? {
damaged_files.insert((*path).clone());
}
}
Ok(damaged_files)
}
}
@@ -535,15 +555,9 @@ impl fmt::Debug for Index {
#[cfg(test)]
mod tests {
use crate::schema::Field;
use crate::schema::{Schema, INDEXED, TEXT};
use crate::IndexReader;
use crate::ReloadPolicy;
use crate::{
directory::{RamDirectory, WatchCallback},
IndexSettings,
};
use crate::{Directory, Index};
use crate::directory::{RamDirectory, WatchCallback};
use crate::schema::{Field, Schema, INDEXED, TEXT};
use crate::{Directory, Index, IndexReader, IndexSettings, ReloadPolicy};
#[test]
fn test_indexer_for_field() {
@@ -561,15 +575,15 @@ mod tests {
#[test]
fn test_index_exists() {
let directory = RamDirectory::create();
assert!(!Index::exists(&directory).unwrap());
let directory: Box<dyn Directory> = Box::new(RamDirectory::create());
assert!(!Index::exists(directory.as_ref()).unwrap());
assert!(Index::create(
directory.clone(),
throw_away_schema(),
IndexSettings::default()
)
.is_ok());
assert!(Index::exists(&directory).unwrap());
assert!(Index::exists(directory.as_ref()).unwrap());
}
#[test]
@@ -582,27 +596,27 @@ mod tests {
#[test]
fn open_or_create_should_open() {
let directory = RamDirectory::create();
let directory: Box<dyn Directory> = Box::new(RamDirectory::create());
assert!(Index::create(
directory.clone(),
throw_away_schema(),
IndexSettings::default()
)
.is_ok());
assert!(Index::exists(&directory).unwrap());
assert!(Index::exists(directory.as_ref()).unwrap());
assert!(Index::open_or_create(directory, throw_away_schema()).is_ok());
}
#[test]
fn create_should_wipeoff_existing() {
let directory = RamDirectory::create();
let directory: Box<dyn Directory> = Box::new(RamDirectory::create());
assert!(Index::create(
directory.clone(),
throw_away_schema(),
IndexSettings::default()
)
.is_ok());
assert!(Index::exists(&directory).unwrap());
assert!(Index::exists(directory.as_ref()).unwrap());
assert!(Index::create(
directory,
Schema::builder().build(),
@@ -636,7 +650,7 @@ mod tests {
}
#[test]
fn test_index_on_commit_reload_policy() {
fn test_index_on_commit_reload_policy() -> crate::Result<()> {
let schema = throw_away_schema();
let field = schema.get_field("num_likes").unwrap();
let index = Index::create_in_ram(schema);
@@ -646,19 +660,21 @@ mod tests {
.try_into()
.unwrap();
assert_eq!(reader.searcher().num_docs(), 0);
test_index_on_commit_reload_policy_aux(field, &index, &reader);
test_index_on_commit_reload_policy_aux(field, &index, &reader)
}
#[cfg(feature = "mmap")]
mod mmap_specific {
use super::*;
use crate::Directory;
use std::path::PathBuf;
use tempfile::TempDir;
use super::*;
use crate::Directory;
#[test]
fn test_index_on_commit_reload_policy_mmap() {
fn test_index_on_commit_reload_policy_mmap() -> crate::Result<()> {
let schema = throw_away_schema();
let field = schema.get_field("num_likes").unwrap();
let tempdir = TempDir::new().unwrap();
@@ -670,7 +686,7 @@ mod tests {
.try_into()
.unwrap();
assert_eq!(reader.searcher().num_docs(), 0);
test_index_on_commit_reload_policy_aux(field, &index, &reader);
test_index_on_commit_reload_policy_aux(field, &index, &reader)
}
#[test]
@@ -685,7 +701,7 @@ mod tests {
.reload_policy(ReloadPolicy::Manual)
.try_into()?;
assert_eq!(reader.searcher().num_docs(), 0);
writer.add_document(doc!(field=>1u64));
writer.add_document(doc!(field=>1u64))?;
let (sender, receiver) = crossbeam::channel::unbounded();
let _handle = index.directory_mut().watch(WatchCallback::new(move || {
let _ = sender.send(());
@@ -699,7 +715,7 @@ mod tests {
}
#[test]
fn test_index_on_commit_reload_policy_different_directories() {
fn test_index_on_commit_reload_policy_different_directories() -> crate::Result<()> {
let schema = throw_away_schema();
let field = schema.get_field("num_likes").unwrap();
let tempdir = TempDir::new().unwrap();
@@ -712,10 +728,14 @@ mod tests {
.try_into()
.unwrap();
assert_eq!(reader.searcher().num_docs(), 0);
test_index_on_commit_reload_policy_aux(field, &write_index, &reader);
test_index_on_commit_reload_policy_aux(field, &write_index, &reader)
}
}
fn test_index_on_commit_reload_policy_aux(field: Field, index: &Index, reader: &IndexReader) {
fn test_index_on_commit_reload_policy_aux(
field: Field,
index: &Index,
reader: &IndexReader,
) -> crate::Result<()> {
let mut reader_index = reader.index();
let (sender, receiver) = crossbeam::channel::unbounded();
let _watch_handle = reader_index
@@ -723,9 +743,9 @@ mod tests {
.watch(WatchCallback::new(move || {
let _ = sender.send(());
}));
let mut writer = index.writer_for_tests().unwrap();
let mut writer = index.writer_for_tests()?;
assert_eq!(reader.searcher().num_docs(), 0);
writer.add_document(doc!(field=>1u64));
writer.add_document(doc!(field=>1u64))?;
writer.commit().unwrap();
// We need a loop here because it is possible for notify to send more than
// one modify event. It was observed on CI on MacOS.
@@ -735,7 +755,7 @@ mod tests {
break;
}
}
writer.add_document(doc!(field=>2u64));
writer.add_document(doc!(field=>2u64))?;
writer.commit().unwrap();
// ... Same as above
loop {
@@ -744,37 +764,37 @@ mod tests {
break;
}
}
Ok(())
}
// This test will not pass on windows, because windows
// prevent deleting files that are MMapped.
#[cfg(not(target_os = "windows"))]
#[test]
fn garbage_collect_works_as_intended() {
fn garbage_collect_works_as_intended() -> crate::Result<()> {
let directory = RamDirectory::create();
let schema = throw_away_schema();
let field = schema.get_field("num_likes").unwrap();
let index = Index::create(directory.clone(), schema, IndexSettings::default()).unwrap();
let index = Index::create(directory.clone(), schema, IndexSettings::default())?;
let mut writer = index.writer_with_num_threads(8, 24_000_000).unwrap();
for i in 0u64..8_000u64 {
writer.add_document(doc!(field => i));
writer.add_document(doc!(field => i))?;
}
let (sender, receiver) = crossbeam::channel::unbounded();
let _handle = directory.watch(WatchCallback::new(move || {
let _ = sender.send(());
}));
writer.commit().unwrap();
writer.commit()?;
let mem_right_after_commit = directory.total_mem_usage();
assert!(receiver.recv().is_ok());
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::Manual)
.try_into()
.unwrap();
.try_into()?;
assert_eq!(reader.searcher().num_docs(), 8_000);
writer.wait_merging_threads().unwrap();
writer.wait_merging_threads()?;
let mem_right_after_merge_finished = directory.total_mem_usage();
reader.reload().unwrap();
@@ -786,5 +806,6 @@ mod tests {
mem_right_after_merge_finished,
mem_right_after_commit
);
Ok(())
}
}
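
`Index::writer` splits `memory_arena_num_bytes` evenly across indexing threads and shrinks the thread pool when the per-thread share would fall below `MEMORY_ARENA_NUM_BYTES_MIN`. A hedged sketch of that adjustment; the two constant values below are placeholders, the real `MAX_NUM_THREAD` and `MEMORY_ARENA_NUM_BYTES_MIN` live in `indexer/index_writer.rs`:

// Sketch of the thread-count adjustment in Index::writer. The constant values
// here are placeholders; the actual ones are defined in indexer/index_writer.rs.
const MAX_NUM_THREAD: usize = 8;
const MEMORY_ARENA_NUM_BYTES_MIN: usize = 3_000_000;

fn num_indexing_threads(memory_arena_num_bytes: usize, num_cpus: usize) -> usize {
    let mut num_threads = num_cpus.min(MAX_NUM_THREAD);
    if memory_arena_num_bytes / num_threads < MEMORY_ARENA_NUM_BYTES_MIN {
        // Not enough budget per thread: shrink the pool, but keep at least one thread.
        num_threads = (memory_arena_num_bytes / MEMORY_ARENA_NUM_BYTES_MIN).max(1);
    }
    num_threads
}

fn main() {
    // With the placeholder 3 MB minimum, a 10 MB arena on 8 cores funds 3 threads...
    assert_eq!(num_indexing_threads(10_000_000, 8), 3);
    // ...while a 240 MB arena keeps all 8 threads, each with a 30 MB share.
    assert_eq!(num_indexing_threads(240_000_000, 8), 8);
}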

View File

@@ -1,12 +1,16 @@
use super::SegmentComponent;
use crate::schema::Schema;
use crate::Opstamp;
use crate::{core::SegmentId, store::Compressor};
use census::{Inventory, TrackedObject};
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::fmt;
use std::path::PathBuf;
use std::{collections::HashSet, sync::atomic::AtomicBool};
use std::{fmt, sync::Arc};
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use serde::{Deserialize, Serialize};
use super::SegmentComponent;
use crate::core::SegmentId;
use crate::schema::Schema;
use crate::store::Compressor;
use crate::{Inventory, Opstamp, TrackedObject};
#[derive(Clone, Debug, Serialize, Deserialize)]
struct DeleteMeta {
@@ -101,6 +105,7 @@ impl SegmentMeta {
/// Returns the list of files that
/// are required for the segment meta.
/// Note: Some of the returned files may not exist depending on the state of the segment.
///
/// This is useful as the way tantivy removes files
/// is by removing all files that have been created by tantivy
@@ -187,7 +192,12 @@ impl SegmentMeta {
}
#[doc(hidden)]
#[must_use]
pub fn with_delete_meta(self, num_deleted_docs: u32, opstamp: Opstamp) -> SegmentMeta {
assert!(
num_deleted_docs <= self.max_doc(),
"There cannot be more deleted docs than there are docs."
);
let delete_meta = DeleteMeta {
num_deleted_docs,
opstamp,
@@ -277,7 +287,6 @@ impl Order {
/// * the searchable segments,
/// * the index `docstamp`
/// * the schema
///
#[derive(Clone, Serialize)]
pub struct IndexMeta {
/// `IndexSettings` to configure index options.
@@ -365,10 +374,8 @@ impl fmt::Debug for IndexMeta {
mod tests {
use super::IndexMeta;
use crate::{
schema::{Schema, TEXT},
IndexSettings, IndexSortByField, Order,
};
use crate::schema::{Schema, TEXT};
use crate::{IndexSettings, IndexSortByField, Order};
#[test]
fn test_serialize_metas() {
@@ -393,7 +400,7 @@ mod tests {
let json = serde_json::ser::to_string(&index_metas).expect("serialization failed");
assert_eq!(
json,
r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"lz4"},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","tokenizer":"default"},"stored":false}}],"opstamp":0}"#
r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"lz4"},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false}}],"opstamp":0}"#
);
}
}

View File

@@ -1,12 +1,11 @@
use std::io;
use crate::common::BinarySerializable;
use common::BinarySerializable;
use crate::directory::FileSlice;
use crate::positions::PositionReader;
use crate::postings::TermInfo;
use crate::postings::{BlockSegmentPostings, SegmentPostings};
use crate::schema::IndexRecordOption;
use crate::schema::Term;
use crate::postings::{BlockSegmentPostings, SegmentPostings, TermInfo};
use crate::schema::{IndexRecordOption, Term};
use crate::termdict::TermDictionary;
/// The inverted index reader is in charge of accessing

View File

@@ -8,21 +8,22 @@ mod segment_component;
mod segment_id;
mod segment_reader;
use std::path::Path;
use once_cell::sync::Lazy;
pub use self::executor::Executor;
pub use self::index::{Index, IndexBuilder};
pub use self::index_meta::{
IndexMeta, IndexSettings, IndexSortByField, Order, SegmentMeta, SegmentMetaInventory,
};
pub use self::inverted_index_reader::InvertedIndexReader;
pub use self::searcher::Searcher;
pub use self::searcher::{Searcher, SearcherGeneration};
pub use self::segment::Segment;
pub use self::segment_component::SegmentComponent;
pub use self::segment_id::SegmentId;
pub use self::segment_reader::SegmentReader;
use once_cell::sync::Lazy;
use std::path::Path;
/// The meta file contains all the information about the list of segments and the schema
/// of the index.
pub static META_FILEPATH: Lazy<&'static Path> = Lazy::new(|| Path::new("meta.json"));

View File

@@ -1,28 +1,73 @@
use crate::collector::Collector;
use crate::core::Executor;
use std::collections::BTreeMap;
use std::{fmt, io};
use crate::core::SegmentReader;
use crate::collector::Collector;
use crate::core::{Executor, SegmentReader};
use crate::query::Query;
use crate::schema::Document;
use crate::schema::Schema;
use crate::schema::Term;
use crate::schema::{Document, Schema, Term};
use crate::space_usage::SearcherSpaceUsage;
use crate::store::StoreReader;
use crate::DocAddress;
use crate::Index;
use crate::{DocAddress, Index, Opstamp, SegmentId, TrackedObject};
use std::{fmt, io};
/// Identifies the searcher generation accessed by a [Searcher].
///
/// While this might seem redundant, a [SearcherGeneration] contains
/// both a `generation_id` AND a list of `(SegmentId, DeleteOpstamp)`.
///
/// This is on purpose. This object is used by the `Warmer` API.
/// Having both information makes it possible to identify which
/// artifact should be refreshed or garbage collected.
///
/// Depending on the use case, `Warmer`'s implementers can decide to
/// produce artifacts per:
/// - `generation_id` (e.g. some searcher level aggregates)
/// - `(segment_id, delete_opstamp)` (e.g. segment level aggregates)
/// - `segment_id` (e.g. for immutable document level information)
/// - `(generation_id, segment_id)` (e.g. for consistent dynamic column)
/// - ...
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SearcherGeneration {
segments: BTreeMap<SegmentId, Option<Opstamp>>,
generation_id: u64,
}
impl SearcherGeneration {
pub(crate) fn from_segment_readers(
segment_readers: &[SegmentReader],
generation_id: u64,
) -> Self {
let mut segment_id_to_del_opstamp = BTreeMap::new();
for segment_reader in segment_readers {
segment_id_to_del_opstamp
.insert(segment_reader.segment_id(), segment_reader.delete_opstamp());
}
Self {
segments: segment_id_to_del_opstamp,
generation_id,
}
}
/// Returns the searcher generation id.
pub fn generation_id(&self) -> u64 {
self.generation_id
}
/// Return a `(SegmentId -> DeleteOpstamp)` mapping.
pub fn segments(&self) -> &BTreeMap<SegmentId, Option<Opstamp>> {
&self.segments
}
}
/// Holds a list of `SegmentReader`s ready for search.
///
/// It guarantees that the `Segment` will not be removed before
/// the destruction of the `Searcher`.
///
pub struct Searcher {
schema: Schema,
index: Index,
segment_readers: Vec<SegmentReader>,
store_readers: Vec<StoreReader>,
generation: TrackedObject<SearcherGeneration>,
}
impl Searcher {
@@ -31,6 +76,7 @@ impl Searcher {
schema: Schema,
index: Index,
segment_readers: Vec<SegmentReader>,
generation: TrackedObject<SearcherGeneration>,
) -> io::Result<Searcher> {
let store_readers: Vec<StoreReader> = segment_readers
.iter()
@@ -41,6 +87,7 @@ impl Searcher {
index,
segment_readers,
store_readers,
generation,
})
}
@@ -49,6 +96,11 @@ impl Searcher {
&self.index
}
/// [SearcherGeneration] which identifies the version of the snapshot held by this `Searcher`.
pub fn generation(&self) -> &SearcherGeneration {
self.generation.as_ref()
}
/// Fetches a document from tantivy's store given a `DocAddress`.
///
/// The searcher uses the segment ordinal to route the
@@ -88,7 +140,7 @@ impl Searcher {
&self.segment_readers
}
/// Returns the segment_reader associated with the given segment_ordinal
/// Returns the segment_reader associated with the given segment_ord
pub fn segment_reader(&self, segment_ord: u32) -> &SegmentReader {
&self.segment_readers[segment_ord as usize]
}
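
The `SearcherGeneration` doc comment above lists several ways a `Warmer` can key its artifacts. A hedged sketch of the per-segment variant, caching one value per `(segment_id, delete_opstamp)` pair; the cache type and the aggregate being computed are illustrative, only the keying scheme comes from the doc comment (tantivy's real `SegmentId` is a Uuid, simplified to `u64` here):

use std::collections::{BTreeMap, HashMap};

// Simplified stand-ins for tantivy's SegmentId (a Uuid) and Opstamp (a u64).
type SegmentId = u64;
type Opstamp = u64;

// Artifacts keyed per (segment_id, delete_opstamp): a segment whose deletes
// advanced gets recomputed, untouched segments are reused across generations.
struct SegmentAggregateCache {
    cache: HashMap<(SegmentId, Option<Opstamp>), f64>,
}

impl SegmentAggregateCache {
    fn warm(&mut self, segments: &BTreeMap<SegmentId, Option<Opstamp>>) {
        for (&segment_id, &delete_opstamp) in segments {
            self.cache
                .entry((segment_id, delete_opstamp))
                // Placeholder for an expensive per-segment aggregate.
                .or_insert_with(|| segment_id as f64);
        }
        // Drop artifacts for segments that left the generation (garbage collection).
        self.cache
            .retain(|(segment_id, opstamp), _| segments.get(segment_id) == Some(opstamp));
    }
}

fn main() {
    let mut warmer = SegmentAggregateCache { cache: HashMap::new() };
    let mut segments = BTreeMap::new();
    segments.insert(1u64, None);
    segments.insert(2u64, Some(17u64));
    warmer.warm(&segments);
    assert_eq!(warmer.cache.len(), 2);
}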

View File

@@ -1,15 +1,13 @@
use super::SegmentComponent;
use crate::core::Index;
use crate::core::SegmentId;
use crate::core::SegmentMeta;
use crate::directory::error::{OpenReadError, OpenWriteError};
use crate::directory::Directory;
use crate::directory::{FileSlice, WritePtr};
use crate::schema::Schema;
use crate::Opstamp;
use std::fmt;
use std::path::PathBuf;
use super::SegmentComponent;
use crate::core::{Index, SegmentId, SegmentMeta};
use crate::directory::error::{OpenReadError, OpenWriteError};
use crate::directory::{Directory, FileSlice, WritePtr};
use crate::schema::Schema;
use crate::Opstamp;
/// A segment is a piece of the index.
#[derive(Clone)]
pub struct Segment {
@@ -56,6 +54,7 @@ impl Segment {
}
#[doc(hidden)]
#[must_use]
pub fn with_delete_meta(self, num_deleted_docs: u32, opstamp: Opstamp) -> Segment {
Segment {
index: self.index,

View File

@@ -1,14 +1,14 @@
use std::cmp::{Ord, Ordering};
use std::error::Error;
use std::fmt;
use uuid::Uuid;
use std::str::FromStr;
#[cfg(test)]
use std::sync::atomic;
#[cfg(test)]
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use std::error::Error;
use std::str::FromStr;
#[cfg(test)]
use std::sync::atomic;
use uuid::Uuid;
/// Uuid identifying a segment.
///

View File

@@ -1,25 +1,19 @@
use crate::core::InvertedIndexReader;
use crate::core::Segment;
use crate::core::SegmentComponent;
use crate::core::SegmentId;
use crate::directory::FileSlice;
use crate::fastfield::DeleteBitSet;
use crate::fastfield::FacetReader;
use crate::fastfield::FastFieldReaders;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::{fmt, io};
use fail::fail_point;
use crate::core::{InvertedIndexReader, Segment, SegmentComponent, SegmentId};
use crate::directory::{CompositeFile, FileSlice};
use crate::error::DataCorruption;
use crate::fastfield::{intersect_alive_bitsets, AliveBitSet, FacetReader, FastFieldReaders};
use crate::fieldnorm::{FieldNormReader, FieldNormReaders};
use crate::schema::FieldType;
use crate::schema::Schema;
use crate::schema::{Field, IndexRecordOption};
use crate::schema::{Field, FieldType, IndexRecordOption, Schema};
use crate::space_usage::SegmentSpaceUsage;
use crate::store::StoreReader;
use crate::termdict::TermDictionary;
use crate::DocId;
use crate::{common::CompositeFile, error::DataCorruption};
use fail::fail_point;
use std::fmt;
use std::sync::Arc;
use std::sync::RwLock;
use std::{collections::HashMap, io};
use crate::{DocId, Opstamp};
/// Entry point to access all of the datastructures of the `Segment`
///
@@ -36,6 +30,8 @@ pub struct SegmentReader {
inv_idx_reader_cache: Arc<RwLock<HashMap<Field, Arc<InvertedIndexReader>>>>,
segment_id: SegmentId,
delete_opstamp: Option<Opstamp>,
max_doc: DocId,
num_docs: DocId,
@@ -46,7 +42,7 @@ pub struct SegmentReader {
fieldnorm_readers: FieldNormReaders,
store_file: FileSlice,
delete_bitset_opt: Option<DeleteBitSet>,
alive_bitset_opt: Option<AliveBitSet>,
schema: Schema,
}
@@ -71,14 +67,12 @@ impl SegmentReader {
/// Return the number of documents that have been
/// deleted in the segment.
pub fn num_deleted_docs(&self) -> DocId {
self.delete_bitset()
.map(|delete_set| delete_set.num_deleted() as DocId)
.unwrap_or(0u32)
self.max_doc - self.num_docs
}
/// Returns true iff some of the documents of the segment have been deleted.
pub fn has_deletes(&self) -> bool {
self.delete_bitset().is_some()
self.num_deleted_docs() > 0
}
/// Accessor to a segment's fast field reader given a field.
@@ -100,7 +94,7 @@ impl SegmentReader {
let field_entry = self.schema.get_field_entry(field);
match field_entry.field_type() {
FieldType::HierarchicalFacet(_) => {
FieldType::Facet(_) => {
let term_ords_reader = self.fast_fields().u64s(field)?;
let termdict = self
.termdict_composite
@@ -127,13 +121,18 @@ impl SegmentReader {
self.fieldnorm_readers.get_field(field)?.ok_or_else(|| {
let field_name = self.schema.get_field_name(field);
let err_msg = format!(
"Field norm not found for field {:?}. Was it marked as indexed during indexing?",
"Field norm not found for field {:?}. Was the field set to record norm during \
indexing?",
field_name
);
crate::TantivyError::SchemaError(err_msg)
})
}
pub(crate) fn fieldnorms_readers(&self) -> &FieldNormReaders {
&self.fieldnorm_readers
}
/// Accessor to the segment's `StoreReader`.
pub fn get_store_reader(&self) -> io::Result<StoreReader> {
StoreReader::open(self.store_file.clone())
@@ -141,6 +140,14 @@ impl SegmentReader {
/// Open a new segment for reading.
pub fn open(segment: &Segment) -> crate::Result<SegmentReader> {
Self::open_with_custom_alive_set(segment, None)
}
/// Open a new segment for reading.
pub fn open_with_custom_alive_set(
segment: &Segment,
custom_bitset: Option<AliveBitSet>,
) -> crate::Result<SegmentReader> {
let termdict_file = segment.open_read(SegmentComponent::Terms)?;
let termdict_composite = CompositeFile::open(&termdict_file)?;
@@ -165,29 +172,37 @@ impl SegmentReader {
let fast_fields_composite = CompositeFile::open(&fast_fields_data)?;
let fast_field_readers =
Arc::new(FastFieldReaders::new(schema.clone(), fast_fields_composite));
let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?;
let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;
let delete_bitset_opt = if segment.meta().has_deletes() {
let delete_data = segment.open_read(SegmentComponent::Delete)?;
let delete_bitset = DeleteBitSet::open(delete_data)?;
Some(delete_bitset)
let original_bitset = if segment.meta().has_deletes() {
let delete_file_slice = segment.open_read(SegmentComponent::Delete)?;
let delete_data = delete_file_slice.read_bytes()?;
Some(AliveBitSet::open(delete_data))
} else {
None
};
let alive_bitset_opt = intersect_alive_bitset(original_bitset, custom_bitset);
let max_doc = segment.meta().max_doc();
let num_docs = alive_bitset_opt
.as_ref()
.map(|alive_bitset| alive_bitset.num_alive_docs() as u32)
.unwrap_or(max_doc);
Ok(SegmentReader {
inv_idx_reader_cache: Default::default(),
max_doc: segment.meta().max_doc(),
num_docs: segment.meta().num_docs(),
num_docs,
max_doc,
termdict_composite,
postings_composite,
fast_fields_readers: fast_field_readers,
fieldnorm_readers,
segment_id: segment.id(),
delete_opstamp: segment.meta().delete_opstamp(),
store_file,
delete_bitset_opt,
alive_bitset_opt,
positions_composite,
schema,
})
@@ -236,19 +251,24 @@ impl SegmentReader {
let record_option = record_option_opt.unwrap();
let postings_file = postings_file_opt.unwrap();
let termdict_file: FileSlice = self.termdict_composite.open_read(field)
.ok_or_else(||
DataCorruption::comment_only(format!("Failed to open field {:?}'s term dictionary in the composite file. Has the schema been modified?", field_entry.name()))
)?;
let positions_file = self
.positions_composite
.open_read(field)
.ok_or_else(|| {
let error_msg = format!("Failed to open field {:?}'s positions in the composite file. Has the schema been modified?", field_entry.name());
DataCorruption::comment_only(error_msg)
let termdict_file: FileSlice =
self.termdict_composite.open_read(field).ok_or_else(|| {
DataCorruption::comment_only(format!(
"Failed to open field {:?}'s term dictionary in the composite file. Has the \
schema been modified?",
field_entry.name()
))
})?;
let positions_file = self.positions_composite.open_read(field).ok_or_else(|| {
let error_msg = format!(
"Failed to open field {:?}'s positions in the composite file. Has the schema been \
modified?",
field_entry.name()
);
DataCorruption::comment_only(error_msg)
})?;
let inv_idx_reader = Arc::new(InvertedIndexReader::new(
TermDictionary::open(termdict_file)?,
postings_file,
@@ -271,23 +291,32 @@ impl SegmentReader {
self.segment_id
}
/// Returns the delete opstamp
pub fn delete_opstamp(&self) -> Option<Opstamp> {
self.delete_opstamp
}
/// Returns the bitset representing
/// the documents that have been deleted.
pub fn delete_bitset(&self) -> Option<&DeleteBitSet> {
self.delete_bitset_opt.as_ref()
pub fn alive_bitset(&self) -> Option<&AliveBitSet> {
self.alive_bitset_opt.as_ref()
}
/// Returns true iff the `doc` is marked
/// as deleted.
pub fn is_deleted(&self, doc: DocId) -> bool {
self.delete_bitset()
self.alive_bitset()
.map(|delete_set| delete_set.is_deleted(doc))
.unwrap_or(false)
}
/// Returns an iterator that will iterate over the alive document ids
pub fn doc_ids_alive(&self) -> impl Iterator<Item = DocId> + '_ {
(0u32..self.max_doc).filter(move |doc| !self.is_deleted(*doc))
pub fn doc_ids_alive(&self) -> Box<dyn Iterator<Item = DocId> + '_> {
if let Some(alive_bitset) = &self.alive_bitset_opt {
Box::new(alive_bitset.iter_alive())
} else {
Box::new(0u32..self.max_doc)
}
}
/// Summarize total space usage of this segment.
@@ -300,14 +329,29 @@ impl SegmentReader {
self.fast_fields_readers.space_usage(),
self.fieldnorm_readers.space_usage(),
self.get_store_reader()?.space_usage(),
self.delete_bitset_opt
self.alive_bitset_opt
.as_ref()
.map(DeleteBitSet::space_usage)
.map(AliveBitSet::space_usage)
.unwrap_or(0),
))
}
}
fn intersect_alive_bitset(
left_opt: Option<AliveBitSet>,
right_opt: Option<AliveBitSet>,
) -> Option<AliveBitSet> {
match (left_opt, right_opt) {
(Some(left), Some(right)) => {
assert_eq!(left.bitset().max_value(), right.bitset().max_value());
Some(intersect_alive_bitsets(left, right))
}
(Some(left), None) => Some(left),
(None, Some(right)) => Some(right),
(None, None) => None,
}
}
impl fmt::Debug for SegmentReader {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "SegmentReader({:?})", self.segment_id)
@@ -330,10 +374,10 @@ mod test {
{
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(name => "tantivy"));
index_writer.add_document(doc!(name => "horse"));
index_writer.add_document(doc!(name => "jockey"));
index_writer.add_document(doc!(name => "cap"));
index_writer.add_document(doc!(name => "tantivy"))?;
index_writer.add_document(doc!(name => "horse"))?;
index_writer.add_document(doc!(name => "jockey"))?;
index_writer.add_document(doc!(name => "cap"))?;
// we should now have one segment with two docs
index_writer.delete_term(Term::from_field_text(name, "horse"));
index_writer.delete_term(Term::from_field_text(name, "cap"));
@@ -356,10 +400,10 @@ mod test {
{
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(name => "tantivy"));
index_writer.add_document(doc!(name => "horse"));
index_writer.add_document(doc!(name => "jockey"));
index_writer.add_document(doc!(name => "cap"));
index_writer.add_document(doc!(name => "tantivy"))?;
index_writer.add_document(doc!(name => "horse"))?;
index_writer.add_document(doc!(name => "jockey"))?;
index_writer.add_document(doc!(name => "cap"))?;
// we should now have one segment with two docs
index_writer.commit()?;
}
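
With the `AliveBitSet` change above, `num_docs` is derived from the (possibly custom) alive bitset, `num_deleted_docs` becomes plain arithmetic on `max_doc`, and `doc_ids_alive` picks one of two boxed iterators. A small sketch of that bookkeeping, with a `Vec<bool>` standing in for the bitset:

// Sketch of the alive/deleted bookkeeping; Vec<bool> stands in for AliveBitSet.
struct SegmentCounts {
    max_doc: u32,
    alive: Option<Vec<bool>>,
}

impl SegmentCounts {
    fn num_docs(&self) -> u32 {
        self.alive
            .as_ref()
            .map(|alive| alive.iter().filter(|&&is_alive| is_alive).count() as u32)
            .unwrap_or(self.max_doc)
    }
    // num_deleted_docs no longer needs to consult the bitset at all.
    fn num_deleted_docs(&self) -> u32 {
        self.max_doc - self.num_docs()
    }
    // Same shape as doc_ids_alive(): iterate the bitset if present, else the full range.
    fn doc_ids_alive(&self) -> Box<dyn Iterator<Item = u32> + '_> {
        if let Some(alive) = &self.alive {
            Box::new(
                alive
                    .iter()
                    .enumerate()
                    .filter(|&(_, &is_alive)| is_alive)
                    .map(|(doc, _)| doc as u32),
            )
        } else {
            Box::new(0..self.max_doc)
        }
    }
}

fn main() {
    let segment = SegmentCounts { max_doc: 4, alive: Some(vec![true, false, true, true]) };
    assert_eq!(segment.num_docs(), 3);
    assert_eq!(segment.num_deleted_docs(), 1);
    assert_eq!(segment.doc_ids_alive().collect::<Vec<u32>>(), vec![0, 2, 3]);
}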

View File

@@ -1,17 +1,13 @@
use crate::common::BinarySerializable;
use crate::common::CountingWriter;
use crate::common::VInt;
use crate::directory::FileSlice;
use crate::directory::{TerminatingWrite, WritePtr};
use crate::schema::Field;
use crate::space_usage::FieldUsage;
use crate::space_usage::PerFieldSpaceUsage;
use std::collections::HashMap;
use std::io::{self, Read, Write};
use std::iter::ExactSizeIterator;
use std::ops::Range;
use super::HasLen;
use common::{BinarySerializable, CountingWriter, HasLen, VInt};
use crate::directory::{FileSlice, TerminatingWrite, WritePtr};
use crate::schema::Field;
use crate::space_usage::{FieldUsage, PerFieldSpaceUsage};
#[derive(Eq, PartialEq, Hash, Copy, Ord, PartialOrd, Clone, Debug)]
pub struct FileAddr {
@@ -187,14 +183,15 @@ impl CompositeFile {
#[cfg(test)]
mod test {
use super::{CompositeFile, CompositeWrite};
use crate::common::BinarySerializable;
use crate::common::VInt;
use crate::directory::{Directory, RamDirectory};
use crate::schema::Field;
use std::io::Write;
use std::path::Path;
use common::{BinarySerializable, VInt};
use super::{CompositeFile, CompositeWrite};
use crate::directory::{Directory, RamDirectory};
use crate::schema::Field;
#[test]
fn test_composite_file() -> crate::Result<()> {
let path = Path::new("test_path");

View File

@@ -1,18 +1,12 @@
use crate::directory::directory_lock::Lock;
use crate::directory::error::LockError;
use crate::directory::error::{DeleteError, OpenReadError, OpenWriteError};
use crate::directory::WatchHandle;
use crate::directory::{FileHandle, WatchCallback};
use crate::directory::{FileSlice, WritePtr};
use std::fmt;
use std::io;
use std::io::Write;
use std::marker::Send;
use std::marker::Sync;
use std::path::Path;
use std::path::PathBuf;
use std::thread;
use std::marker::{Send, Sync};
use std::path::{Path, PathBuf};
use std::time::Duration;
use std::{fmt, io, thread};
use crate::directory::directory_lock::Lock;
use crate::directory::error::{DeleteError, LockError, OpenReadError, OpenWriteError};
use crate::directory::{FileHandle, FileSlice, WatchCallback, WatchHandle, WritePtr};
/// The retry logic for acquiring locks is pretty simple.
/// We just retry `n` times after a given `duration`, both
@@ -43,10 +37,8 @@ impl RetryPolicy {
}
/// The `DirectoryLock` is an object that represents a file lock.
/// See [`LockType`](struct.LockType.html)
///
/// It is transparently associated to a lock file, that gets deleted
/// on `Drop.` The lock is released automatically on `Drop`.
/// It is associated to a lock file, that gets deleted on `Drop.`
pub struct DirectoryLock(Box<dyn Send + Sync + 'static>);
struct DirectoryLockGuard {
@@ -142,10 +134,16 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
/// Opens a writer for the *virtual file* associated with
/// a Path.
///
/// Right after this call, the file should be created
/// and any subsequent call to `open_read` for the
/// Right after this call, and for the span of the execution of the program,
/// the file should be created and any subsequent call to `open_read` for the
/// same path should return a `FileSlice`.
///
/// However, depending on the directory implementation,
/// it might be required to call `sync_directory` to ensure
/// that the file is durably created.
/// (The semantics here are the same when dealing with
/// a posix filesystem.)
///
/// Write operations may be aggressively buffered.
/// The client of this trait is responsible for calling flush
/// to ensure that subsequent `read` operations
@@ -176,6 +174,12 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
/// The file may or may not previously exist.
fn atomic_write(&self, path: &Path, data: &[u8]) -> io::Result<()>;
/// Sync the directory.
///
/// This call is required to ensure that newly created files are
/// effectively stored durably.
fn sync_directory(&self) -> io::Result<()>;
/// Acquire a lock in the given directory.
///
/// The method is blocking or not depending on the `Lock` object.
@@ -223,10 +227,21 @@ pub trait DirectoryClone {
}
impl<T> DirectoryClone for T
where
T: 'static + Directory + Clone,
where T: 'static + Directory + Clone
{
fn box_clone(&self) -> Box<dyn Directory> {
Box::new(self.clone())
}
}
impl Clone for Box<dyn Directory> {
fn clone(&self) -> Self {
self.box_clone()
}
}
impl<T: Directory + 'static> From<T> for Box<dyn Directory> {
fn from(t: T) -> Self {
Box::new(t)
}
}
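
The new `sync_directory` method and the updated `open_write` documentation say that durably creating a file may require syncing the directory itself, and the new blanket `From<T> for Box<dyn Directory>` is what lets concrete directories be passed to the boxed `Index` APIs shown earlier. A hedged usage sketch with `RamDirectory`, for which both operations are trivial:

use std::path::Path;

use tantivy::directory::{Directory, RamDirectory};

fn main() {
    let directory = RamDirectory::create();
    // Write the file atomically, then sync the directory so that the file
    // creation itself is durable (a no-op for RamDirectory; an fsync of the
    // parent directory for a posix MmapDirectory).
    directory
        .atomic_write(Path::new("some_file.json"), br#"{"segments": []}"#)
        .unwrap();
    directory.sync_directory().unwrap();
    assert!(directory.exists(Path::new("some_file.json")).unwrap());

    // The blanket From impl above turns any concrete directory into a trait object.
    let boxed: Box<dyn Directory> = RamDirectory::create().into();
    assert!(!boxed.exists(Path::new("some_file.json")).unwrap());
}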

View File

@@ -1,17 +1,17 @@
use once_cell::sync::Lazy;
use std::path::PathBuf;
use once_cell::sync::Lazy;
/// A directory lock.
///
/// A lock is associated to a specific path and some
/// [`LockParams`](./enum.LockParams.html).
/// Tantivy itself uses only two locks but client application
/// can use the directory facility to define their own locks.
/// - [INDEX_WRITER_LOCK](./struct.INDEX_WRITER_LOCK.html)
/// - [META_LOCK](./struct.META_LOCK.html)
/// - [INDEX_WRITER_LOCK]
/// - [META_LOCK]
///
/// Check out these locks documentation for more information.
///
#[derive(Debug)]
pub struct Lock {
/// The lock needs to be associated with its own file `path`.

View File

@@ -1,15 +1,17 @@
use crate::Version;
use std::fmt;
use std::io;
use std::path::PathBuf;
use std::{fmt, io};
use crate::Version;
/// Error while trying to acquire a directory lock.
#[derive(Debug, Error)]
pub enum LockError {
/// Failed to acquire a lock as it is already held by another
/// client.
/// - In the context of a blocking lock, this means the lock was not released within some `timeout` period.
/// - In the context of a non-blocking lock, this means the lock was busy at the moment of the call.
/// - In the context of a blocking lock, this means the lock was not released within some
/// `timeout` period.
/// - In the context of a non-blocking lock, this means the lock was busy at the moment of the
/// call.
#[error("Could not acquire lock as it is already held, possibly by a different process.")]
LockBusy,
/// Trying to acquire a lock failed with an `IoError`
@@ -39,6 +41,16 @@ pub enum OpenDirectoryError {
},
}
impl OpenDirectoryError {
/// Wraps an io error.
pub fn wrap_io_error(io_error: io::Error, directory_path: PathBuf) -> Self {
Self::IoError {
io_error,
directory_path,
}
}
}
/// Error that may occur when starting to write in a file
#[derive(Debug, Error)]
pub enum OpenWriteError {

View File

@@ -1,11 +1,11 @@
use std::ops::{Deref, Range};
use std::sync::{Arc, Weak};
use std::{fmt, io};
use common::HasLen;
use stable_deref_trait::StableDeref;
use crate::common::HasLen;
use crate::directory::OwnedBytes;
use std::fmt;
use std::ops::Range;
use std::sync::{Arc, Weak};
use std::{io, ops::Deref};
pub type ArcBytes = Arc<dyn Deref<Target = [u8]> + Send + Sync + 'static>;
pub type WeakArcBytes = Weak<dyn Deref<Target = [u8]> + Send + Sync + 'static>;
@@ -32,15 +32,8 @@ impl FileHandle for &'static [u8] {
}
}
impl<T: Deref<Target = [u8]>> HasLen for T {
fn len(&self) -> usize {
self.deref().len()
}
}
impl<B> From<B> for FileSlice
where
B: StableDeref + Deref<Target = [u8]> + 'static + Send + Sync,
where B: StableDeref + Deref<Target = [u8]> + 'static + Send + Sync
{
fn from(bytes: B) -> FileSlice {
FileSlice::new(Box::new(OwnedBytes::new(bytes)))
@@ -50,7 +43,6 @@ where
/// Logical slice of read only file in tantivy.
///
/// It can be cloned and sliced cheaply.
///
#[derive(Clone)]
pub struct FileSlice {
data: Arc<dyn FileHandle>,
@@ -72,6 +64,7 @@ impl FileSlice {
/// Wraps a FileHandle.
#[doc(hidden)]
#[must_use]
pub fn new_with_num_bytes(file_handle: Box<dyn FileHandle>, num_bytes: usize) -> Self {
FileSlice {
data: Arc::from(file_handle),
@@ -84,6 +77,7 @@ impl FileSlice {
/// # Panics
///
/// Panics if `byte_range.end` exceeds the filesize.
#[must_use]
pub fn slice(&self, byte_range: Range<usize>) -> FileSlice {
assert!(byte_range.end <= self.len());
FileSlice {
@@ -143,6 +137,7 @@ impl FileSlice {
/// boundary.
///
/// Equivalent to `.slice(from_offset, self.len())`
#[must_use]
pub fn slice_from(&self, from_offset: usize) -> FileSlice {
self.slice(from_offset..self.len())
}
@@ -150,6 +145,7 @@ impl FileSlice {
/// Returns a slice from the end.
///
/// Equivalent to `.slice(self.len() - from_offset, self.len())`
#[must_use]
pub fn slice_from_end(&self, from_offset: usize) -> FileSlice {
self.slice(self.len() - from_offset..self.len())
}
@@ -158,6 +154,7 @@ impl FileSlice {
/// boundary.
///
/// Equivalent to `.slice(0, to_offset)`
#[must_use]
pub fn slice_to(&self, to_offset: usize) -> FileSlice {
self.slice(0..to_offset)
}
@@ -177,10 +174,12 @@ impl HasLen for FileSlice {
#[cfg(test)]
mod tests {
use super::{FileHandle, FileSlice};
use crate::common::HasLen;
use std::io;
use common::HasLen;
use super::{FileHandle, FileSlice};
#[test]
fn test_file_slice() -> io::Result<()> {
let file_slice = FileSlice::new(Box::new(b"abcdef".as_ref()));

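For reference, a small sketch mirroring the test above and exercising the now-`#[must_use]` slicing helpers: each call returns a new cheap view over the same underlying bytes, so ignoring the return value would silently do nothing.

```rust
use tantivy::directory::FileSlice;

fn file_slice_demo() -> std::io::Result<()> {
    let file_slice = FileSlice::new(Box::new(b"abcdef".as_ref()));
    let middle = file_slice.slice(1..4); // "bcd"
    let tail = file_slice.slice_from(3); // "def"
    let head = file_slice.slice_to(2); // "ab"
    assert_eq!(middle.read_bytes()?.as_slice(), b"bcd");
    assert_eq!(tail.read_bytes()?.as_slice(), b"def");
    assert_eq!(head.read_bytes()?.as_slice(), b"ab");
    Ok(())
}
```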
View File

@@ -1,13 +1,13 @@
use crate::directory::{WatchCallback, WatchCallbackList, WatchHandle};
use crc32fast::Hasher;
use std::fs;
use std::io;
use std::io::BufRead;
use std::path::Path;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::thread;
use std::time::Duration;
use std::{fs, io, thread};
use crc32fast::Hasher;
use crate::directory::{WatchCallback, WatchCallbackList, WatchHandle};
pub const POLLING_INTERVAL: Duration = Duration::from_millis(if cfg!(test) { 1 } else { 500 });
@@ -43,14 +43,16 @@ impl FileWatcher {
thread::Builder::new()
.name("thread-tantivy-meta-file-watcher".to_string())
.spawn(move || {
let mut current_checksum = None;
let mut current_checksum_opt = None;
while state.load(Ordering::SeqCst) == 1 {
if let Ok(checksum) = FileWatcher::compute_checksum(&path) {
// `None.unwrap_or_else(|| !checksum) != checksum` evaluates to `true`
if current_checksum.unwrap_or_else(|| !checksum) != checksum {
let metafile_has_changed = current_checksum_opt
.map(|current_checksum| current_checksum != checksum)
.unwrap_or(true);
if metafile_has_changed {
info!("Meta file {:?} was modified", path);
current_checksum = Some(checksum);
current_checksum_opt = Some(checksum);
futures::executor::block_on(callbacks.broadcast());
}
}
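
The rewritten condition is easier to follow in isolation. A standalone sketch of the same logic (the checksum type is assumed to be the crc32 `u32`): the very first computed checksum, when the option is still `None`, always counts as a change.

```rust
fn metafile_has_changed(current_checksum_opt: Option<u32>, checksum: u32) -> bool {
    current_checksum_opt
        .map(|current_checksum| current_checksum != checksum)
        .unwrap_or(true)
}

fn main() {
    assert!(metafile_has_changed(None, 42)); // first observation counts as a change
    assert!(!metafile_has_changed(Some(42), 42)); // unchanged
    assert!(metafile_has_changed(Some(42), 43)); // meta file was modified
}
```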
@@ -97,9 +99,8 @@ mod tests {
use std::mem;
use crate::directory::mmap_directory::atomic_write;
use super::*;
use crate::directory::mmap_directory::atomic_write;
#[test]
fn test_file_watcher_drop_watcher() -> crate::Result<()> {

View File

@@ -1,15 +1,14 @@
use crate::directory::error::Incompatibility;
use crate::directory::FileSlice;
use crate::{
common::{BinarySerializable, CountingWriter, DeserializeFrom, FixedSize, HasLen},
directory::{AntiCallToken, TerminatingWrite},
Version, INDEX_FORMAT_VERSION,
};
use crc32fast::Hasher;
use serde::{Deserialize, Serialize};
use std::io;
use std::io::Write;
use common::{BinarySerializable, CountingWriter, DeserializeFrom, FixedSize, HasLen};
use crc32fast::Hasher;
use serde::{Deserialize, Serialize};
use crate::directory::error::Incompatibility;
use crate::directory::{AntiCallToken, FileSlice, TerminatingWrite};
use crate::{Version, INDEX_FORMAT_VERSION};
const FOOTER_MAX_LEN: u32 = 50_000;
/// The magic byte of the footer to identify corruption
@@ -64,7 +63,9 @@ impl Footer {
if footer_magic_byte != FOOTER_MAGIC_NUMBER {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"Footer magic byte mismatch. File corrupted or index was created using old an tantivy version which is not supported anymore. Please use tantivy 0.15 or above to recreate the index.",
"Footer magic byte mismatch. File corrupted or index was created using old an \
tantivy version which is not supported anymore. Please use tantivy 0.15 or above \
to recreate the index.",
));
}
@@ -73,7 +74,7 @@ impl Footer {
io::ErrorKind::InvalidData,
format!(
"Footer seems invalid as it suggests a footer len of {}. File is corrupted, \
or the index was created with a different & old version of tantivy.",
or the index was created with a different & old version of tantivy.",
footer_len
),
));
@@ -154,14 +155,13 @@ impl<W: TerminatingWrite> TerminatingWrite for FooterProxy<W> {
#[cfg(test)]
mod tests {
use crate::directory::footer::Footer;
use crate::directory::OwnedBytes;
use crate::{
common::BinarySerializable,
directory::{footer::FOOTER_MAGIC_NUMBER, FileSlice},
};
use std::io;
use common::BinarySerializable;
use crate::directory::footer::{Footer, FOOTER_MAGIC_NUMBER};
use crate::directory::{FileSlice, OwnedBytes};
#[test]
fn test_deserialize_footer() {
let mut buf: Vec<u8> = vec![];
@@ -185,8 +185,9 @@ mod tests {
let err = Footer::extract_footer(fileslice).unwrap_err();
assert_eq!(
err.to_string(),
"Footer magic byte mismatch. File corrupted or index was created using old an tantivy version which \
is not supported anymore. Please use tantivy 0.15 or above to recreate the index."
"Footer magic byte mismatch. File corrupted or index was created using old an tantivy \
version which is not supported anymore. Please use tantivy 0.15 or above to recreate \
the index."
);
}
#[test]
@@ -221,8 +222,8 @@ mod tests {
assert_eq!(err.kind(), io::ErrorKind::InvalidData);
assert_eq!(
err.to_string(),
"Footer seems invalid as it suggests a footer len of 50001. File is corrupted, \
or the index was created with a different & old version of tantivy."
"Footer seems invalid as it suggests a footer len of 50001. File is corrupted, or the \
index was created with a different & old version of tantivy."
);
}
}

View File

@@ -1,23 +1,20 @@
use crate::core::{MANAGED_FILEPATH, META_FILEPATH};
use crate::directory::error::{DeleteError, LockError, OpenReadError, OpenWriteError};
use crate::directory::footer::{Footer, FooterProxy};
use crate::directory::GarbageCollectionResult;
use crate::directory::Lock;
use crate::directory::META_LOCK;
use crate::directory::{DirectoryLock, FileHandle};
use crate::directory::{FileSlice, WritePtr};
use crate::directory::{WatchCallback, WatchHandle};
use crate::error::DataCorruption;
use crate::Directory;
use crc32fast::Hasher;
use std::collections::HashSet;
use std::io;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::result;
use std::sync::RwLockWriteGuard;
use std::sync::{Arc, RwLock};
use std::sync::{Arc, RwLock, RwLockWriteGuard};
use std::{io, result};
use crc32fast::Hasher;
use crate::core::MANAGED_FILEPATH;
use crate::directory::error::{DeleteError, LockError, OpenReadError, OpenWriteError};
use crate::directory::footer::{Footer, FooterProxy};
use crate::directory::{
DirectoryLock, FileHandle, FileSlice, GarbageCollectionResult, Lock, WatchCallback,
WatchHandle, WritePtr, META_LOCK,
};
use crate::error::DataCorruption;
use crate::Directory;
/// Returns true iff the file is "managed".
/// Non-managed files are not subject to garbage collection.
@@ -64,7 +61,7 @@ fn save_managed_paths(
impl ManagedDirectory {
/// Wraps a directory as managed directory.
pub fn wrap<Dir: Directory>(directory: Dir) -> crate::Result<ManagedDirectory> {
pub fn wrap(directory: Box<dyn Directory>) -> crate::Result<ManagedDirectory> {
match directory.atomic_read(&MANAGED_FILEPATH) {
Ok(data) => {
let managed_files_json = String::from_utf8_lossy(&data);
@@ -76,14 +73,14 @@ impl ManagedDirectory {
)
})?;
Ok(ManagedDirectory {
directory: Box::new(directory),
directory,
meta_informations: Arc::new(RwLock::new(MetaInformation {
managed_paths: managed_files,
})),
})
}
Err(OpenReadError::FileDoesNotExist(_)) => Ok(ManagedDirectory {
directory: Box::new(directory),
directory,
meta_informations: Arc::default(),
}),
io_err @ Err(OpenReadError::IoError { .. }) => Err(io_err.err().unwrap().into()),
@@ -192,6 +189,7 @@ impl ManagedDirectory {
for delete_file in &deleted_files {
managed_paths_write.remove(delete_file);
}
self.directory.sync_directory()?;
save_managed_paths(self.directory.as_mut(), &meta_informations_wlock)?;
}
@@ -222,9 +220,22 @@ impl ManagedDirectory {
.write()
.expect("Managed file lock poisoned");
let has_changed = meta_wlock.managed_paths.insert(filepath.to_owned());
if has_changed {
save_managed_paths(self.directory.as_ref(), &meta_wlock)?;
if !has_changed {
return Ok(());
}
save_managed_paths(self.directory.as_ref(), &meta_wlock)?;
// This is not the first file we add.
// Therefore, we are sure that `.managed.json` has already been
// properly created and we do not need to sync its parent directory.
//
// (It might seem like a nicer solution to create `.managed.json` on the
// creation of the ManagedDirectory instance, but it would actually
// prevent the use of read-only directories.)
let managed_file_definitely_already_exists = meta_wlock.managed_paths.len() > 1;
if managed_file_definitely_already_exists {
return Ok(());
}
self.directory.sync_directory()?;
Ok(())
}
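
Since `wrap` now takes a `Box<dyn Directory>` (see the hunk above), callers box the concrete directory themselves, or rely on the `From` impl shown earlier. A hedged sketch:

```rust
use std::path::Path;
use tantivy::directory::{ManagedDirectory, MmapDirectory};

fn open_managed(path: &Path) -> tantivy::Result<ManagedDirectory> {
    // Open the concrete directory, then hand a boxed trait object to `wrap`.
    let mmap_directory = MmapDirectory::open(path)?;
    ManagedDirectory::wrap(Box::new(mmap_directory))
}
```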
@@ -248,24 +259,15 @@ impl ManagedDirectory {
Ok(footer.crc() == crc)
}
/// List files for which checksum does not match content
pub fn list_damaged(&self) -> result::Result<HashSet<PathBuf>, OpenReadError> {
let mut managed_paths = self
/// List all managed files
pub fn list_managed_files(&self) -> HashSet<PathBuf> {
let managed_paths = self
.meta_informations
.read()
.expect("Managed directory rlock poisoned in list damaged.")
.managed_paths
.clone();
managed_paths.remove(*META_FILEPATH);
let mut damaged_files = HashSet::new();
for path in managed_paths {
if !self.validate_checksum(&path)? {
damaged_files.insert(path);
}
}
Ok(damaged_files)
managed_paths
}
}
@@ -319,6 +321,11 @@ impl Directory for ManagedDirectory {
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
self.directory.watch(watch_callback)
}
fn sync_directory(&self) -> io::Result<()> {
self.directory.sync_directory()?;
Ok(())
}
}
impl Clone for ManagedDirectory {
@@ -334,13 +341,14 @@ impl Clone for ManagedDirectory {
#[cfg(test)]
mod tests_mmap_specific {
use crate::directory::{Directory, ManagedDirectory, MmapDirectory, TerminatingWrite};
use std::collections::HashSet;
use std::fs::OpenOptions;
use std::io::Write;
use std::path::{Path, PathBuf};
use tempfile::TempDir;
use crate::directory::{Directory, ManagedDirectory, MmapDirectory, TerminatingWrite};
#[test]
fn test_managed_directory() {
let tempdir = TempDir::new().unwrap();
@@ -350,7 +358,7 @@ mod tests_mmap_specific {
let test_path2: &'static Path = Path::new("some_path_for_test_2");
{
let mmap_directory = MmapDirectory::open(&tempdir_path).unwrap();
let mut managed_directory = ManagedDirectory::wrap(mmap_directory).unwrap();
let mut managed_directory = ManagedDirectory::wrap(Box::new(mmap_directory)).unwrap();
let write_file = managed_directory.open_write(test_path1).unwrap();
write_file.terminate().unwrap();
managed_directory
@@ -365,7 +373,7 @@ mod tests_mmap_specific {
}
{
let mmap_directory = MmapDirectory::open(&tempdir_path).unwrap();
let mut managed_directory = ManagedDirectory::wrap(mmap_directory).unwrap();
let mut managed_directory = ManagedDirectory::wrap(Box::new(mmap_directory)).unwrap();
assert!(managed_directory.exists(test_path1).unwrap());
assert!(!managed_directory.exists(test_path2).unwrap());
let living_files: HashSet<PathBuf> = HashSet::new();
@@ -384,7 +392,7 @@ mod tests_mmap_specific {
let living_files = HashSet::new();
let mmap_directory = MmapDirectory::open(&tempdir_path).unwrap();
let mut managed_directory = ManagedDirectory::wrap(mmap_directory).unwrap();
let mut managed_directory = ManagedDirectory::wrap(Box::new(mmap_directory)).unwrap();
let mut write = managed_directory.open_write(test_path1).unwrap();
write.write_all(&[0u8, 1u8]).unwrap();
write.terminate().unwrap();
@@ -405,39 +413,4 @@ mod tests_mmap_specific {
}
assert!(!managed_directory.exists(test_path1).unwrap());
}
#[test]
fn test_checksum() -> crate::Result<()> {
let test_path1: &'static Path = Path::new("some_path_for_test");
let test_path2: &'static Path = Path::new("other_test_path");
let tempdir = TempDir::new().unwrap();
let tempdir_path = PathBuf::from(tempdir.path());
let mmap_directory = MmapDirectory::open(&tempdir_path)?;
let managed_directory = ManagedDirectory::wrap(mmap_directory)?;
let mut write = managed_directory.open_write(test_path1)?;
write.write_all(&[0u8, 1u8])?;
write.terminate()?;
let mut write = managed_directory.open_write(test_path2)?;
write.write_all(&[3u8, 4u8, 5u8])?;
write.terminate()?;
let read_file = managed_directory.open_read(test_path2)?.read_bytes()?;
assert_eq!(read_file.as_slice(), &[3u8, 4u8, 5u8]);
assert!(managed_directory.list_damaged().unwrap().is_empty());
let mut corrupted_path = tempdir_path;
corrupted_path.push(test_path2);
let mut file = OpenOptions::new().write(true).open(&corrupted_path)?;
file.write_all(&[255u8])?;
file.flush()?;
drop(file);
let damaged = managed_directory.list_damaged()?;
assert_eq!(damaged.len(), 1);
assert!(damaged.contains(test_path2));
Ok(())
}
}

View File

@@ -1,32 +1,28 @@
use crate::core::META_FILEPATH;
use crate::directory::error::LockError;
use crate::directory::error::{DeleteError, OpenDirectoryError, OpenReadError, OpenWriteError};
use crate::directory::file_watcher::FileWatcher;
use crate::directory::Directory;
use crate::directory::DirectoryLock;
use crate::directory::Lock;
use crate::directory::WatchCallback;
use crate::directory::WatchHandle;
use crate::directory::{AntiCallToken, FileHandle, OwnedBytes};
use crate::directory::{ArcBytes, WeakArcBytes};
use crate::directory::{TerminatingWrite, WritePtr};
use std::collections::HashMap;
use std::convert::From;
use std::fs::{self, File, OpenOptions};
use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::sync::{Arc, RwLock};
use std::{fmt, result};
use fs2::FileExt;
use memmap::Mmap;
use memmap2::Mmap;
use serde::{Deserialize, Serialize};
use stable_deref_trait::StableDeref;
use std::convert::From;
use std::fmt;
use std::fs::OpenOptions;
use std::fs::{self, File};
use std::io::{self, Seek, SeekFrom};
use std::io::{BufWriter, Read, Write};
use std::path::{Path, PathBuf};
use std::result;
use std::sync::Arc;
use std::sync::RwLock;
use std::{collections::HashMap, ops::Deref};
use tempfile::TempDir;
use crate::core::META_FILEPATH;
use crate::directory::error::{
DeleteError, LockError, OpenDirectoryError, OpenReadError, OpenWriteError,
};
use crate::directory::file_watcher::FileWatcher;
use crate::directory::{
AntiCallToken, ArcBytes, Directory, DirectoryLock, FileHandle, Lock, OwnedBytes,
TerminatingWrite, WatchCallback, WatchHandle, WeakArcBytes, WritePtr,
};
/// Create a default io error given a string.
pub(crate) fn make_io_err(msg: String) -> io::Error {
io::Error::new(io::ErrorKind::Other, msg)
@@ -53,7 +49,7 @@ fn open_mmap(full_path: &Path) -> result::Result<Option<Mmap>, OpenReadError> {
return Ok(None);
}
unsafe {
memmap::Mmap::map(&file)
memmap2::Mmap::map(&file)
.map(Some)
.map_err(|io_err| OpenReadError::wrap_io_error(io_err, full_path.to_path_buf()))
}
@@ -74,20 +70,12 @@ pub struct CacheInfo {
pub mmapped: Vec<PathBuf>,
}
#[derive(Default)]
struct MmapCache {
counters: CacheCounters,
cache: HashMap<PathBuf, WeakArcBytes>,
}
impl Default for MmapCache {
fn default() -> MmapCache {
MmapCache {
counters: CacheCounters::default(),
cache: HashMap::new(),
}
}
}
impl MmapCache {
fn get_info(&self) -> CacheInfo {
let paths: Vec<PathBuf> = self.cache.keys().cloned().collect();
@@ -201,16 +189,19 @@ impl MmapDirectory {
pub fn open<P: AsRef<Path>>(directory_path: P) -> Result<MmapDirectory, OpenDirectoryError> {
let directory_path: &Path = directory_path.as_ref();
if !directory_path.exists() {
Err(OpenDirectoryError::DoesNotExist(PathBuf::from(
return Err(OpenDirectoryError::DoesNotExist(PathBuf::from(
directory_path,
)))
} else if !directory_path.is_dir() {
Err(OpenDirectoryError::NotADirectory(PathBuf::from(
directory_path,
)))
} else {
Ok(MmapDirectory::new(PathBuf::from(directory_path), None))
)));
}
let canonical_path: PathBuf = directory_path.canonicalize().map_err(|io_err| {
OpenDirectoryError::wrap_io_error(io_err, PathBuf::from(directory_path))
})?;
if !canonical_path.is_dir() {
return Err(OpenDirectoryError::NotADirectory(PathBuf::from(
directory_path,
)));
}
Ok(MmapDirectory::new(canonical_path, None))
}
/// Joins a relative_path to the directory `root_path`
@@ -219,33 +210,6 @@ impl MmapDirectory {
self.inner.root_path.join(relative_path)
}
/// Sync the root directory.
/// In certain FS, this is required to persistently create
/// a file.
fn sync_directory(&self) -> Result<(), io::Error> {
let mut open_opts = OpenOptions::new();
// Linux needs read to be set, otherwise returns EINVAL
// write must not be set, or it fails with EISDIR
open_opts.read(true);
// On Windows, opening a directory requires FILE_FLAG_BACKUP_SEMANTICS
// and calling sync_all() only works if write access is requested.
#[cfg(windows)]
{
use std::os::windows::fs::OpenOptionsExt;
use winapi::um::winbase;
open_opts
.write(true)
.custom_flags(winbase::FILE_FLAG_BACKUP_SEMANTICS);
}
let fd = open_opts.open(&self.inner.root_path)?;
fd.sync_all()?;
Ok(())
}
/// Returns some statistical information
/// about the Mmap cache.
///
@@ -296,8 +260,7 @@ impl Write for SafeFileWriter {
}
fn flush(&mut self) -> io::Result<()> {
self.0.flush()?;
self.0.sync_all()
Ok(())
}
}
@@ -309,7 +272,9 @@ impl Seek for SafeFileWriter {
impl TerminatingWrite for SafeFileWriter {
fn terminate_ref(&mut self, _: AntiCallToken) -> io::Result<()> {
self.flush()
self.0.flush()?;
self.0.sync_data()?;
Ok(())
}
}
@@ -339,6 +304,7 @@ pub(crate) fn atomic_write(path: &Path, content: &[u8]) -> io::Result<()> {
let mut tempfile = tempfile::Builder::new().tempfile_in(&parent_path)?;
tempfile.write_all(content)?;
tempfile.flush()?;
tempfile.as_file_mut().sync_data()?;
tempfile.into_temp_path().persist(path)?;
Ok(())
}
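
The write-to-temp-file, sync, then rename sequence above is the classic recipe for atomic file replacement; from a caller's point of view it is reached through `Directory::atomic_write` and read back with `atomic_read`. A hedged usage sketch, assuming a throwaway `tempfile::TempDir` as the index directory:

```rust
use std::path::Path;
use tantivy::directory::{Directory, MmapDirectory};

fn atomic_write_demo() -> tantivy::Result<()> {
    let tmp_dir = tempfile::TempDir::new()?;
    let directory = MmapDirectory::open(tmp_dir.path())?;
    // The whole payload becomes visible at once, or not at all.
    directory.atomic_write(Path::new("meta.json"), b"{}")?;
    let bytes = directory.atomic_read(Path::new("meta.json"))?;
    assert_eq!(bytes, b"{}".to_vec());
    Ok(())
}
```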
@@ -350,8 +316,7 @@ impl Directory for MmapDirectory {
let mut mmap_cache = self.inner.mmap_cache.write().map_err(|_| {
let msg = format!(
"Failed to acquired write lock \
on mmap cache while reading {:?}",
"Failed to acquired write lock on mmap cache while reading {:?}",
path
);
let io_err = make_io_err(msg);
@@ -373,22 +338,17 @@ impl Directory for MmapDirectory {
/// removed before the file is deleted.
fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
let full_path = self.resolve_path(path);
match fs::remove_file(&full_path) {
Ok(_) => self.sync_directory().map_err(|e| DeleteError::IoError {
io_error: e,
filepath: path.to_path_buf(),
}),
Err(e) => {
if e.kind() == io::ErrorKind::NotFound {
Err(DeleteError::FileDoesNotExist(path.to_owned()))
} else {
Err(DeleteError::IoError {
io_error: e,
filepath: path.to_path_buf(),
})
fs::remove_file(&full_path).map_err(|e| {
if e.kind() == io::ErrorKind::NotFound {
DeleteError::FileDoesNotExist(path.to_owned())
} else {
DeleteError::IoError {
io_error: e,
filepath: path.to_path_buf(),
}
}
}
})?;
Ok(())
}
fn exists(&self, path: &Path) -> Result<bool, OpenReadError> {
@@ -417,10 +377,13 @@ impl Directory for MmapDirectory {
file.flush()
.map_err(|io_error| OpenWriteError::wrap_io_error(io_error, path.to_path_buf()))?;
// Apparently, on some filesystems, syncing the parent
// directory is required.
self.sync_directory()
.map_err(|io_err| OpenWriteError::wrap_io_error(io_err, path.to_path_buf()))?;
// Note we actually do not sync the parent directory here.
//
// A newly created file may, in some cases, be created and even flushed to disk,
// and then lost...
//
// The file will only be durably written after we terminate AND
// sync_directory() is called.
let writer = SafeFileWriter::new(file);
Ok(BufWriter::new(Box::new(writer)))
@@ -450,7 +413,7 @@ impl Directory for MmapDirectory {
debug!("Atomic Write {:?}", path);
let full_path = self.resolve_path(path);
atomic_write(&full_path, content)?;
self.sync_directory()
Ok(())
}
fn acquire_lock(&self, lock: &Lock) -> Result<DirectoryLock, LockError> {
@@ -476,6 +439,31 @@ impl Directory for MmapDirectory {
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
Ok(self.inner.watch(watch_callback))
}
fn sync_directory(&self) -> Result<(), io::Error> {
let mut open_opts = OpenOptions::new();
// Linux needs read to be set, otherwise returns EINVAL
// write must not be set, or it fails with EISDIR
open_opts.read(true);
// On Windows, opening a directory requires FILE_FLAG_BACKUP_SEMANTICS
// and calling sync_all() only works if write access is requested.
#[cfg(windows)]
{
use std::os::windows::fs::OpenOptionsExt;
use winapi::um::winbase;
open_opts
.write(true)
.custom_flags(winbase::FILE_FLAG_BACKUP_SEMANTICS);
}
let fd = open_opts.open(&self.inner.root_path)?;
fd.sync_data()?;
Ok(())
}
}
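A sketch of the durability contract spelled out in the comments above: a file written through `open_write` only becomes durable once the writer is terminated and the parent directory entry itself is synced. The file name is illustrative.

```rust
use std::io::Write;
use std::path::Path;
use tantivy::directory::{Directory, MmapDirectory, TerminatingWrite};

fn write_durably(directory: &MmapDirectory) -> tantivy::Result<()> {
    let mut wrt = directory.open_write(Path::new("some_file"))?;
    wrt.write_all(b"payload")?;
    wrt.terminate()?; // flushes and syncs the file's own data
    directory.sync_directory()?; // syncs the parent directory entry
    Ok(())
}
```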
#[cfg(test)]
@@ -484,14 +472,12 @@ mod tests {
// There are more tests in directory/mod.rs
// The following tests are specific to the MmapDirectory
use common::HasLen;
use super::*;
use crate::Index;
use crate::ReloadPolicy;
use crate::{common::HasLen, indexer::LogMergePolicy};
use crate::{
schema::{Schema, SchemaBuilder, TEXT},
IndexSettings,
};
use crate::indexer::LogMergePolicy;
use crate::schema::{Schema, SchemaBuilder, TEXT};
use crate::{Index, IndexSettings, ReloadPolicy};
#[test]
fn test_open_non_existent_path() {
@@ -528,7 +514,7 @@ mod tests {
{
for path in &paths {
let mut w = mmap_directory.open_write(path).unwrap();
w.write(content).unwrap();
w.write_all(content).unwrap();
w.flush().unwrap();
}
}
@@ -581,8 +567,8 @@ mod tests {
}
#[test]
fn test_mmap_released() {
let mmap_directory = MmapDirectory::create_from_tempdir().unwrap();
fn test_mmap_released() -> crate::Result<()> {
let mmap_directory = MmapDirectory::create_from_tempdir()?;
let mut schema_builder: SchemaBuilder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
@@ -591,31 +577,30 @@ mod tests {
let index =
Index::create(mmap_directory.clone(), schema, IndexSettings::default()).unwrap();
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
let mut log_merge_policy = LogMergePolicy::default();
log_merge_policy.set_min_num_segments(3);
index_writer.set_merge_policy(Box::new(log_merge_policy));
for _num_commits in 0..10 {
for _ in 0..10 {
index_writer.add_document(doc!(text_field=>"abc"));
index_writer.add_document(doc!(text_field=>"abc"))?;
}
index_writer.commit().unwrap();
index_writer.commit()?;
}
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::Manual)
.try_into()
.unwrap();
.try_into()?;
for _ in 0..4 {
index_writer.add_document(doc!(text_field=>"abc"));
index_writer.commit().unwrap();
reader.reload().unwrap();
index_writer.add_document(doc!(text_field=>"abc"))?;
index_writer.commit()?;
reader.reload()?;
}
index_writer.wait_merging_threads().unwrap();
index_writer.wait_merging_threads()?;
reader.reload().unwrap();
reader.reload()?;
let num_segments = reader.searcher().segment_readers().len();
assert!(num_segments <= 4);
let num_components_except_deletes_and_tempstore =
@@ -626,5 +611,6 @@ mod tests {
);
}
assert!(mmap_directory.get_cache_info().mmapped.is_empty());
Ok(())
}
}

View File

@@ -1,8 +1,4 @@
/*!
WORM directory abstraction.
*/
//! WORM (Write Once Read Many) directory abstraction.
#[cfg(feature = "mmap")]
mod mmap_directory;
@@ -20,18 +16,21 @@ mod watch_event_router;
/// Errors specific to the directory module.
pub mod error;
pub use self::directory::DirectoryLock;
pub use self::directory::{Directory, DirectoryClone};
mod composite_file;
use std::io::BufWriter;
use std::path::PathBuf;
pub use common::{AntiCallToken, TerminatingWrite};
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
pub use self::directory::{Directory, DirectoryClone, DirectoryLock};
pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK};
pub(crate) use self::file_slice::{ArcBytes, WeakArcBytes};
pub use self::file_slice::{FileHandle, FileSlice};
pub use self::owned_bytes::OwnedBytes;
pub use self::ram_directory::RamDirectory;
pub use self::watch_event_router::{WatchCallback, WatchCallbackList, WatchHandle};
pub use common::AntiCallToken;
pub use common::TerminatingWrite;
use std::io::BufWriter;
use std::path::PathBuf;
/// Outcome of the Garbage collection
pub struct GarbageCollectionResult {
@@ -47,11 +46,10 @@ pub struct GarbageCollectionResult {
pub failed_to_delete_files: Vec<PathBuf>,
}
pub use self::managed_directory::ManagedDirectory;
#[cfg(feature = "mmap")]
pub use self::mmap_directory::MmapDirectory;
pub use self::managed_directory::ManagedDirectory;
/// Write object for Directory.
///
/// `WritePtr` are required to implement both Write

View File

@@ -1,9 +1,10 @@
use crate::directory::FileHandle;
use std::io;
use std::ops::Range;
pub use ownedbytes::OwnedBytes;
use crate::directory::FileHandle;
impl FileHandle for OwnedBytes {
fn read_bytes(&self, range: Range<usize>) -> io::Result<OwnedBytes> {
Ok(self.slice(range))

View File

@@ -1,29 +1,23 @@
use crate::directory::error::{DeleteError, OpenReadError, OpenWriteError};
use crate::directory::AntiCallToken;
use crate::directory::WatchCallbackList;
use crate::directory::{Directory, FileSlice, WatchCallback, WatchHandle};
use crate::directory::{TerminatingWrite, WritePtr};
use crate::{common::HasLen, core::META_FILEPATH};
use fail::fail_point;
use std::collections::HashMap;
use std::fmt;
use std::io::{self, BufWriter, Cursor, Seek, SeekFrom, Write};
use std::path::{Path, PathBuf};
use std::result;
use std::sync::{Arc, RwLock};
use std::{fmt, result};
use common::HasLen;
use fail::fail_point;
use super::FileHandle;
use crate::core::META_FILEPATH;
use crate::directory::error::{DeleteError, OpenReadError, OpenWriteError};
use crate::directory::{
AntiCallToken, Directory, FileSlice, TerminatingWrite, WatchCallback, WatchCallbackList,
WatchHandle, WritePtr,
};
/// Writer associated with the `RamDirectory`
///
/// The Writer just writes a buffer.
///
/// # Panics
///
/// On drop, if the writer was left in a *dirty* state.
/// That is, if flush was not called after the last call
/// to write.
///
struct VecWriter {
path: PathBuf,
shared_directory: RamDirectory,
@@ -45,8 +39,10 @@ impl VecWriter {
impl Drop for VecWriter {
fn drop(&mut self) {
if !self.is_flushed {
panic!(
"You forgot to flush {:?} before its writter got Drop. Do not rely on drop. This also occurs when the indexer crashed, so you may want to check the logs for the root cause.",
warn!(
"You forgot to flush {:?} before its writter got Drop. Do not rely on drop. This \
also occurs when the indexer crashed, so you may want to check the logs for the \
root cause.",
self.path
)
}
@@ -129,7 +125,6 @@ impl fmt::Debug for RamDirectory {
///
/// It is mainly meant for unit testing.
/// Writes are only made visible upon flushing.
///
#[derive(Clone, Default)]
pub struct RamDirectory {
fs: Arc<RwLock<InnerDirectory>>,
@@ -220,14 +215,8 @@ impl Directory for RamDirectory {
}
fn atomic_write(&self, path: &Path, data: &[u8]) -> io::Result<()> {
fail_point!("RamDirectory::atomic_write", |msg| Err(io::Error::new(
io::ErrorKind::Other,
msg.unwrap_or_else(|| "Undefined".to_string())
)));
let path_buf = PathBuf::from(path);
self.fs.write().unwrap().write(path_buf, data);
if path == *META_FILEPATH {
let _ = self.fs.write().unwrap().watch_router.broadcast();
}
@@ -237,15 +226,20 @@ impl Directory for RamDirectory {
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
Ok(self.fs.write().unwrap().watch(watch_callback))
}
fn sync_directory(&self) -> io::Result<()> {
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::RamDirectory;
use crate::Directory;
use std::io::Write;
use std::path::Path;
use super::RamDirectory;
use crate::Directory;
#[test]
fn test_persist() {
let msg_atomic: &'static [u8] = b"atomic is the way";

View File

@@ -1,6 +1,3 @@
use super::*;
use futures::channel::oneshot;
use futures::executor::block_on;
use std::io::Write;
use std::mem;
use std::path::{Path, PathBuf};
@@ -9,6 +6,11 @@ use std::sync::atomic::{AtomicBool, AtomicUsize};
use std::sync::Arc;
use std::time::Duration;
use futures::channel::oneshot;
use futures::executor::block_on;
use super::*;
#[cfg(feature = "mmap")]
mod mmap_directory_tests {
use crate::directory::MmapDirectory;
@@ -118,15 +120,6 @@ mod ram_directory_tests {
}
}
#[test]
#[should_panic]
fn ram_directory_panics_if_flush_forgotten() {
let test_path: &'static Path = Path::new("some_path_for_test");
let ram_directory = RamDirectory::create();
let mut write_file = ram_directory.open_write(test_path).unwrap();
assert!(write_file.write_all(&[4]).is_ok());
}
fn test_simple(directory: &dyn Directory) -> crate::Result<()> {
let test_path: &'static Path = Path::new("some_path_for_test");
let mut write_file = directory.open_write(test_path)?;

View File

@@ -1,8 +1,7 @@
use std::sync::{Arc, RwLock, Weak};
use futures::channel::oneshot;
use futures::{Future, TryFutureExt};
use std::sync::Arc;
use std::sync::RwLock;
use std::sync::Weak;
/// Cloneable wrapper for callbacks registered when watching files of a `Directory`.
#[derive(Clone)]
@@ -103,12 +102,14 @@ impl WatchCallbackList {
#[cfg(test)]
mod tests {
use crate::directory::{WatchCallback, WatchCallbackList};
use futures::executor::block_on;
use std::mem;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use futures::executor::block_on;
use crate::directory::{WatchCallback, WatchCallbackList};
#[test]
fn test_watch_event_router_simple() {
let watch_event_router = WatchCallbackList::default();

View File

@@ -1,7 +1,7 @@
use crate::fastfield::DeleteBitSet;
use std::borrow::{Borrow, BorrowMut};
use crate::fastfield::AliveBitSet;
use crate::DocId;
use std::borrow::Borrow;
use std::borrow::BorrowMut;
/// Sentinel value returned when a DocSet has been entirely consumed.
///
@@ -85,11 +85,11 @@ pub trait DocSet: Send {
/// Returns the number of documents matching.
/// Calling this method consumes the `DocSet`.
fn count(&mut self, delete_bitset: &DeleteBitSet) -> u32 {
fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 {
let mut count = 0u32;
let mut doc = self.doc();
while doc != TERMINATED {
if !delete_bitset.is_deleted(doc) {
if alive_bitset.is_alive(doc) {
count += 1u32;
}
doc = self.advance();
@@ -130,8 +130,8 @@ impl<'a> DocSet for &'a mut dyn DocSet {
(**self).size_hint()
}
fn count(&mut self, delete_bitset: &DeleteBitSet) -> u32 {
(**self).count(delete_bitset)
fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 {
(**self).count(alive_bitset)
}
fn count_including_deleted(&mut self) -> u32 {
@@ -160,9 +160,9 @@ impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
unboxed.size_hint()
}
fn count(&mut self, delete_bitset: &DeleteBitSet) -> u32 {
fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 {
let unboxed: &mut TDocSet = self.borrow_mut();
unboxed.count(delete_bitset)
unboxed.count(alive_bitset)
}
fn count_including_deleted(&mut self) -> u32 {

View File
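A small sketch of the renamed semantics: counting only the documents that the `AliveBitSet` still marks as alive, which is what the default `count` implementation above does while draining a `DocSet`. The helper is illustrative, not tantivy API.

```rust
use tantivy::fastfield::AliveBitSet;
use tantivy::DocId;

fn count_alive(docs: &[DocId], alive_bitset: &AliveBitSet) -> u32 {
    docs.iter()
        .filter(|&&doc| alive_bitset.is_alive(doc))
        .count() as u32
}
```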

@@ -1,17 +1,14 @@
//! Definition of Tantivy's error and result.
use std::io;
use crate::directory::error::{Incompatibility, LockError};
use crate::fastfield::FastFieldNotAvailableError;
use crate::query;
use crate::{
directory::error::{OpenDirectoryError, OpenReadError, OpenWriteError},
schema,
};
use std::fmt;
use std::path::PathBuf;
use std::sync::PoisonError;
use std::{fmt, io};
use crate::directory::error::{
Incompatibility, LockError, OpenDirectoryError, OpenReadError, OpenWriteError,
};
use crate::fastfield::FastFieldNotAvailableError;
use crate::{query, schema};
/// Represents a `DataCorruption` error.
///

View File

@@ -0,0 +1,225 @@
use std::io;
use std::io::Write;
use common::{intersect_bitsets, BitSet, ReadOnlyBitSet};
use ownedbytes::OwnedBytes;
use crate::space_usage::ByteCount;
use crate::DocId;
/// Write an alive `BitSet`
///
/// where `alive_bitset` is the set of alive `DocId`.
/// Warning: this function does not call terminate. The caller is in charge of
/// closing the writer properly.
pub fn write_alive_bitset<T: Write>(alive_bitset: &BitSet, writer: &mut T) -> io::Result<()> {
alive_bitset.serialize(writer)?;
Ok(())
}
/// Set of alive `DocId`s.
#[derive(Clone)]
pub struct AliveBitSet {
num_alive_docs: usize,
bitset: ReadOnlyBitSet,
}
/// Intersects two AliveBitSets in a new one.
/// The two bitsets need to have the same max_value.
pub fn intersect_alive_bitsets(left: AliveBitSet, right: AliveBitSet) -> AliveBitSet {
assert_eq!(left.bitset().max_value(), right.bitset().max_value());
let bitset = intersect_bitsets(left.bitset(), right.bitset());
let num_alive_docs = bitset.len();
AliveBitSet {
num_alive_docs,
bitset,
}
}
impl AliveBitSet {
#[cfg(test)]
pub(crate) fn for_test_from_deleted_docs(deleted_docs: &[DocId], max_doc: u32) -> AliveBitSet {
assert!(deleted_docs.iter().all(|&doc| doc < max_doc));
let mut bitset = BitSet::with_max_value_and_full(max_doc);
for &doc in deleted_docs {
bitset.remove(doc);
}
let mut alive_bitset_buffer = Vec::new();
write_alive_bitset(&bitset, &mut alive_bitset_buffer).unwrap();
let alive_bitset_bytes = OwnedBytes::new(alive_bitset_buffer);
Self::open(alive_bitset_bytes)
}
pub(crate) fn from_bitset(bitset: &BitSet) -> AliveBitSet {
let readonly_bitset = ReadOnlyBitSet::from(bitset);
AliveBitSet::from(readonly_bitset)
}
/// Opens an alive bitset given its serialized bytes.
pub fn open(bytes: OwnedBytes) -> AliveBitSet {
let bitset = ReadOnlyBitSet::open(bytes);
AliveBitSet::from(bitset)
}
/// Returns true iff the document is still "alive". In other words, if it has not been deleted.
#[inline]
pub fn is_alive(&self, doc: DocId) -> bool {
self.bitset.contains(doc)
}
/// Returns true iff the document has been marked as deleted.
#[inline]
pub fn is_deleted(&self, doc: DocId) -> bool {
!self.is_alive(doc)
}
/// Iterate over the alive doc_ids.
#[inline]
pub fn iter_alive(&self) -> impl Iterator<Item = DocId> + '_ {
self.bitset.iter()
}
/// Get underlying bitset
#[inline]
pub fn bitset(&self) -> &ReadOnlyBitSet {
&self.bitset
}
/// The number of alive docs.
pub fn num_alive_docs(&self) -> usize {
self.num_alive_docs
}
/// Summarize total space usage of this bitset.
pub fn space_usage(&self) -> ByteCount {
self.bitset().num_bytes()
}
}
impl From<ReadOnlyBitSet> for AliveBitSet {
fn from(bitset: ReadOnlyBitSet) -> AliveBitSet {
let num_alive_docs = bitset.len();
AliveBitSet {
num_alive_docs,
bitset,
}
}
}
#[cfg(test)]
mod tests {
use super::AliveBitSet;
#[test]
fn test_alive_bitset_empty() {
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[], 10);
for doc in 0..10 {
assert_eq!(alive_bitset.is_deleted(doc), !alive_bitset.is_alive(doc));
assert!(!alive_bitset.is_deleted(doc));
}
assert_eq!(alive_bitset.num_alive_docs(), 10);
}
#[test]
fn test_alive_bitset() {
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[1, 9], 10);
assert!(alive_bitset.is_alive(0));
assert!(alive_bitset.is_deleted(1));
assert!(alive_bitset.is_alive(2));
assert!(alive_bitset.is_alive(3));
assert!(alive_bitset.is_alive(4));
assert!(alive_bitset.is_alive(5));
assert!(alive_bitset.is_alive(6));
assert!(alive_bitset.is_alive(6));
assert!(alive_bitset.is_alive(7));
assert!(alive_bitset.is_alive(8));
assert!(alive_bitset.is_deleted(9));
for doc in 0..10 {
assert_eq!(alive_bitset.is_deleted(doc), !alive_bitset.is_alive(doc));
}
assert_eq!(alive_bitset.num_alive_docs(), 8);
}
#[test]
fn test_alive_bitset_iter_minimal() {
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[7], 8);
let data: Vec<_> = alive_bitset.iter_alive().collect();
assert_eq!(data, vec![0, 1, 2, 3, 4, 5, 6]);
}
#[test]
fn test_alive_bitset_iter_small() {
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[0, 2, 3, 6], 7);
let data: Vec<_> = alive_bitset.iter_alive().collect();
assert_eq!(data, vec![1, 4, 5]);
}
#[test]
fn test_alive_bitset_iter() {
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[0, 1, 1000], 1001);
let data: Vec<_> = alive_bitset.iter_alive().collect();
assert_eq!(data, (2..=999).collect::<Vec<_>>());
}
}
#[cfg(all(test, feature = "unstable"))]
mod bench {
use rand::prelude::IteratorRandom;
use rand::thread_rng;
use test::Bencher;
use super::AliveBitSet;
fn get_alive() -> Vec<u32> {
let mut data = (0..1_000_000_u32).collect::<Vec<u32>>();
for _ in 0..(1_000_000) * 1 / 8 {
remove_rand(&mut data);
}
data
}
fn remove_rand(raw: &mut Vec<u32>) {
let i = (0..raw.len()).choose(&mut thread_rng()).unwrap();
raw.remove(i);
}
#[bench]
fn bench_deletebitset_iter_deser_on_fly(bench: &mut Bencher) {
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[0, 1, 1000, 10000], 1_000_000);
bench.iter(|| alive_bitset.iter_alive().collect::<Vec<_>>());
}
#[bench]
fn bench_deletebitset_access(bench: &mut Bencher) {
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&[0, 1, 1000, 10000], 1_000_000);
bench.iter(|| {
(0..1_000_000_u32)
.filter(|doc| alive_bitset.is_alive(*doc))
.collect::<Vec<_>>()
});
}
#[bench]
fn bench_deletebitset_iter_deser_on_fly_1_8_alive(bench: &mut Bencher) {
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&get_alive(), 1_000_000);
bench.iter(|| alive_bitset.iter_alive().collect::<Vec<_>>());
}
#[bench]
fn bench_deletebitset_access_1_8_alive(bench: &mut Bencher) {
let alive_bitset = AliveBitSet::for_test_from_deleted_docs(&get_alive(), 1_000_000);
bench.iter(|| {
(0..1_000_000_u32)
.filter(|doc| alive_bitset.is_alive(*doc))
.collect::<Vec<_>>()
});
}
}

View File

@@ -6,11 +6,12 @@ pub use self::writer::BytesFastFieldWriter;
#[cfg(test)]
mod tests {
use crate::schema::{BytesOptions, IndexRecordOption, Schema, Value};
use crate::{query::TermQuery, schema::FAST, schema::INDEXED, schema::STORED};
use crate::{DocAddress, DocSet, Index, Searcher, Term};
use std::ops::Deref;
use crate::query::TermQuery;
use crate::schema::{BytesOptions, IndexRecordOption, Schema, Value, FAST, INDEXED, STORED};
use crate::{DocAddress, DocSet, Index, Searcher, Term};
#[test]
fn test_bytes() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
@@ -18,11 +19,11 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(bytes_field=>vec![0u8, 1, 2, 3]));
index_writer.add_document(doc!(bytes_field=>vec![]));
index_writer.add_document(doc!(bytes_field=>vec![255u8]));
index_writer.add_document(doc!(bytes_field=>vec![1u8, 3, 5, 7, 9]));
index_writer.add_document(doc!(bytes_field=>vec![0u8; 1000]));
index_writer.add_document(doc!(bytes_field=>vec![0u8, 1, 2, 3]))?;
index_writer.add_document(doc!(bytes_field=>vec![]))?;
index_writer.add_document(doc!(bytes_field=>vec![255u8]))?;
index_writer.add_document(doc!(bytes_field=>vec![1u8, 3, 5, 7, 9]))?;
index_writer.add_document(doc!(bytes_field=>vec![0u8; 1000]))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let segment_reader = searcher.segment_reader(0);
@@ -47,7 +48,7 @@ mod tests {
index_writer.add_document(doc!(
field => b"tantivy".as_ref(),
field => b"lucene".as_ref()
));
))?;
index_writer.commit()?;
Ok(index.reader()?.searcher())
}
@@ -62,7 +63,7 @@ mod tests {
assert_eq!(values.len(), 2);
let values_bytes: Vec<&[u8]> = values
.into_iter()
.flat_map(|value| value.bytes_value())
.flat_map(|value| value.as_bytes())
.collect();
assert_eq!(values_bytes, &[&b"tantivy"[..], &b"lucene"[..]]);
Ok(())

View File

@@ -1,6 +1,5 @@
use crate::directory::FileSlice;
use crate::directory::OwnedBytes;
use crate::fastfield::{BitpackedFastFieldReader, FastFieldReader, MultiValueLength};
use crate::directory::{FileSlice, OwnedBytes};
use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, MultiValueLength};
use crate::DocId;
/// Reader for byte array fast fields
@@ -15,13 +14,13 @@ use crate::DocId;
/// and the start index for the next document, and keeping the bytes in between.
#[derive(Clone)]
pub struct BytesFastFieldReader {
idx_reader: BitpackedFastFieldReader<u64>,
idx_reader: DynamicFastFieldReader<u64>,
values: OwnedBytes,
}
impl BytesFastFieldReader {
pub(crate) fn open(
idx_reader: BitpackedFastFieldReader<u64>,
idx_reader: DynamicFastFieldReader<u64>,
values_file: FileSlice,
) -> crate::Result<BytesFastFieldReader> {
let values = values_file.read_bytes()?;

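The doc comment above describes the addressing scheme: the index fast field stores, per document, the start offset of its payload, and a document's bytes are the range between its start offset and the next document's start offset. A standalone sketch of that lookup (the helper and its offsets are illustrative, not tantivy API):

```rust
fn doc_bytes<'a>(start_offsets: &[u64], values: &'a [u8], doc: usize) -> &'a [u8] {
    let start = start_offsets[doc] as usize;
    let stop = start_offsets[doc + 1] as usize;
    &values[start..stop]
}

fn main() {
    // Three documents with payloads b"ab", b"" and b"cde".
    let start_offsets = [0u64, 2, 2, 5];
    let values = b"abcde";
    assert_eq!(doc_bytes(&start_offsets, values, 0), b"ab");
    assert_eq!(doc_bytes(&start_offsets, values, 1), b"");
    assert_eq!(doc_bytes(&start_offsets, values, 2), b"cde");
}
```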
View File

@@ -1,10 +1,9 @@
use std::io;
use crate::fastfield::serializer::CompositeFastFieldSerializer;
use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::schema::{Document, Field, Value};
use crate::DocId;
use crate::{
fastfield::serializer::CompositeFastFieldSerializer, indexer::doc_id_mapping::DocIdMapping,
};
/// Writer for byte array (as in, any number of bytes per document) fast fields
///

View File

@@ -1,143 +0,0 @@
use crate::common::{BitSet, HasLen};
use crate::directory::FileSlice;
use crate::directory::OwnedBytes;
use crate::directory::WritePtr;
use crate::space_usage::ByteCount;
use crate::DocId;
use std::io;
use std::io::Write;
/// Write a delete `BitSet`
///
/// where `delete_bitset` is the set of deleted `DocId`.
/// Warning: this function does not call terminate. The caller is in charge of
/// closing the writer properly.
pub fn write_delete_bitset(
delete_bitset: &BitSet,
max_doc: u32,
writer: &mut WritePtr,
) -> io::Result<()> {
let mut byte = 0u8;
let mut shift = 0u8;
for doc in 0..max_doc {
if delete_bitset.contains(doc) {
byte |= 1 << shift;
}
if shift == 7 {
writer.write_all(&[byte])?;
shift = 0;
byte = 0;
} else {
shift += 1;
}
}
if max_doc % 8 > 0 {
writer.write_all(&[byte])?;
}
Ok(())
}
/// Set of deleted `DocId`s.
#[derive(Clone)]
pub struct DeleteBitSet {
data: OwnedBytes,
num_deleted: usize,
}
impl DeleteBitSet {
#[cfg(test)]
pub(crate) fn for_test(docs: &[DocId], max_doc: u32) -> DeleteBitSet {
use crate::directory::{Directory, RamDirectory, TerminatingWrite};
use std::path::Path;
assert!(docs.iter().all(|&doc| doc < max_doc));
let mut bitset = BitSet::with_max_value(max_doc);
for &doc in docs {
bitset.insert(doc);
}
let directory = RamDirectory::create();
let path = Path::new("dummydeletebitset");
let mut wrt = directory.open_write(path).unwrap();
write_delete_bitset(&bitset, max_doc, &mut wrt).unwrap();
wrt.terminate().unwrap();
let file = directory.open_read(path).unwrap();
Self::open(file).unwrap()
}
/// Opens a delete bitset given its file.
pub fn open(file: FileSlice) -> crate::Result<DeleteBitSet> {
let bytes = file.read_bytes()?;
let num_deleted: usize = bytes
.as_slice()
.iter()
.map(|b| b.count_ones() as usize)
.sum();
Ok(DeleteBitSet {
data: bytes,
num_deleted,
})
}
/// Returns true iff the document is still "alive". In other words, if it has not been deleted.
pub fn is_alive(&self, doc: DocId) -> bool {
!self.is_deleted(doc)
}
/// Returns true iff the document has been marked as deleted.
#[inline]
pub fn is_deleted(&self, doc: DocId) -> bool {
let byte_offset = doc / 8u32;
let b: u8 = self.data.as_slice()[byte_offset as usize];
let shift = (doc & 7u32) as u8;
b & (1u8 << shift) != 0
}
/// The number of deleted docs
pub fn num_deleted(&self) -> usize {
self.num_deleted
}
/// Summarize total space usage of this bitset.
pub fn space_usage(&self) -> ByteCount {
self.data.len()
}
}
impl HasLen for DeleteBitSet {
fn len(&self) -> usize {
self.num_deleted
}
}
#[cfg(test)]
mod tests {
use super::DeleteBitSet;
use crate::common::HasLen;
#[test]
fn test_delete_bitset_empty() {
let delete_bitset = DeleteBitSet::for_test(&[], 10);
for doc in 0..10 {
assert_eq!(delete_bitset.is_deleted(doc), !delete_bitset.is_alive(doc));
}
assert_eq!(delete_bitset.len(), 0);
}
#[test]
fn test_delete_bitset() {
let delete_bitset = DeleteBitSet::for_test(&[1, 9], 10);
assert!(delete_bitset.is_alive(0));
assert!(delete_bitset.is_deleted(1));
assert!(delete_bitset.is_alive(2));
assert!(delete_bitset.is_alive(3));
assert!(delete_bitset.is_alive(4));
assert!(delete_bitset.is_alive(5));
assert!(delete_bitset.is_alive(6));
assert!(delete_bitset.is_alive(6));
assert!(delete_bitset.is_alive(7));
assert!(delete_bitset.is_alive(8));
assert!(delete_bitset.is_deleted(9));
for doc in 0..10 {
assert_eq!(delete_bitset.is_deleted(doc), !delete_bitset.is_alive(doc));
}
assert_eq!(delete_bitset.len(), 2);
}
}

View File

@@ -1,6 +1,7 @@
use crate::schema::FieldEntry;
use std::result;
use crate::schema::FieldEntry;
/// `FastFieldNotAvailableError` is returned when the
/// user requested for a fast field reader, and the field was not
/// defined in the schema as a fast field.

View File

@@ -1,10 +1,10 @@
use std::str;
use super::MultiValuedFastFieldReader;
use crate::error::DataCorruption;
use crate::schema::Facet;
use crate::termdict::TermDictionary;
use crate::termdict::TermOrdinal;
use crate::termdict::{TermDictionary, TermOrdinal};
use crate::DocId;
use std::str;
/// The facet reader makes it possible to access the list of
/// facets associated with a given document in a specific
@@ -82,20 +82,17 @@ impl FacetReader {
#[cfg(test)]
mod tests {
use crate::Index;
use crate::{
schema::{Facet, FacetOptions, SchemaBuilder, Value, INDEXED, STORED},
DocAddress, Document,
};
use crate::schema::{Facet, FacetOptions, SchemaBuilder, Value, STORED};
use crate::{DocAddress, Document, Index};
#[test]
fn test_facet_only_indexed() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()));
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
@@ -106,42 +103,19 @@ mod tests {
facet_reader.facet_ords(0u32, &mut facet_ords);
assert_eq!(&facet_ords, &[2u64]);
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
let value = doc.get_first(facet_field).and_then(Value::as_facet);
assert_eq!(value, None);
Ok(())
}
#[test]
fn test_facet_only_stored() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", STORED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()));
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
.segment_reader(0u32)
.facet_reader(facet_field)
.unwrap();
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert!(facet_ords.is_empty());
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, Some("/a/b".to_string()));
Ok(())
}
#[test]
fn test_facet_stored_and_indexed() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", STORED | INDEXED);
let facet_field = schema_builder.add_facet_field("facet", STORED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()));
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
@@ -152,43 +126,20 @@ mod tests {
facet_reader.facet_ords(0u32, &mut facet_ords);
assert_eq!(&facet_ords, &[2u64]);
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, Some("/a/b".to_string()));
Ok(())
}
#[test]
fn test_facet_neither_stored_and_indexed() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()));
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
.segment_reader(0u32)
.facet_reader(facet_field)
.unwrap();
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert!(facet_ords.is_empty());
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, None);
let value: Option<&Facet> = doc.get_first(facet_field).and_then(Value::as_facet);
assert_eq!(value, Facet::from_text("/a/b").ok().as_ref());
Ok(())
}
#[test]
fn test_facet_not_populated_for_all_docs() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()));
index_writer.add_document(Document::default());
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()))?;
index_writer.add_document(Document::default())?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
@@ -206,12 +157,12 @@ mod tests {
#[test]
fn test_facet_not_populated_for_any_docs() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(Document::default());
index_writer.add_document(Document::default());
index_writer.add_document(Document::default())?;
index_writer.add_document(Document::default())?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher

View File

@@ -1,53 +1,41 @@
/*!
Column oriented field storage for tantivy.
It is the equivalent of `Lucene`'s `DocValues`.
Fast fields is a column-oriented fashion storage of `tantivy`.
It is designed for the fast random access of some document
fields given a document id.
`FastField` are useful when a field is required for all or most of
the `DocSet` : for instance for scoring, grouping, filtering, or faceting.
Fields have to be declared as `FAST` in the schema.
Currently only 64-bits integers (signed or unsigned) are
supported.
They are stored in a bit-packed fashion so that their
memory usage is directly linear with the amplitude of the
values stored.
Read access performance is comparable to that of an array lookup.
*/
//! Column oriented field storage for tantivy.
//!
//! It is the equivalent of `Lucene`'s `DocValues`.
//!
//! Fast fields are a column-oriented storage scheme in `tantivy`.
//!
//! It is designed for the fast random access of some document
//! fields given a document id.
//!
//! `FastField`s are useful when a field is required for all or most of
//! the `DocSet`: for instance for scoring, grouping, filtering, or faceting.
//!
//!
//! Fields have to be declared as `FAST` in the schema.
//! Currently only 64-bits integers (signed or unsigned) are
//! supported.
//!
//! They are stored in a bit-packed fashion so that their
//! memory usage is directly linear with the amplitude of the
//! values stored.
//!
//! Read access performance is comparable to that of an array lookup.
pub use self::alive_bitset::{intersect_alive_bitsets, write_alive_bitset, AliveBitSet};
pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter};
pub use self::delete::write_delete_bitset;
pub use self::delete::DeleteBitSet;
pub use self::error::{FastFieldNotAvailableError, Result};
pub use self::facet_reader::FacetReader;
pub use self::multivalued::{MultiValuedFastFieldReader, MultiValuedFastFieldWriter};
pub(crate) use self::reader::BitpackedFastFieldReader;
pub use self::reader::DynamicFastFieldReader;
pub use self::reader::FastFieldReader;
pub use self::reader::{DynamicFastFieldReader, FastFieldReader};
pub use self::readers::FastFieldReaders;
pub use self::serializer::CompositeFastFieldSerializer;
pub use self::serializer::FastFieldDataAccess;
pub use self::serializer::FastFieldStats;
pub use self::serializer::{CompositeFastFieldSerializer, FastFieldDataAccess, FastFieldStats};
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
use crate::schema::Cardinality;
use crate::schema::FieldType;
use crate::schema::Value;
use crate::{
chrono::{NaiveDateTime, Utc},
schema::Type,
};
use crate::{common, DocId};
use crate::chrono::{NaiveDateTime, Utc};
use crate::schema::{Cardinality, FieldType, Type, Value};
use crate::DocId;
mod alive_bitset;
mod bytes;
mod delete;
mod error;
mod facet_reader;
mod multivalued;
@@ -109,7 +97,7 @@ impl FastValue for u64 {
fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> {
match *field_type {
FieldType::U64(ref integer_options) => integer_options.get_fastfield_cardinality(),
FieldType::HierarchicalFacet(_) => Some(Cardinality::MultiValues),
FieldType::Facet(_) => Some(Cardinality::MultiValues),
_ => None,
}
}
@@ -212,22 +200,20 @@ fn value_to_u64(value: &Value) -> u64 {
#[cfg(test)]
mod tests {
use super::*;
use crate::common::CompositeFile;
use crate::common::HasLen;
use crate::directory::{Directory, RamDirectory, WritePtr};
use crate::merge_policy::NoMergePolicy;
use crate::schema::Field;
use crate::schema::Schema;
use crate::schema::FAST;
use crate::schema::{Document, IntOptions};
use crate::{Index, SegmentId, SegmentReader};
use std::collections::HashMap;
use std::path::Path;
use common::HasLen;
use once_cell::sync::Lazy;
use rand::prelude::SliceRandom;
use rand::rngs::StdRng;
use rand::SeedableRng;
use std::collections::HashMap;
use std::path::Path;
use super::*;
use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
use crate::merge_policy::NoMergePolicy;
use crate::schema::{Document, Field, IntOptions, Schema, FAST};
use crate::{Index, SegmentId, SegmentReader};
pub static SCHEMA: Lazy<Schema> = Lazy::new(|| {
let mut schema_builder = Schema::builder();
@@ -406,7 +392,7 @@ mod tests {
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
//assert_eq!(file.len(), 17710 as usize); //bitpacked size
// assert_eq!(file.len(), 17710 as usize); //bitpacked size
assert_eq!(file.len(), 10175_usize); // linear interpol size
{
let fast_fields_composite = CompositeFile::open(&file)?;
@@ -496,18 +482,18 @@ mod tests {
}
#[test]
fn test_merge_missing_date_fast_field() {
fn test_merge_missing_date_fast_field() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let date_field = schema_builder.add_date_field("date", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.set_merge_policy(Box::new(NoMergePolicy));
index_writer.add_document(doc!(date_field =>crate::chrono::prelude::Utc::now()));
index_writer.commit().unwrap();
index_writer.add_document(doc!());
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
index_writer.add_document(doc!(date_field =>crate::chrono::prelude::Utc::now()))?;
index_writer.commit()?;
index_writer.add_document(doc!())?;
index_writer.commit()?;
let reader = index.reader()?;
let segment_ids: Vec<SegmentId> = reader
.searcher()
.segment_readers()
@@ -516,10 +502,10 @@ mod tests {
.collect();
assert_eq!(segment_ids.len(), 2);
let merge_future = index_writer.merge(&segment_ids[..]);
let merge_res = futures::executor::block_on(merge_future);
assert!(merge_res.is_ok());
assert!(reader.reload().is_ok());
futures::executor::block_on(merge_future)?;
reader.reload()?;
assert_eq!(reader.searcher().segment_readers().len(), 1);
Ok(())
}
#[test]
@@ -528,7 +514,7 @@ mod tests {
}
#[test]
fn test_datefastfield() {
fn test_datefastfield() -> crate::Result<()> {
use crate::fastfield::FastValue;
let mut schema_builder = Schema::builder();
let date_field = schema_builder.add_date_field("date", FAST);
@@ -538,22 +524,22 @@ mod tests {
);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
index_writer.set_merge_policy(Box::new(NoMergePolicy));
index_writer.add_document(doc!(
date_field => crate::DateTime::from_u64(1i64.to_u64()),
multi_date_field => crate::DateTime::from_u64(2i64.to_u64()),
multi_date_field => crate::DateTime::from_u64(3i64.to_u64())
));
))?;
index_writer.add_document(doc!(
date_field => crate::DateTime::from_u64(4i64.to_u64())
));
))?;
index_writer.add_document(doc!(
multi_date_field => crate::DateTime::from_u64(5i64.to_u64()),
multi_date_field => crate::DateTime::from_u64(6i64.to_u64())
));
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
))?;
index_writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
assert_eq!(searcher.segment_readers().len(), 1);
let segment_reader = searcher.segment_reader(0);
@@ -580,21 +566,22 @@ mod tests {
assert_eq!(dates[0].timestamp(), 5i64);
assert_eq!(dates[1].timestamp(), 6i64);
}
Ok(())
}
}
#[cfg(all(test, feature = "unstable"))]
mod bench {
use super::tests::FIELD;
use super::tests::{generate_permutation, SCHEMA};
use super::*;
use crate::common::CompositeFile;
use crate::directory::{Directory, RamDirectory, WritePtr};
use crate::fastfield::FastFieldReader;
use std::collections::HashMap;
use std::path::Path;
use test::{self, Bencher};
use super::tests::{generate_permutation, FIELD, SCHEMA};
use super::*;
use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
use crate::fastfield::FastFieldReader;
#[bench]
fn bench_intfastfield_linear_veclookup(b: &mut Bencher) {
let permutation = generate_permutation();


@@ -7,18 +7,20 @@ pub use self::writer::MultiValuedFastFieldWriter;
#[cfg(test)]
mod tests {
use crate::collector::TopDocs;
use crate::query::QueryParser;
use crate::schema::Cardinality;
use crate::schema::Facet;
use crate::schema::IntOptions;
use crate::schema::Schema;
use crate::schema::INDEXED;
use crate::Index;
use chrono::Duration;
use futures::executor::block_on;
use proptest::strategy::Strategy;
use proptest::{prop_oneof, proptest};
use test_log::test;
use crate::collector::TopDocs;
use crate::indexer::NoMergePolicy;
use crate::query::QueryParser;
use crate::schema::{Cardinality, Facet, FacetOptions, IntOptions, Schema};
use crate::{Document, Index, Term};
#[test]
fn test_multivalued_u64() {
fn test_multivalued_u64() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_u64_field(
"multifield",
@@ -26,17 +28,17 @@ mod tests {
);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(field=>1u64, field=>3u64));
index_writer.add_document(doc!());
index_writer.add_document(doc!(field=>4u64));
index_writer.add_document(doc!(field=>5u64, field=>20u64,field=>1u64));
assert!(index_writer.commit().is_ok());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(field=>1u64, field=>3u64))?;
index_writer.add_document(doc!())?;
index_writer.add_document(doc!(field=>4u64))?;
index_writer.add_document(doc!(field=>5u64, field=>20u64,field=>1u64))?;
index_writer.commit()?;
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
let segment_reader = searcher.segment_reader(0);
let mut vals = Vec::new();
let multi_value_reader = segment_reader.fast_fields().u64s(field).unwrap();
let multi_value_reader = segment_reader.fast_fields().u64s(field)?;
{
multi_value_reader.get_vals(2, &mut vals);
assert_eq!(&vals, &[4u64]);
@@ -49,61 +51,60 @@ mod tests {
multi_value_reader.get_vals(1, &mut vals);
assert!(vals.is_empty());
}
Ok(())
}
#[test]
fn test_multivalued_date() {
fn test_multivalued_date() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let date_field = schema_builder.add_date_field(
"multi_date_field",
IntOptions::default()
.set_fast(Cardinality::MultiValues)
.set_indexed()
.set_fieldnorm()
.set_stored(),
);
let time_i =
schema_builder.add_i64_field("time_stamp_i", IntOptions::default().set_stored());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
let first_time_stamp = chrono::Utc::now();
index_writer.add_document(
doc!(date_field=>first_time_stamp, date_field=>first_time_stamp, time_i=>1i64),
);
index_writer.add_document(doc!(time_i=>0i64));
)?;
index_writer.add_document(doc!(time_i=>0i64))?;
// add one second
index_writer
.add_document(doc!(date_field=>first_time_stamp + Duration::seconds(1), time_i=>2i64));
index_writer.add_document(
doc!(date_field=>first_time_stamp + Duration::seconds(1), time_i=>2i64),
)?;
// add another second
let two_secs_ahead = first_time_stamp + Duration::seconds(2);
index_writer.add_document(doc!(date_field=>two_secs_ahead, date_field=>two_secs_ahead,date_field=>two_secs_ahead, time_i=>3i64));
index_writer.add_document(doc!(date_field=>two_secs_ahead, date_field=>two_secs_ahead,date_field=>two_secs_ahead, time_i=>3i64))?;
// add three seconds
index_writer
.add_document(doc!(date_field=>first_time_stamp + Duration::seconds(3), time_i=>4i64));
assert!(index_writer.commit().is_ok());
index_writer.add_document(
doc!(date_field=>first_time_stamp + Duration::seconds(3), time_i=>4i64),
)?;
index_writer.commit()?;
let reader = index.reader().unwrap();
let reader = index.reader()?;
let searcher = reader.searcher();
let reader = searcher.segment_reader(0);
assert_eq!(reader.num_docs(), 5);
{
let parser = QueryParser::for_index(&index, vec![date_field]);
let query = parser
.parse_query(&format!("\"{}\"", first_time_stamp.to_rfc3339()))
.expect("could not parse query");
let results = searcher
.search(&query, &TopDocs::with_limit(5))
.expect("could not query index");
let query = parser.parse_query(&format!("\"{}\"", first_time_stamp.to_rfc3339()))?;
let results = searcher.search(&query, &TopDocs::with_limit(5))?;
assert_eq!(results.len(), 1);
for (_score, doc_address) in results {
let retrieved_doc = searcher.doc(doc_address).expect("cannot fetch doc");
let retrieved_doc = searcher.doc(doc_address)?;
assert_eq!(
retrieved_doc
.get_first(date_field)
.expect("cannot find value")
.date_value()
.as_date()
.unwrap()
.timestamp(),
first_time_stamp.timestamp()
@@ -112,7 +113,7 @@ mod tests {
retrieved_doc
.get_first(time_i)
.expect("cannot find value")
.i64_value(),
.as_i64(),
Some(1i64)
);
}
@@ -120,12 +121,8 @@ mod tests {
{
let parser = QueryParser::for_index(&index, vec![date_field]);
let query = parser
.parse_query(&format!("\"{}\"", two_secs_ahead.to_rfc3339()))
.expect("could not parse query");
let results = searcher
.search(&query, &TopDocs::with_limit(5))
.expect("could not query index");
let query = parser.parse_query(&format!("\"{}\"", two_secs_ahead.to_rfc3339()))?;
let results = searcher.search(&query, &TopDocs::with_limit(5))?;
assert_eq!(results.len(), 1);
@@ -135,7 +132,7 @@ mod tests {
retrieved_doc
.get_first(date_field)
.expect("cannot find value")
.date_value()
.as_date()
.unwrap()
.timestamp(),
two_secs_ahead.timestamp()
@@ -144,7 +141,7 @@ mod tests {
retrieved_doc
.get_first(time_i)
.expect("cannot find value")
.i64_value(),
.as_i64(),
Some(3i64)
);
}
@@ -157,10 +154,8 @@ mod tests {
(first_time_stamp + Duration::seconds(1)).to_rfc3339(),
(first_time_stamp + Duration::seconds(3)).to_rfc3339()
);
let query = parser.parse_query(&range_q).expect("could not parse query");
let results = searcher
.search(&query, &TopDocs::with_limit(5))
.expect("could not query index");
let query = parser.parse_query(&range_q)?;
let results = searcher.search(&query, &TopDocs::with_limit(5))?;
assert_eq!(results.len(), 2);
for (i, doc_pair) in results.iter().enumerate() {
@@ -179,7 +174,7 @@ mod tests {
retrieved_doc
.get_first(date_field)
.expect("cannot find value")
.date_value()
.as_date()
.expect("value not of Date type")
.timestamp(),
(first_time_stamp + Duration::seconds(offset_sec)).timestamp()
@@ -188,16 +183,16 @@ mod tests {
retrieved_doc
.get_first(time_i)
.expect("cannot find value")
.i64_value()
.expect("value not of i64 type"),
time_i_val
.as_i64(),
Some(time_i_val)
);
}
}
Ok(())
}
#[test]
fn test_multivalued_i64() {
fn test_multivalued_i64() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_i64_field(
"multifield",
@@ -205,14 +200,14 @@ mod tests {
);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.add_document(doc!(field=> 1i64, field => 3i64));
index_writer.add_document(doc!());
index_writer.add_document(doc!(field=> -4i64));
index_writer.add_document(doc!(field=> -5i64, field => -20i64, field=>1i64));
assert!(index_writer.commit().is_ok());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(field=> 1i64, field => 3i64))?;
index_writer.add_document(doc!())?;
index_writer.add_document(doc!(field=> -4i64))?;
index_writer.add_document(doc!(field=> -5i64, field => -20i64, field=>1i64))?;
index_writer.commit()?;
let searcher = index.reader().unwrap().searcher();
let searcher = index.reader()?.searcher();
let segment_reader = searcher.segment_reader(0);
let mut vals = Vec::new();
let multi_value_reader = segment_reader.fast_fields().i64s(field).unwrap();
@@ -224,18 +219,125 @@ mod tests {
assert!(vals.is_empty());
multi_value_reader.get_vals(3, &mut vals);
assert_eq!(&vals, &[-5i64, -20i64, 1i64]);
Ok(())
}
#[test]
#[ignore]
fn test_many_facets() {
fn test_multivalued_no_panic(ops: &[IndexingOp]) -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_facet_field("facetfield", INDEXED);
let field = schema_builder.add_u64_field(
"multifield",
IntOptions::default()
.set_fast(Cardinality::MultiValues)
.set_indexed(),
);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
for i in 0..100_000 {
index_writer.add_document(doc!(field=> Facet::from(format!("/lang/{}", i).as_str())));
let mut index_writer = index.writer_for_tests()?;
index_writer.set_merge_policy(Box::new(NoMergePolicy));
for &op in ops {
match op {
IndexingOp::AddDoc { id } => {
match id % 3 {
0 => {
index_writer.add_document(doc!())?;
}
1 => {
let mut doc = Document::new();
for _ in 0..5001 {
doc.add_u64(field, id as u64);
}
index_writer.add_document(doc)?;
}
_ => {
let mut doc = Document::new();
doc.add_u64(field, id as u64);
index_writer.add_document(doc)?;
}
};
}
IndexingOp::DeleteDoc { id } => {
index_writer.delete_term(Term::from_field_u64(field, id as u64));
}
IndexingOp::Commit => {
index_writer.commit().unwrap();
}
IndexingOp::Merge => {
let segment_ids = index.searchable_segment_ids()?;
if segment_ids.len() >= 2 {
block_on(index_writer.merge(&segment_ids))?;
index_writer.segment_updater().wait_merging_thread()?;
}
}
}
}
assert!(index_writer.commit().is_ok());
index_writer.commit()?;
// Merging the segments
{
let segment_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
if !segment_ids.is_empty() {
block_on(index_writer.merge(&segment_ids)).unwrap();
assert!(index_writer.wait_merging_threads().is_ok());
}
}
Ok(())
}
#[derive(Debug, Clone, Copy)]
enum IndexingOp {
AddDoc { id: u32 },
DeleteDoc { id: u32 },
Commit,
Merge,
}
fn operation_strategy() -> impl Strategy<Value = IndexingOp> {
prop_oneof![
(0u32..10u32).prop_map(|id| IndexingOp::DeleteDoc { id }),
(0u32..10u32).prop_map(|id| IndexingOp::AddDoc { id }),
(0u32..2u32).prop_map(|_| IndexingOp::Commit),
(0u32..1u32).prop_map(|_| IndexingOp::Merge),
]
}
proptest! {
#[test]
fn test_multivalued_proptest(ops in proptest::collection::vec(operation_strategy(), 1..10)) {
assert!(test_multivalued_no_panic(&ops[..]).is_ok());
}
}
#[test]
fn test_multivalued_proptest_off_by_one_bug_1151() {
use IndexingOp::*;
let ops = [
AddDoc { id: 3 },
AddDoc { id: 1 },
AddDoc { id: 3 },
Commit,
Merge,
];
assert!(test_multivalued_no_panic(&ops[..]).is_ok());
}
#[test]
#[ignore]
fn test_many_facets() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_facet_field("facetfield", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
for i in 0..100_000 {
index_writer
.add_document(doc!(field=> Facet::from(format!("/lang/{}", i).as_str())))?;
}
index_writer.commit()?;
Ok(())
}
}


@@ -10,7 +10,6 @@ use crate::DocId;
/// The `vals_reader` will access the concatenated list of all
/// values for all reader.
/// The `idx_reader` associated, for each document, the index of its first value.
///
#[derive(Clone)]
pub struct MultiValuedFastFieldReader<Item: FastValue> {
idx_reader: DynamicFastFieldReader<u64>,
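The doc comment above fixes the layout: vals_reader holds every document's values concatenated, and idx_reader gives each document the offset of its first value. A hedged sketch of that layout on plain slices, not the actual reader code, to show how a get_vals-style lookup falls out of it (the sketch assumes one extra trailing offset so the last document's range closes):

// layout sketch: `idx` has num_docs + 1 entries, `vals` is the concatenation of all values.
fn get_vals_sketch(idx: &[u64], vals: &[u64], doc: usize, out: &mut Vec<u64>) {
    let start = idx[doc] as usize;   // offset of the first value of `doc`
    let end = idx[doc + 1] as usize; // offset of the first value of the next doc
    out.clear();
    out.extend_from_slice(&vals[start..end]);
}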
@@ -91,27 +90,25 @@ impl<Item: FastValue> MultiValueLength for MultiValuedFastFieldReader<Item> {
mod tests {
use crate::core::Index;
use crate::schema::{Cardinality, Facet, IntOptions, Schema, INDEXED};
use crate::schema::{Cardinality, Facet, FacetOptions, IntOptions, Schema};
#[test]
fn test_multifastfield_reader() {
fn test_multifastfield_reader() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facets", INDEXED);
let facet_field = schema_builder.add_facet_field("facets", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index
.writer_for_tests()
.expect("Failed to create index writer.");
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(
facet_field => Facet::from("/category/cat2"),
facet_field => Facet::from("/category/cat1"),
));
index_writer.add_document(doc!(facet_field => Facet::from("/category/cat2")));
index_writer.add_document(doc!(facet_field => Facet::from("/category/cat3")));
index_writer.commit().expect("Commit failed");
let searcher = index.reader().unwrap().searcher();
))?;
index_writer.add_document(doc!(facet_field => Facet::from("/category/cat2")))?;
index_writer.add_document(doc!(facet_field => Facet::from("/category/cat3")))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let segment_reader = searcher.segment_reader(0);
let mut facet_reader = segment_reader.facet_reader(facet_field).unwrap();
let mut facet_reader = segment_reader.facet_reader(facet_field)?;
let mut facet = Facet::root();
{
@@ -145,10 +142,11 @@ mod tests {
facet_reader.facet_ords(2, &mut vals);
assert_eq!(&vals[..], &[4]);
}
Ok(())
}
#[test]
fn test_multifastfield_reader_min_max() {
fn test_multifastfield_reader_min_max() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let field_options = IntOptions::default()
.set_indexed()
@@ -163,15 +161,16 @@ mod tests {
item_field => 2i64,
item_field => 3i64,
item_field => -2i64,
));
index_writer.add_document(doc!(item_field => 6i64, item_field => 3i64));
index_writer.add_document(doc!(item_field => 4i64));
index_writer.commit().expect("Commit failed");
let searcher = index.reader().unwrap().searcher();
))?;
index_writer.add_document(doc!(item_field => 6i64, item_field => 3i64))?;
index_writer.add_document(doc!(item_field => 4i64))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let segment_reader = searcher.segment_reader(0);
let field_reader = segment_reader.fast_fields().i64s(item_field).unwrap();
let field_reader = segment_reader.fast_fields().i64s(item_field)?;
assert_eq!(field_reader.min_value(), -2);
assert_eq!(field_reader.max_value(), 6);
Ok(())
}
}


@@ -1,13 +1,15 @@
use std::io;
use fnv::FnvHashMap;
use tantivy_bitpacker::minmax;
use crate::fastfield::serializer::BitpackedFastFieldSerializerLegacy;
use crate::fastfield::CompositeFastFieldSerializer;
use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer};
use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::postings::UnorderedTermId;
use crate::schema::{Document, Field};
use crate::termdict::TermOrdinal;
use crate::DocId;
use crate::{fastfield::value_to_u64, indexer::doc_id_mapping::DocIdMapping};
use fnv::FnvHashMap;
use std::io;
use tantivy_bitpacker::minmax;
/// Writer for multi-valued (as in, more than one value per document)
/// int fast field.
@@ -20,7 +22,8 @@ use tantivy_bitpacker::minmax;
/// - add your document simply by calling `.add_document(...)`.
///
/// The `MultiValuedFastFieldWriter` can be acquired from the
/// fastfield writer, by calling [`.get_multivalue_writer(...)`](./struct.FastFieldsWriter.html#method.get_multivalue_writer).
/// fastfield writer, by calling
/// [`.get_multivalue_writer(...)`](./struct.FastFieldsWriter.html#method.get_multivalue_writer).
///
/// Once acquired, writing is done by calling calls to
/// `.add_document_vals(&[u64])` once per document.
@@ -76,7 +79,7 @@ impl MultiValuedFastFieldWriter {
// facets are indexed in the `SegmentWriter` as we encode their unordered id.
if !self.is_facet {
for field_value in doc.field_values() {
if field_value.field() == self.field {
if field_value.field == self.field {
self.add_val(value_to_u64(field_value.value()));
}
}
@@ -131,7 +134,6 @@ impl MultiValuedFastFieldWriter {
/// During the serialization of the segment, terms gets sorted and
/// `tantivy` builds a mapping to convert this `UnorderedTermId` into
/// term ordinals.
///
pub fn serialize(
&self,
serializer: &mut CompositeFastFieldSerializer,
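The comment above describes facet values being buffered as UnorderedTermIds and remapped to sorted term ordinals only at segment serialization time. A hedged sketch of that remapping step, with local stand-ins for the crate's aliases (the map type mirrors the FnvHashMap import above; this is illustrative, not the serializer's actual code):

use fnv::FnvHashMap;

type UnorderedTermId = u64; // stand-in for the crate's alias
type TermOrdinal = u64;     // stand-in for the crate's alias

// Rewrite buffered unordered ids into sorted term ordinals before they are
// handed to the fast field serializer.
fn remap_to_term_ordinals(
    buffered: &[UnorderedTermId],
    mapping: &FnvHashMap<UnorderedTermId, TermOrdinal>,
) -> Vec<u64> {
    buffered.iter().map(|id| mapping[id]).collect()
}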


@@ -1,25 +1,25 @@
use super::FastValue;
use crate::common::BinarySerializable;
use crate::common::CompositeFile;
use crate::directory::FileSlice;
use crate::directory::OwnedBytes;
use crate::directory::{Directory, RamDirectory, WritePtr};
use crate::fastfield::{CompositeFastFieldSerializer, FastFieldsWriter};
use crate::schema::Schema;
use crate::schema::FAST;
use crate::DocId;
use fastfield_codecs::bitpacked::BitpackedFastFieldReader as BitpackedReader;
use fastfield_codecs::bitpacked::BitpackedFastFieldSerializer;
use fastfield_codecs::linearinterpol::LinearInterpolFastFieldReader;
use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldReader;
use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
use fastfield_codecs::FastFieldCodecReader;
use fastfield_codecs::FastFieldCodecSerializer;
use std::collections::HashMap;
use std::marker::PhantomData;
use std::path::Path;
use common::BinarySerializable;
use fastfield_codecs::bitpacked::{
BitpackedFastFieldReader as BitpackedReader, BitpackedFastFieldSerializer,
};
use fastfield_codecs::linearinterpol::{
LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer,
};
use fastfield_codecs::multilinearinterpol::{
MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
};
use fastfield_codecs::{FastFieldCodecReader, FastFieldCodecSerializer};
use super::FastValue;
use crate::directory::{CompositeFile, Directory, FileSlice, OwnedBytes, RamDirectory, WritePtr};
use crate::fastfield::{CompositeFastFieldSerializer, FastFieldsWriter};
use crate::schema::{Schema, FAST};
use crate::DocId;
/// FastFieldReader is the trait to access fast field data.
pub trait FastFieldReader<Item: FastValue>: Clone {
/// Return the value associated to the given document.
@@ -64,7 +64,6 @@ pub trait FastFieldReader<Item: FastValue>: Clone {
#[derive(Clone)]
/// DynamicFastFieldReader wraps different readers to access
/// the various encoded fastfield data
///
pub enum DynamicFastFieldReader<Item: FastValue> {
/// Bitpacked compressed fastfield data.
Bitpacked(FastFieldReaderCodecWrapper<Item, BitpackedReader>),
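The enum above is what makes the fast field column codec-agnostic at read time: the serializer prefixes the data with a codec id, and opening a column dispatches on that id to the matching reader variant. A minimal sketch with stand-in types (the Bitpacked id=1 matches the assertion further down; the other ids are assumptions, and this is not the crate's actual open logic):

#[derive(Debug)]
enum DynamicReaderSketch {
    Bitpacked(Vec<u8>),
    LinearInterpol(Vec<u8>),
    MultiLinearInterpol(Vec<u8>),
}

fn open_sketch(mut bytes: Vec<u8>) -> Result<DynamicReaderSketch, String> {
    if bytes.is_empty() {
        return Err("empty fast field column".to_string());
    }
    let codec_id = bytes.remove(0); // id byte written by the serializer ahead of the data
    match codec_id {
        1 => Ok(DynamicReaderSketch::Bitpacked(bytes)),
        2 => Ok(DynamicReaderSketch::LinearInterpol(bytes)),      // assumed id
        3 => Ok(DynamicReaderSketch::MultiLinearInterpol(bytes)), // assumed id
        other => Err(format!("unknown fast field codec id {}", other)),
    }
}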
@@ -146,7 +145,6 @@ impl<Item: FastValue> FastFieldReader<Item> for DynamicFastFieldReader<Item> {
/// Wrapper for accessing a fastfield.
///
/// Holds the data and the codec to the read the data.
///
#[derive(Clone)]
pub struct FastFieldReaderCodecWrapper<Item: FastValue, CodecReader> {
reader: CodecReader,
@@ -162,7 +160,8 @@ impl<Item: FastValue, C: FastFieldCodecReader> FastFieldReaderCodecWrapper<Item,
assert_eq!(
BitpackedFastFieldSerializer::ID,
id,
"Tried to open fast field as bitpacked encoded (id=1), but got serializer with different id"
"Tried to open fast field as bitpacked encoded (id=1), but got serializer with \
different id"
);
Self::open_from_bytes(bytes)
}
@@ -249,8 +248,6 @@ impl<Item: FastValue, C: FastFieldCodecReader + Clone> FastFieldReader<Item>
}
}
pub(crate) type BitpackedFastFieldReader<Item> = FastFieldReaderCodecWrapper<Item, BitpackedReader>;
impl<Item: FastValue> From<Vec<Item>> for DynamicFastFieldReader<Item> {
fn from(vals: Vec<Item>) -> DynamicFastFieldReader<Item> {
let mut schema_builder = Schema::builder();


@@ -1,14 +1,12 @@
use crate::common::CompositeFile;
use crate::directory::FileSlice;
use crate::fastfield::MultiValuedFastFieldReader;
use crate::fastfield::{BitpackedFastFieldReader, FastFieldNotAvailableError};
use crate::fastfield::{BytesFastFieldReader, FastValue};
use super::reader::DynamicFastFieldReader;
use crate::directory::{CompositeFile, FileSlice};
use crate::fastfield::{
BytesFastFieldReader, FastFieldNotAvailableError, FastValue, MultiValuedFastFieldReader,
};
use crate::schema::{Cardinality, Field, FieldType, Schema};
use crate::space_usage::PerFieldSpaceUsage;
use crate::TantivyError;
use super::reader::DynamicFastFieldReader;
/// Provides access to all of the BitpackedFastFieldReader.
///
/// Internally, `FastFieldReaders` have preloaded fast field readers,
@@ -40,7 +38,7 @@ fn type_and_cardinality(field_type: &FieldType) -> Option<(FastType, Cardinality
FieldType::Date(options) => options
.get_fastfield_cardinality()
.map(|cardinality| (FastType::Date, cardinality)),
FieldType::HierarchicalFacet(_) => Some((FastType::U64, Cardinality::MultiValues)),
FieldType::Facet(_) => Some((FastType::U64, Cardinality::MultiValues)),
_ => None,
}
}
@@ -131,10 +129,11 @@ impl FastFieldReaders {
self.typed_fast_field_reader(field)
}
/// Returns the `u64` fast field reader reader associated to `field`, regardless of whether the given
/// field is effectively of type `u64` or not.
/// Returns the `u64` fast field reader reader associated to `field`, regardless of whether the
/// given field is effectively of type `u64` or not.
///
/// If not, the fastfield reader will returns the u64-value associated to the original FastValue.
/// If not, the fastfield reader will returns the u64-value associated to the original
/// FastValue.
pub fn u64_lenient(&self, field: Field) -> crate::Result<DynamicFastFieldReader<u64>> {
self.typed_fast_field_reader(field)
}
@@ -171,8 +170,8 @@ impl FastFieldReaders {
self.typed_fast_field_multi_reader(field)
}
/// Returns a `u64s` multi-valued fast field reader reader associated to `field`, regardless of whether the given
/// field is effectively of type `u64` or not.
/// Returns a `u64s` multi-valued fast field reader reader associated to `field`, regardless of
/// whether the given field is effectively of type `u64` or not.
///
/// If `field` is not a u64 multi-valued fast field, this method returns an Error.
pub fn u64s_lenient(&self, field: Field) -> crate::Result<MultiValuedFastFieldReader<u64>> {
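The _lenient readers above are the escape hatch for callers that only care about the raw monotone u64 representation of a fast field (merging and sorting code, typically), regardless of whether the field was declared as u64, i64, f64 or date. A hedged usage sketch, using the signatures shown above (field and function names are hypothetical):

// Read doc 0 of any single-valued fast field as its raw u64 representation.
fn first_doc_raw_u64(segment_reader: &SegmentReader, field: Field) -> crate::Result<u64> {
    let reader = segment_reader.fast_fields().u64_lenient(field)?;
    Ok(reader.get(0u32)) // raw u64, whatever the declared fast value type is
}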
@@ -219,7 +218,7 @@ impl FastFieldReaders {
)));
}
let fast_field_idx_file = self.fast_field_data(field, 0)?;
let idx_reader = BitpackedFastFieldReader::open(fast_field_idx_file)?;
let idx_reader = DynamicFastFieldReader::open(fast_field_idx_file)?;
let data = self.fast_field_data(field, 1)?;
BytesFastFieldReader::open(idx_reader, data)
} else {
