Closes #896 - Facet reader related

Bugfix. Acquiring a facet reader on a segment that does not contain any doc with this facet returns `None`.
Bumped version and edited changelog
2026-06-07 02:50:40 +00:00 · 2020-10-01 20:25:28 +09:00 · 2020-09-19 21:13:19 +09:00 · 2020-09-19 21:04:44 +09:00 · 2020-09-19 21:04:29 +09:00 · 2020-08-19 22:41:48 +09:00
257 changed files with 29085 additions and 11727 deletions
--- a/.github/FUNDING.yml
+++ b/.github/FUNDING.yml
@@ -0,0 +1,12 @@
+# These are supported funding model platforms
+
+github: fulmicoton
+patreon: # Replace with a single Patreon username
+open_collective: # Replace with a single Open Collective username
+ko_fi: # Replace with a single Ko-fi username
+tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
+community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
+liberapay: # Replace with a single Liberapay username
+issuehunt: # Replace with a single IssueHunt username
+otechie: # Replace with a single Otechie username
+custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,19 @@
+---
+name: Bug report
+about: Create a report to help us improve
+
+---
+
+**Describe the bug**
+- What did you do?
+- What happened?
+- What was expected?
+
+**Which version of tantivy are you using?**
+If "master",  ideally give the specific sha1 revision.
+
+**To Reproduce**
+
+If your bug is deterministic, can you provide minimal code that reproduces it?
+Some bugs are not deterministic. Can you describe with precision in which context it happened?
+If this is possible, can you share your code?
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,14 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+
+---
+
+**Is your feature request related to a problem? Please describe.**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**[Optional] describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
--- a/.github/ISSUE_TEMPLATE/question.md
+++ b/.github/ISSUE_TEMPLATE/question.md
@@ -0,0 +1,7 @@
+---
+name: Question
+about: Ask any question about tantivy's usage...
+
+---
+
+Try to be specific about your use case...
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
+tantivy.iml
+proptest-regressions
 *.swp
 target
 target/debug
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,112 +9,77 @@ sudo: required
 env:
  global:
    - CRATE_NAME=tantivy
+    - TRAVIS_CARGO_NIGHTLY_FEATURE=""
+    # - secure: eC8HjTi1wgRVCsMAeXEXt8Ckr0YBSGOEnQkkW4/Nde/OZ9jJjz2nmP1ELQlDE7+czHub2QvYtDMG0parcHZDx/Kus0yvyn08y3g2rhGIiE7y8OCvQm1Mybu2D/p7enm6shXquQ6Z5KRfRq+18mHy80wy9ABMA/ukEZdvnfQ76/Een8/Lb0eHaDoXDXn3PqLVtByvSfQQ7OhS60dEScu8PWZ6/l1057P5NpdWbMExBE7Ro4zYXNhkJeGZx0nP/Bd4Jjdt1XfPzMEybV6NZ5xsTILUBFTmOOt603IsqKGov089NExqxYu5bD3K+S4MzF1Nd6VhomNPJqLDCfhlymJCUj5n5Ku4yidlhQbM4Ej9nGrBalJnhcjBjPua5tmMF2WCxP9muKn/2tIOu1/+wc0vMf9Yd3wKIkf5+FtUxCgs2O+NslWvmOMAMI/yD25m7hb4t1IwE/4Bk+GVcWJRWXbo0/m6ZUHzRzdjUY2a1qvw7C9udzdhg7gcnXwsKrSWi2NjMiIVw86l+Zim0nLpKIN41sxZHLaFRG63Ki8zQ/481LGn32awJ6i3sizKS0WD+N1DfR2qYMrwYHaMN0uR0OFXYTJkFvTFttAeUY3EKmRKAuMhmO2YRdSr4/j/G5E9HMc1gSGJj6PxgpQU7EpvxRsmoVAEJr0mszmOj9icGHep/FM=
+
+addons:
+  apt:
+    sources:
+      - ubuntu-toolchain-r-test
+      - kalakris-cmake
+    packages:
+      - gcc-4.8
+      - g++-4.8
+      - libcurl4-openssl-dev
+      - libelf-dev
+      - libdw-dev
+      - binutils-dev
+      - cmake

 matrix:
  include:
    # Android
    - env: TARGET=aarch64-linux-android DISABLE_TESTS=1
-    - env: TARGET=arm-linux-androideabi DISABLE_TESTS=1
-    - env: TARGET=armv7-linux-androideabi DISABLE_TESTS=1
-    - env: TARGET=i686-linux-android DISABLE_TESTS=1
-    - env: TARGET=x86_64-linux-android DISABLE_TESTS=1
-
-    # iOS
-    #- env: TARGET=aarch64-apple-ios DISABLE_TESTS=1
-    #  os: osx
-    #- env: TARGET=armv7-apple-ios DISABLE_TESTS=1
-    #  os: osx
-    #- env: TARGET=armv7s-apple-ios DISABLE_TESTS=1
-    #  os: osx
-    #- env: TARGET=i386-apple-ios DISABLE_TESTS=1
-    #  os: osx
-    - env: TARGET=x86_64-apple-ios DISABLE_TESTS=1
-      os: osx
+    #- env: TARGET=arm-linux-androideabi DISABLE_TESTS=1
+    #- env: TARGET=armv7-linux-androideabi DISABLE_TESTS=1
+    #- env: TARGET=i686-linux-android DISABLE_TESTS=1
+    #- env: TARGET=x86_64-linux-android DISABLE_TESTS=1

    # Linux
-    - env: TARGET=aarch64-unknown-linux-gnu
-    # - env: TARGET=arm-unknown-linux-gnueabi
-    # - env: TARGET=armv7-unknown-linux-gnueabihf
-    - env: TARGET=i686-unknown-linux-gnu
-    #- env: TARGET=i686-unknown-linux-musl
-    #- env: TARGET=mips-unknown-linux-gnu
-    #- env: TARGET=mips64-unknown-linux-gnuabi64
-    #- env: TARGET=mips64el-unknown-linux-gnuabi64
-    #- env: TARGET=mipsel-unknown-linux-gnu
-    #- env: TARGET=powerpc-unknown-linux-gnu
-    #- env: TARGET=powerpc64-unknown-linux-gnu
-    #- env: TARGET=powerpc64le-unknown-linux-gnu
-    #- env: TARGET=s390x-unknown-linux-gnu DISABLE_TESTS=1
-    - env: TARGET=x86_64-unknown-linux-gnu
-    - env: TARGET=x86_64-unknown-linux-musl
-
+    #- env: TARGET=aarch64-unknown-linux-gnu
+    #- env: TARGET=i686-unknown-linux-gnu
+    - env: TARGET=x86_64-unknown-linux-gnu CODECOV=1 #UPLOAD_DOCS=1
+    # - env: TARGET=x86_64-unknown-linux-musl CODECOV=1
    # OSX
-    #- env: TARGET=i686-apple-darwin
-    #  os: osx
-    - env: TARGET=x86_64-apple-darwin
-      os: osx
-
-    # *BSD
-    #- env: TARGET=i686-unknown-freebsd DISABLE_TESTS=1
-    #- env: TARGET=x86_64-unknown-freebsd DISABLE_TESTS=1
-    #- env: TARGET=x86_64-unknown-netbsd DISABLE_TESTS=1
-
-    # Windows
-    #- env: TARGET=x86_64-pc-windows-gnu
-
-    # Bare metal
-    # These targets don't support std and as such are likely not suitable for
-    # most crates.
-    # - env: TARGET=thumbv6m-none-eabi
-    # - env: TARGET=thumbv7em-none-eabi
-    # - env: TARGET=thumbv7em-none-eabihf
-    # - env: TARGET=thumbv7m-none-eabi
-
-    # Testing other channels
-    #- env: TARGET=x86_64-unknown-linux-gnu
-    #  rust: nightly
    #- env: TARGET=x86_64-apple-darwin
    #  os: osx
-    #  rust: nightly

 before_install:
  - set -e
  - rustup self update
+  - rustup component add rustfmt

 install:
  - sh ci/install.sh
  - source ~/.cargo/env || true
+  - env | grep "TRAVIS"
+
+before_script:
+  - export PATH=$HOME/.cargo/bin:$PATH
+  - cargo install cargo-update || echo "cargo-update already installed"
+  - cargo install cargo-travis || echo "cargo-travis already installed"

 script:
  - bash ci/script.sh
-
-after_script: set +e
+  - cargo fmt --all -- --check

 before_deploy:
  - sh ci/before_deploy.sh
-#
-#deploy:
-#  # - Create a `public_repo` GitHub token. Go to: https://github.com/settings/tokens/new
-#  # - Encrypt it: `travis encrypt 0123456789012345678901234567890123456789
-#  # - Paste the output down here
-#  api_key:
-#    secure: eC8HjTi1wgRVCsMAeXEXt8Ckr0YBSGOEnQkkW4/Nde/OZ9jJjz2nmP1ELQlDE7+czHub2QvYtDMG0parcHZDx/Kus0yvyn08y3g2rhGIiE7y8OCvQm1Mybu2D/p7enm6shXquQ6Z5KRfRq+18mHy80wy9ABMA/ukEZdvnfQ76/Een8/Lb0eHaDoXDXn3PqLVtByvSfQQ7OhS60dEScu8PWZ6/l1057P5NpdWbMExBE7Ro4zYXNhkJeGZx0nP/Bd4Jjdt1XfPzMEybV6NZ5xsTILUBFTmOOt603IsqKGov089NExqxYu5bD3K+S4MzF1Nd6VhomNPJqLDCfhlymJCUj5n5Ku4yidlhQbM4Ej9nGrBalJnhcjBjPua5tmMF2WCxP9muKn/2tIOu1/+wc0vMf9Yd3wKIkf5+FtUxCgs2O+NslWvmOMAMI/yD25m7hb4t1IwE/4Bk+GVcWJRWXbo0/m6ZUHzRzdjUY2a1qvw7C9udzdhg7gcnXwsKrSWi2NjMiIVw86l+Zim0nLpKIN41sxZHLaFRG63Ki8zQ/481LGn32awJ6i3sizKS0WD+N1DfR2qYMrwYHaMN0uR0OFXYTJkFvTFttAeUY3EKmRKAuMhmO2YRdSr4/j/G5E9HMc1gSGJj6PxgpQU7EpvxRsmoVAEJr0mszmOj9icGHep/FM=
-#  file_glob: true
-#  file: $CRATE_NAME-$TRAVIS_TAG-$TARGET.*
-#  on:
-#    # TODO Here you can pick which targets will generate binary releases
-#    # In this example, there are some targets that are tested using the stable
-#    # and nightly channels. This condition makes sure there is only one release
-#    # for such targets and that's generated using the stable channel
-#    condition: $TRAVIS_RUST_VERSION = stable
-#    tags: true
-#  provider: releases
-#  skip_cleanup: true

-cache: cargo
-before_cache:
-  # Travis can't cache files that are not readable by "others"
-  - chmod -R a+r $HOME/.cargo
+after_success:
+  # Needs GH_TOKEN env var to be set in travis settings
+  - if [[ -v GH_TOKEN ]]; then echo "GH TOKEN IS SET"; else echo "GH TOKEN NOT SET"; fi
+  - if [[ -v UPLOAD_DOCS ]]; then cargo doc; cargo doc-upload; else echo "doc upload disabled."; fi
+
+#cache: cargo
+#before_cache:
+#  # Travis can't cache files that are not readable by "others"
+#  - chmod -R a+r $HOME/.cargo
+#  - find ./target/debug -type f -maxdepth 1 -delete
+#  - rm -f  ./target/.rustc_info.json
+#  - rm -fr ./target/debug/{deps,.fingerprint}/tantivy*
+#  - rm -r target/debug/examples/
+#  - ls -1 examples/ | sed -e 's/\.rs$//' | xargs -I "{}" find target/* -name "*{}*" -type f -delete

 #branches:
 #  only:
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,240 @@
+Tantivy 0.13.2
+===================
+Bugfix. Acquiring a facet reader on a segment that does not contain any 
+doc with this facet returns `None`. (#896)
+
+Tantivy 0.13.1
+======================
+Made `Query` and `Collector` `Send + Sync`.
+Updated misc dependency versions.
+
+
+Tantivy 0.13.0
+======================
+Tantivy 0.13 introduces a change in the index format that will require
+you to reindex your index (BlockWAND information is added in the skiplist).
+The index size increase is minor as this information is only added for
+full blocks.
+If you have a massive index for which reindexing is not an option, please contact me
+so that we can discuss possible solutions.
+
+- Bugfix in `FuzzyTermQuery` not matching terms by prefix when it should (@Peachball)
+- Relaxed constraints on the custom/tweak score functions. At the segment level, they can be mut, and they are not required to be Sync + Send.
+- `MMapDirectory::open` does not return a `Result` anymore.
+- Change in the DocSet and Scorer API. (@fulmicoton). 
+A freshly created DocSet points directly to its first doc. A sentinel value called TERMINATED marks the end of a DocSet.
+`.advance()` returns the new DocId. `Scorer::skip(target)` has been replaced by `Scorer::seek(target)` and returns the resulting DocId.
+As a result, iterating through DocSet now looks as follows
+```rust
+let mut doc = docset.doc();
+while doc != TERMINATED {
+   // ...
+   doc = docset.advance();
+}
+```
+The change made it possible to greatly simplify a lot of the docset's code.
+- Misc internal optimization and introduction of the `Scorer::for_each_pruning` function. (@fulmicoton)
+- Added an offset option to the Top(.*)Collectors. (@robyoung)
+- Added Block WAND. Performance on TOP-K on term-unions should be greatly increased. (@fulmicoton, and special thanks 
+to the PISA team for answering all my questions!)
+
+Tantivy 0.12.0
+======================
+- Removing static dispatch in tokenizers for simplicity. (#762)
+- Added backward iteration for `TermDictionary` stream. (@halvorboe)
+- Fixed a performance issue when searching for the posting lists of a missing term (@audunhalland)
+- Added a configurable maximum number of docs (10M by default) for a segment to be considered for merge (@hntd187, landed by @halvorboe #713) 
+- Important Bugfix #777, causing tantivy to retain memory mapping. (diagnosed by @poljar)
+- Added support for field boosting. (#547, @fulmicoton)
+
+## How to update?
+
+Crates relying on a custom tokenizer, or registering a tokenizer in the manager, will require some
+minor changes. See https://github.com/tantivy-search/tantivy/blob/master/examples/custom_tokenizer.rs
+for a code sample.
+
+Tantivy 0.11.3
+=======================
+- Fixed DateTime as a fast field (#735)
+
+Tantivy 0.11.2
+=======================
+- The future returned by `IndexWriter::merge` does not borrow `self` mutably anymore (#732)
+- Exposing a constructor for `WatchHandle` (#731)
+
+Tantivy 0.11.1
+=====================
+- Bug fix #729
+
+
+Tantivy 0.11.0
+=====================
+
+- Added f64 field. Internally reuse u64 code the same way i64 does (@fdb-hiroshima)
+- Various bugfixes in the query parser.
+    - Better handling of hyphens in query parser. (#609)
+    - Better handling of whitespaces.
+- Closes #498 - add support for Elastic-style unbounded range queries for alphanumeric types eg. "title:>hello", "weight:>=70.5", "height:<200" (@petr-tik)
+- API change around `Box<BoxableTokenizer>`. See detail in #629
+- Avoid rebuilding Regex automaton whenever a regex query is reused. #639 (@brainlock)
+- Add footer with some metadata to index files. #605 (@fdb-hiroshima)
+- Add a method to check the compatibility of the footer in the index with the running version of tantivy (@petr-tik)
+- TopDocs collector: ensure stable sorting on equal score. #671 (@brainlock)
+- Added handling of pre-tokenized text fields (#642), which will enable users to
+  load tokens created outside tantivy. See usage in examples/pre_tokenized_text. (@kkoziara)
+- Fix crash when committing multiple times with deleted documents. #681 (@brainlock)
+
+## How to update?
+
+- The index format is changed. You are required to reindex your data to use tantivy 0.11. 
+- `Box<dyn BoxableTokenizer>` has been replaced by a `BoxedTokenizer` struct.
+- Regex are now compiled when the `RegexQuery` instance is built. As a result, it can now return
+an error and handling the `Result` is required.
+- `tantivy::version()` now returns a `Version` object. This object implements `ToString()`
+
+Tantivy 0.10.2
+=====================
+
+- Closes #656. Solving memory leak.
+
+Tantivy 0.10.1
+=====================
+
+- Closes #544.  A few users experienced problems with the directory watching system.
+Avoid watching the mmap directory until someone effectively creates a reader that uses
+this functionality.
+
+
+Tantivy 0.10.0
+=====================
+
+*Tantivy 0.10.0 index format is compatible with the index format in 0.9.0.*
+
+- Added an API to easily tweak or entirely replace the 
+ default score. See `TopDocs::tweak_score` and `TopScore::custom_score` (@pmasurel)
+- Added an ASCII folding filter (@drusellers)
+- Bugfix in `query.count` in presence of deletes (@pmasurel)
+- Added `.explain(...)` in `Query` and `Weight` (@pmasurel)
+- Added an efficient way to `delete_all_documents` in `IndexWriter` (@petr-tik). 
+  All segments are simply removed.
+
+Minor
+---------
+- Switched to Rust 2018 (@uvd)
+- Small simplification of the code. 
+Calling .freq() or .doc() when .advance() has never been called
+on segment postings should panic from now on.
+- Tokens exceeding `u16::max_value() - 4` chars are discarded silently instead of panicking.
+- Fast fields are now preloaded when the `SegmentReader` is created.
+- `IndexMeta` is now public.  (@hntd187)
+- `IndexWriter` `add_document`, `delete_term`. `IndexWriter` is `Sync`, making it possible to use it with an
+`Arc<RwLock<IndexWriter>>`. `add_document` and `delete_term` can
+only require a read lock. (@pmasurel)
+- Introducing `Opstamp` as an expressive type alias for `u64`. (@petr-tik)
+- Stamper now relies on `AtomicU64` on all platforms (@petr-tik)
+- Bugfix - Files get deleted slightly earlier
+- Compilation resources improved (@fdb-hiroshima)
+
+## How to update?
+
+Your program should be usable as is.
+
+### Fast fields
+
+Fast fields used to be accessed directly from the `SegmentReader`.
+The API changed, you are now required to acquire your fast field reader via the
+`segment_reader.fast_fields()`, and use one of the typed method: 
+- `.u64()`, `.i64()` if your field is single-valued ;
+- `.u64s()`, `.i64s()` if your field is multi-valued ;
+- `.bytes()` if your field is bytes fast field.
+
+
+
+Tantivy 0.9.0
+=====================
+*0.9.0 index format is not compatible with the 
+previous index format.*
+- MAJOR BUGFIX : 
+  Some `Mmap` objects were being leaked, and would never get released. (@fulmicoton)
+- Removed most unsafe (@fulmicoton)
+- Indexer memory footprint improved. (VInt comp, inlining the first block.) (@fulmicoton)
+- Stemming in other language possible (@pentlander)
+- Segments with no docs are deleted earlier (@barrotsteindev)
+- Added grouped add and delete operations. 
+  They are guaranteed to happen together (i.e. they cannot be split by a commit). 
+  In addition, adds are guaranteed to happen on the same segment. (@elbow-jason)
+- Removed `INT_STORED` and `INT_INDEXED`. It is now possible to use `STORED` and `INDEXED`
+  for int fields. (@fulmicoton)
+- Added DateTime field (@barrotsteindev)
+- Added IndexReader. By default, index is reloaded automatically upon new commits (@fulmicoton)
+- SIMD linear search within blocks (@fulmicoton)
+
+## How to update ?
+
+tantivy 0.9 brought some API breaking changes.
+To update from tantivy 0.8, you will need to go through the following steps.
+
+- `schema::INT_INDEXED` and `schema::INT_STORED` should be replaced by `schema::INDEXED` and `schema::STORED`.
+- The index does not hold the pool of searchers anymore. You are required to create an intermediary object called
+`IndexReader` for this. 
+    
+    ```rust
+    // create the reader. You typically need to create 1 reader for the entire
+    // lifetime of your program.
+    let reader = index.reader()?;
+    
+    // Acquiring a searcher (previously `index.searcher()`) is now written:
+    let searcher = reader.searcher();
+    
+    // With the default setting of the reader, you are not required to 
+    // call `index.load_searchers()` anymore.
+    //
+    // The IndexReader will pick up that change automatically, regardless
+    // of whether the update was done in a different process or not.
+    // If this behavior is not wanted, you can create your reader with 
+    // the `ReloadPolicy::Manual`, and manually decide when to reload the index
+    // by calling `reader.reload()?`.
+  
+    ```
+
+
+Tantivy 0.8.2
+=====================
+Fixing build for x86_64 platforms. (#496)
+No need to update from 0.8.1 if tantivy
+is building on your platform.
+
+
+Tantivy 0.8.1
+=====================
+Hotfix of #476.
+
+Merge was reflecting deletes before commit was passed. 
+Thanks @barrotsteindev  for reporting the bug.
+
+
+Tantivy 0.8.0
+=====================
+*No change in the index format*
+- API Breaking change in the collector API. (@jwolfe, @fulmicoton)
+- Multithreaded search (@jwolfe, @fulmicoton) 
+
+
+Tantivy 0.7.1
+=====================
+*No change in the index format*
+- Bugfix: NGramTokenizer panics on non ascii chars
+- Added a space usage API
+
+Tantivy 0.7
+=====================
+- Skip data for doc ids and positions (@fulmicoton),
+  greatly improving performance
+- Tantivy errors now rely on the failure crate (@drusellers)
+- Added support for `AND`, `OR`, `NOT` syntax in addition to the `+`,`-` syntax
+- Added a snippet generator with highlight (@vigneshsarma, @fulmicoton)
+- Added a `TopFieldCollector` (@pentlander)
+
 Tantivy 0.6.1
 =========================
 - Bugfix #324. GC removing was removing file that were still in useful
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,78 +1,101 @@
 [package]
 name = "tantivy"
-version = "0.6.1"
+version = "0.13.2"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
 description = """Search engine library"""
-documentation = "https://tantivy-search.github.io/tantivy/tantivy/index.html"
+documentation = "https://docs.rs/tantivy/"
 homepage = "https://github.com/tantivy-search/tantivy"
 repository = "https://github.com/tantivy-search/tantivy"
 readme = "README.md"
 keywords = ["search", "information", "retrieval"]
+edition = "2018"

 [dependencies]
-base64 = "0.9.1"
-byteorder = "1.0"
-lazy_static = "0.2.1"
-tinysegmenter = "0.1.0"
-regex = "0.2"
-fst = {version="0.3", default-features=false}
-fst-regex = { version="0.2" }
-lz4 = {version="1.20", optional=true}
-snap = {version="0.2"}
-atomicwrites = {version="0.2.2", optional=true}
-tempfile = "2.1"
-log = "0.3.6"
-combine = "2.2"
-tempdir = "0.3"
-serde = "1.0"
-serde_derive = "1.0"
-serde_json = "1.0"
-num_cpus = "1.2"
-itertools = "0.5.9"
-levenshtein_automata = {version="0.1", features=["fst_automaton"]}
-bit-set = "0.4.0"
-uuid = { version = "0.6", features = ["v4", "serde"] }
-chan = "0.1"
-crossbeam = "0.3"
-futures = "0.1"
-futures-cpupool = "0.1"
-error-chain = "0.8"
-owning_ref = "0.3"
-stable_deref_trait = "1.0.0"
-rust-stemmers = "0.1.0"
-downcast = { version="0.9" }
-matches = "0.1"
-bitpacking = "0.5"
-census = "0.1"
-fnv = "1.0.6"
-owned-read = "0.1"
+base64 = "0.12"
+byteorder = "1"
+crc32fast = "1"
+once_cell = "1"
+regex ={version = "1", default-features = false, features = ["std"]}
+tantivy-fst = "0.3"
+memmap = {version = "0.7", optional=true}
+lz4 = {version="1", optional=true}
+snap = "1"
+atomicwrites = {version="0.2", optional=true}
+tempfile = "3"
+log = "0.4"
+serde = {version="1", features=["derive"]}
+serde_json = "1"
+num_cpus = "1"
+fs2={version="0.4", optional=true}
+levenshtein_automata = "0.2"
+notify = {version="4", optional=true}
+uuid = { version = "0.8", features = ["v4", "serde"] }
+crossbeam = "0.7"
+futures = {version = "0.3",  features=["thread-pool"] }
+owning_ref = "0.4"
+stable_deref_trait = "1"
+rust-stemmers = "1"
+downcast-rs = "1"
+tantivy-query-grammar = { version="0.13", path="./query-grammar" }
+bitpacking = {version="0.8", default-features = false, features=["bitpacker4x"]}
+census = "0.4"
+fnv = "1"
+owned-read = "0.4"
+failure = "0.1"
+htmlescape = "0.3"
+fail = "0.4"
+murmurhash32 = "0.2"
+chrono = "0.4"
+smallvec = "1"
+rayon = "1"

 [target.'cfg(windows)'.dependencies]
-winapi = "0.2"
+winapi = "0.3"

 [dev-dependencies]
-rand = "0.3"
-env_logger = "0.4"
+rand = "0.7"
+maplit = "1"
+matches = "0.1.8"
+proptest = "0.10"
+
+[dev-dependencies.fail]
+version = "0.4"
+features = ["failpoints"]

 [profile.release]
 opt-level = 3
 debug = false
-lto = true
 debug-assertions = false

+[profile.test]
+debug-assertions = true
+overflow-checks = true
+
 [features]
 default = ["mmap"]
-mmap = ["fst/mmap", "atomicwrites"]
+mmap = ["atomicwrites", "fs2", "memmap", "notify"]
 lz4-compression = ["lz4"]
+failpoints = ["fail/failpoints"]
+unstable = [] # useful for benches.
+wasm-bindgen = ["uuid/wasm-bindgen"]
+scoref64 = [] # scores are f64 instead of f32. was introduced to debug blockwand.
+
+[workspace]
+members = ["query-grammar"]

 [badges]
 travis-ci = { repository = "tantivy-search/tantivy" }

-[[example]]
-name = "simple_search"
-required-features = ["mmap"]
-
-[[example]]
-name = "custom_tokenizer"
+# Following the "fail" crate best practices, we isolate
+# tests that define specific behavior in fail check points
+# in a different binary.
+#
+# We do that because fail relies on a global definition of
+# failpoints behavior and hence, it is incompatible with
+# multithreading.
+[[test]]
+name = "failpoints"
+path = "tests/failpoints/mod.rs"
+required-features = ["fail/failpoints"]
--- a/3
+++ b/3
@@ -0,0 +1,3 @@
+test:
+	echo "Run test only... No examples."
+	cargo test --tests --lib
--- a/README.md
+++ b/README.md
@@ -1,70 +1,140 @@
-![Tantivy](https://tantivy-search.github.io/logo/tantivy-logo.png)

 [![Build Status](https://travis-ci.org/tantivy-search/tantivy.svg?branch=master)](https://travis-ci.org/tantivy-search/tantivy)
-[![Coverage Status](https://coveralls.io/repos/github/tantivy-search/tantivy/badge.svg?branch=master&refresh1)](https://coveralls.io/github/tantivy-search/tantivy?branch=master)
+[![codecov](https://codecov.io/gh/tantivy-search/tantivy/branch/master/graph/badge.svg)](https://codecov.io/gh/tantivy-search/tantivy)
 [![Join the chat at https://gitter.im/tantivy-search/tantivy](https://badges.gitter.im/tantivy-search/tantivy.svg)](https://gitter.im/tantivy-search/tantivy?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![Build status](https://ci.appveyor.com/api/projects/status/r7nb13kj23u8m9pj/branch/master?svg=true)](https://ci.appveyor.com/project/fulmicoton/tantivy/branch/master)
+[![Crates.io](https://img.shields.io/crates/v/tantivy.svg)](https://crates.io/crates/tantivy)
+[![Say Thanks!](https://img.shields.io/badge/Say%20Thanks-!-1EAEDB.svg)](https://saythanks.io/to/fulmicoton)

-**Tantivy** is a **full text search engine library** written in rust.
+![Tantivy](https://tantivy-search.github.io/logo/tantivy-logo.png)

-It is closer to Lucene than to Elastic Search and Solr in the sense it is not
+[![](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/images/0)](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/0)
+[![](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/images/1)](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/1)
+[![](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/images/2)](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/2)
+[![](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/images/3)](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/3)
+[![](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/images/4)](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/4)
+[![](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/images/5)](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/5)
+[![](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/images/6)](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/6)
+[![](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/images/7)](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/7)
+
+[![Become a patron](https://c5.patreon.com/external/logo/become_a_patron_button.png)](https://www.patreon.com/fulmicoton)
+
+
+**Tantivy** is a **full text search engine library** written in Rust.
+
+It is closer to [Apache Lucene](https://lucene.apache.org/) than to [Elasticsearch](https://www.elastic.co/products/elasticsearch) or [Apache Solr](https://lucene.apache.org/solr/) in the sense it is not
 an off-the-shelf search engine server, but rather a crate that can be used
 to build such a search engine.

 Tantivy is, in fact, strongly inspired by Lucene's design.

+# Benchmark
+
+The following [benchmark](https://tantivy-search.github.io/bench/) breaks down
+performance for different types of queries / collections.
+
+
+In general, Tantivy tends to be 
+- slower than Lucene on union with a Top-K due to Block-WAND optimization.
+- faster than Lucene on intersection and phrase queries. 
+
+Your mileage WILL vary depending on the nature of queries and their load.
+
 # Features

 - Full-text search
+- Configurable tokenizer (stemming available for 17 Latin languages with third party support for Chinese ([tantivy-jieba](https://crates.io/crates/tantivy-jieba) and [cang-jie](https://crates.io/crates/cang-jie)), Japanese ([lindera](https://github.com/lindera-morphology/lindera-tantivy) and [tantivy-tokenizer-tiny-segmenter](https://crates.io/crates/tantivy-tokenizer-tiny-segmenter)) and Korean ([lindera](https://github.com/lindera-morphology/lindera-tantivy) + [lindera-ko-dic-builder](https://github.com/lindera-morphology/lindera-ko-dic-builder)))
+- Fast (check out the :racehorse: :sparkles: [benchmark](https://tantivy-search.github.io/bench/) :sparkles: :racehorse:)
 - Tiny startup time (<10ms), perfect for command line tools
- BM25 scoring (the same as lucene)
- Basic query language (`+michael +jackson`)
- Phrase queries search (\"michael jackson\"`)
+- BM25 scoring (the same as Lucene)
+- Natural query language (e.g. `(michael AND jackson) OR "king of pop"`)
+- Phrase queries search (e.g. `"michael jackson"`)
 - Incremental indexing
 - Multithreaded indexing (indexing English Wikipedia takes < 3 minutes on my desktop)
 - Mmap directory
- SIMD integer compression when the platform/CPU includes the SSE2 instruction set.
- Single valued and multivalued u64 and i64 fast fields (equivalent of doc values in Lucene)
+- SIMD integer compression when the platform/CPU includes the SSE2 instruction set
+- Single valued and multivalued u64, i64, and f64 fast fields (equivalent of doc values in Lucene)
 - `&[u8]` fast fields
+- Text, i64, u64, f64, dates, and hierarchical facet fields
 - LZ4 compressed document store
 - Range queries
 - Faceted search
- Configurable indexing (optional term frequency and position indexing
+- Configurable indexing (optional term frequency and position indexing)
 - Cheesy logo with a horse

-# Non-features
+## Non-features

- Distributed search and will not be in the scope of tantivy.
+- Distributed search is out of the scope of Tantivy. That being said, Tantivy is a
+library upon which one could build a distributed search. Serializable/mergeable collector state, for instance,
+is within the scope of Tantivy.


-# Supported OS and compiler
-
-Tantivy works on stable rust (>= 1.27) and supports Linux, MacOS and Windows.
-
 # Getting started

- [tantivy's simple search example](http://fulmicoton.com/tantivy-examples/simple_search.html)
- [tantivy-cli and its tutorial](https://github.com/tantivy-search/tantivy-cli).
-`tantivy-cli` is an actual command line interface that makes it easy for you to create a search engine,
-index documents and search via the CLI or a small server with a REST API.
-It will walk you through getting a wikipedia search engine up and running in a few minutes.
- [reference doc]
-    - [For the last released version](https://docs.rs/tantivy/)
-    - [For the last master branch](https://tantivy-search.github.io/tantivy/tantivy/index.html)
+Tantivy works on stable Rust (>= 1.27) and supports Linux, MacOS, and Windows.

-# Compiling
+- [Tantivy's simple search example](https://tantivy-search.github.io/examples/basic_search.html)
+- [tantivy-cli and its tutorial](https://github.com/tantivy-search/tantivy-cli) - `tantivy-cli` is an actual command line interface that makes it easy for you to create a search engine,
+index documents, and search via the CLI or a small server with a REST API.
+It walks you through getting a wikipedia search engine up and running in a few minutes.
+- [Reference doc for the last released version](https://docs.rs/tantivy/)

-## Development
+# How can I support this project?

-Tantivy compiles on stable rust but requires `Rust >= 1.27`.
-To check out and run tests, you can simply run :
+There are many ways to support this project. 

-    git clone git@github.com:tantivy-search/tantivy.git
+- Use Tantivy and tell us about your experience on [Gitter](https://gitter.im/tantivy-search/tantivy) or by email (paul.masurel@gmail.com)
+- Report bugs
+- Write a blog post
+- Help with documentation by asking questions or submitting PRs
+- Contribute code (you can join [our Gitter](https://gitter.im/tantivy-search/tantivy))
+- Talk about Tantivy around you
+- Drop a word on [![Say Thanks!](https://img.shields.io/badge/Say%20Thanks-!-1EAEDB.svg)](https://saythanks.io/to/fulmicoton) or even [![Become a patron](https://c5.patreon.com/external/logo/become_a_patron_button.png)](https://www.patreon.com/fulmicoton)
+
+# Contributing code
+
+We use the GitHub Pull Request workflow: reference a GitHub ticket and/or include a comprehensive commit message when opening a PR.
+
+## Clone and build locally
+
+Tantivy compiles on stable Rust but requires `Rust >= 1.27`.
+To check out and run tests, you can simply run:
+
+```bash
+    git clone https://github.com/tantivy-search/tantivy.git
    cd tantivy
    cargo build
+```

+## Run tests

-# Contribute
+Some tests will not run with just `cargo test` because of `fail-rs`.
+To run the tests exhaustively, run `./run-tests.sh`.

-Send me an email (paul.masurel at gmail.com) if you want to contribute to tantivy.
+## Debug
+
+You might find it useful to step through the programme with a debugger.
+
+### A failing test
+
+Make sure you haven't run `cargo clean` after the most recent `cargo test` or `cargo build` to guarantee that the `target/` directory exists. Use this bash script to find the name of the most recent debug build of Tantivy and run it under `rust-gdb`:
+
+```bash
+find target/debug/ -maxdepth 1 -executable -type f -name "tantivy*" -printf '%TY-%Tm-%Td %TT %p\n' | sort -r | cut -d " " -f 3 | xargs -I RECENT_DBG_TANTIVY rust-gdb RECENT_DBG_TANTIVY
+```
+
+Now that you are in `rust-gdb`, you can set breakpoints on lines and methods that match your source code and run the debug executable with flags that you normally pass to `cargo test` like this:
+
+```bash
+$gdb run --test-threads 1 --test $NAME_OF_TEST
+```
+
+### An example
+
+By default, `rustc` compiles everything in the `examples/` directory in debug mode. This makes it easy for you to make examples to reproduce bugs:
+
+```bash
+rust-gdb target/debug/examples/$EXAMPLE_NAME
+$ gdb run
+```
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -18,5 +18,5 @@ install:
 build: false

 test_script:
-  - REM SET RUST_LOG=tantivy,test & cargo test --verbose
-  - REM SET RUST_BACKTRACE=1 & cargo run --example simple_search
+  - REM SET RUST_LOG=tantivy,test & cargo test --all --verbose --no-default-features --features mmap
+  - REM SET RUST_BACKTRACE=1 & cargo build --examples
--- a/ci/script.sh
+++ b/ci/script.sh
@@ -1,20 +1,27 @@
+#!/usr/bin/env bash
+
 # This script takes care of testing your crate

 set -ex

 main() {
-    cross build --target $TARGET
-    cross build --target $TARGET --release
-
-    if [ ! -z $DISABLE_TESTS ]; then
-        return
+    if [ ! -z $CODECOV ]; then
+        echo "Codecov"
+        cargo build --verbose && cargo coverage --verbose --all && bash <(curl -s https://codecov.io/bash) -s target/kcov
+    else
+        echo "Build"
+        cross build --target $TARGET
+        if [ ! -z $DISABLE_TESTS ]; then
+            return
+        fi
+        echo "Test"
+        cross test --target $TARGET --no-default-features --features mmap
+        cross test --target $TARGET --no-default-features --features mmap query-grammar
    fi
-
-    cross test --target $TARGET
-    # cross test --target $TARGET --release
-
-    # cross run --target $TARGET
-    # cross run --target $TARGET --release
+    for example in $(ls examples/*.rs)
+    do
+        cargo run --example  $(basename $example .rs)
+    done
 }

 # we don't run the "test phase" when doing deploys
--- a/doc/.gitignore
+++ b/doc/.gitignore
@@ -0,0 +1 @@
+book
--- a/doc/book.toml
+++ b/doc/book.toml
@@ -0,0 +1,5 @@
+[book]
+authors = ["Paul Masurel"]
+multilingual = false
+src = "src"
+title = "Tantivy, the user guide"
--- a/doc/src/SUMMARY.md
+++ b/doc/src/SUMMARY.md
@@ -0,0 +1,15 @@
+# Summary
+
+
+
+[Avant Propos](./avant-propos.md)
+
+- [Segments](./basis.md)
+- [Defining your schema](./schema.md)
+- [Facetting](./facetting.md)
+- [Innerworkings](./innerworkings.md)
+  - [Inverted index](./inverted_index.md)
+- [Best practise](./inverted_index.md)
+
+[Frequently Asked Questions](./faq.md)
+[Examples](./examples.md)
--- a/doc/src/avant-propos.md
+++ b/doc/src/avant-propos.md
@@ -0,0 +1,34 @@
+# Foreword, what is the scope of tantivy?
+
+> Tantivy is a **search** engine **library** for Rust.
+
+If you are familiar with Lucene, it's an excellent approximation to consider tantivy as Lucene for Rust. tantivy is heavily inspired by Lucene's design and
+they both have the same scope and targeted use cases.
+
+If you are not familiar with Lucene, let's break down our little tagline.
+
+- **Search** here means full-text search: fundamentally, tantivy is here to help you
+efficiently identify the documents matching a given query in your corpus.
+But modern search UIs offer so much more: text processing, facetting, autocomplete, fuzzy search, good
+relevancy, collapsing, highlighting, spatial search.
+
+  While some of these features are not available in tantivy yet, all of these are relevant
+  feature requests. Tantivy's objective is to offer a solid toolbox to create the best search
+  experience. But keep in mind this is just a toolbox.
+  Which brings us to the second keyword...
+
+- **Library** means that you will have to write code. tantivy is not an *all-in-one* server solution like Elasticsearch, for instance.
+
+  Sometimes a functionality will not be available in tantivy because it is too
+  specific to your use case. By design, tantivy should make it possible to extend
+  the available set of features using the existing rock-solid datastructures.
+
+  Most frequently this will mean writing your own `Collector`, your own `Scorer` or your own
+  `TokenFilter`... Some of your requirements may also be related to
+  something closer to architecture or operations. For instance, you may
+  want to build a large corpus on Hadoop, fine-tune the merge policy to keep your
+  index sharded in a time-wise fashion, or you may want to convert an existing
+  index from a different format.
+
+  Tantivy exposes a lot of low level API to do all of these things.
+  
--- a/doc/src/basis.md
+++ b/doc/src/basis.md
@@ -0,0 +1,77 @@
+# Anatomy of an index
+
+## Straight from disk
+
+Tantivy accesses its data using an abstracting trait called `Directory`.
+In theory, one can come and override the data access logic. In practice, the
+trait somewhat assumes that your data can be mapped to memory, and tantivy
+seems deeply married to using `mmap` for its io [^1], and the only persisting
+directory shipped with tantivy is the `MmapDirectory`.
+
+While this design has some downsides, this greatly simplifies the source code of
+tantivy. Caching is also entirely delegated to the OS.
+
+`tantivy` works entirely (or almost) by directly reading the datastructures as they are laid out on disk. As a result, the act of opening an index does not involve loading different datastructures from the disk into random access memory: starting a process, opening an index, and performing your first query can typically be done in a matter of milliseconds.
+
+This is an interesting property for a command line search engine, or for some multi-tenant log search engine: spawning a new process for each new query can be a perfectly sensible solution in some use cases.
+
+In later chapters, we will discuss tantivy's inverted index data layout.
+One key takeaway is that to achieve great performance, search indexes are extremely compact.
+Of course this is crucial to reduce IO, and ensure that as much of our index as possible can sit in RAM.
+
+Also, whenever possible its data is accessed sequentially. Of course, this is an amazing property when tantivy needs to access the data from your spinning hard disk, but this is also
+critical for performance if your data is read from an `SSD` or is even already in your page cache.
+
+
+## Segments, and the log method
+
+That kind of compact layout comes at one cost: it prevents our datastructures from being dynamic.
+In fact, the `Directory` trait does not even allow you to modify part of a file.
+
+To allow the addition / deletion of documents, and create the illusion that
+your index is dynamic (i.e.: adding and deleting documents), tantivy uses a common database trick sometimes referred to as the *log method*.
+
+Let's forget about deletes for a moment.
+
+As you add documents, these documents are processed and stored in a dedicated datastructure, in a `RAM` buffer. This datastructure is not ready for search, but it is useful to receive your data and rearrange it very rapidly.
+
+As you add documents, this buffer will reach its capacity and tantivy will transparently stop adding documents to it and start converting this datastructure to its final read-only format on disk. Once written, a brand new empty buffer is available to resume adding documents.
+
+The resulting chunk of index obtained after this serialization is called a `Segment`.
+
+> A segment is a self-contained atomic piece of index. It is identified with a UUID, and all of its files are identified using the naming scheme : `<UUID>.*`.
+
+Which brings us to the nature of a tantivy `Index`.
+
+> A tantivy `Index` is a collection of `Segments`.
+
+Physically, this really just means an index is a bunch of segment files in a given `Directory`,
+linked together by a `meta.json` file. This transparency can become extremely handy
+to get tantivy to fit your use case:
+
+*Example 1* You could for instance use hadoop to build a very large search index in a timely manner, copy all of the resulting segment files in the same directory and edit the `meta.json` to get a functional index.[^2]
+
+*Example 2* You could also disable your merge policy and enforce daily segments. Removing data after one week can then be done very efficiently by just editing the `meta.json` and deleting the files associated to segment `D-7`.
+
+
+
+
+
+# Merging
+
+As you index more and more data, your index will accumulate more and more segments.
+Having a lot of small segments is not really optimal. There is a bit of redundancy in having
+all these term dictionaries. Also when searching, we will need to do term lookups as many times as we have segments. It can hurt search performance a bit.
+
+That's where merging or compacting comes into play. Tantivy will continuously consider merge
+opportunities and start merging segments in the background.
+
+
+# Indexing throughput, number of indexing threads
+
+
+
+
+[^1]: This may eventually change.
+
+[^2]: Be careful however. By default these files will not be considered as *managed* by tantivy. This means they will never be garbage collected by tantivy, regardless of whether they become obsolete or not.
--- a/doc/src/best_practise.md.rs
+++ b/doc/src/best_practise.md.rs
--- a/doc/src/examples.md
+++ b/doc/src/examples.md
@@ -0,0 +1,3 @@
+# Examples
+
+- [Basic search](/examples/basic_search.html)
--- a/doc/src/facetting.md
+++ b/doc/src/facetting.md
@@ -0,0 +1,5 @@
+# Facetting
+
+wewew
+
+## weeewe
--- a/doc/src/faq.md
+++ b/doc/src/faq.md
--- a/doc/src/innerworkings.md
+++ b/doc/src/innerworkings.md
@@ -0,0 +1 @@
+# Innerworkings
--- a/doc/src/inverted_index.md
+++ b/doc/src/inverted_index.md
@@ -0,0 +1 @@
+# Inverted index
--- a/doc/src/schema.md
+++ b/doc/src/schema.md
@@ -0,0 +1 @@
+# Defining your schema
--- a/examples/basic_search.rs
+++ b/examples/basic_search.rs
@@ -0,0 +1,237 @@
+// # Basic Example
+//
+// This example covers the basic functionalities of
+// tantivy.
+//
+// We will :
+// - define our schema
+// - create an index in a directory
+// - index a few documents into our index
+// - search for the best document matching a basic query
+// - retrieve the best document's original content.
+
+// ---
+// Importing tantivy...
+use tantivy::collector::TopDocs;
+use tantivy::query::QueryParser;
+use tantivy::schema::*;
+use tantivy::{doc, Index, ReloadPolicy};
+use tempfile::TempDir;
+
+fn main() -> tantivy::Result<()> {
+    // Let's create a temporary directory for the
+    // sake of this example
+    let index_path = TempDir::new()?;
+
+    // # Defining the schema
+    //
+    // The Tantivy index requires a very strict schema.
+    // The schema declares which fields are in the index,
+    // and for each field, its type and "the way it should
+    // be indexed".
+
+    // First we need to define a schema ...
+    let mut schema_builder = Schema::builder();
+
+    // Our first field is title.
+    // We want full-text search for it, and we also want
+    // to be able to retrieve the document after the search.
+    //
+    // `TEXT | STORED` is some syntactic sugar to describe
+    // that.
+    //
+    // `TEXT` means the field should be tokenized and indexed,
+    // along with its term frequency and term positions.
+    //
+    // `STORED` means that the field will also be saved
+    // in a compressed, row-oriented key-value store.
+    // This store is useful for reconstructing the
+    // documents that were selected during the search phase.
+    schema_builder.add_text_field("title", TEXT | STORED);
+
+    // Our second field is body.
+    // We want full-text search for it, but we do not
+    // need to be able to retrieve it
+    // for our application.
+    //
+    // We can make our index lighter by omitting the `STORED` flag.
+    schema_builder.add_text_field("body", TEXT);
+
+    let schema = schema_builder.build();
+
+    // # Indexing documents
+    //
+    // Let's create a brand new index.
+    //
+    // This will actually just save a meta.json
+    // with our schema in the directory.
+    let index = Index::create_in_dir(&index_path, schema.clone())?;
+
+    // To insert a document we will need an index writer.
+    // There must be only one writer at a time.
+    // This single `IndexWriter` is already
+    // multithreaded.
+    //
+    // Here we give tantivy a budget of `50MB`.
+    // Using a bigger heap for the indexer may increase
+    // throughput, but 50 MB is already plenty.
+    let mut index_writer = index.writer(50_000_000)?;
+
+    // Let's index our documents!
+    // We first need a handle on the title and the body field.
+
+    // ### Adding documents
+    //
+    // We can create a document manually, by setting the fields
+    // one by one in a Document object.
+    let title = schema.get_field("title").unwrap();
+    let body = schema.get_field("body").unwrap();
+
+    let mut old_man_doc = Document::default();
+    old_man_doc.add_text(title, "The Old Man and the Sea");
+    old_man_doc.add_text(
+        body,
+        "He was an old man who fished alone in a skiff in the Gulf Stream and \
+         he had gone eighty-four days now without taking a fish.",
+    );
+
+    // ... and add it to the `IndexWriter`.
+    index_writer.add_document(old_man_doc);
+
+    // For convenience, tantivy also comes with a macro to
+    // reduce the boilerplate above.
+    index_writer.add_document(doc!(
+    title => "Of Mice and Men",
+    body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \
+            bank and runs deep and green. The water is warm too, for it has slipped twinkling \
+            over the yellow sands in the sunlight before reaching the narrow pool. On one \
+            side of the river the golden foothill slopes curve up to the strong and rocky \
+            Gabilan Mountains, but on the valley side the water is lined with trees—willows \
+            fresh and green with every spring, carrying in their lower leaf junctures the \
+            debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
+            limbs and branches that arch over the pool"
+    ));
+
+    index_writer.add_document(doc!(
+    title => "Of Mice and Men",
+    body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \
+            bank and runs deep and green. The water is warm too, for it has slipped twinkling \
+            over the yellow sands in the sunlight before reaching the narrow pool. On one \
+            side of the river the golden foothill slopes curve up to the strong and rocky \
+            Gabilan Mountains, but on the valley side the water is lined with trees—willows \
+            fresh and green with every spring, carrying in their lower leaf junctures the \
+            debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
+            limbs and branches that arch over the pool"
+    ));
+
+    // Multivalued fields just need to be repeated.
+    index_writer.add_document(doc!(
+    title => "Frankenstein",
+    title => "The Modern Prometheus",
+    body => "You will rejoice to hear that no disaster has accompanied the commencement of an \
+             enterprise which you have regarded with such evil forebodings.  I arrived here \
+             yesterday, and my first task is to assure my dear sister of my welfare and \
+             increasing confidence in the success of my undertaking."
+    ));
+
+    // This is an example, so we will only index 4 documents
+    // here. You can check out tantivy's tutorial to index
+    // the English wikipedia. Tantivy's indexing is rather fast.
+    // Indexing 5 million articles of the English wikipedia takes
+    // around 3 minutes on my computer!
+
+    // ### Committing
+    //
+    // At this point our documents are not searchable.
+    //
+    //
+    // We need to call `.commit()` explicitly to force the
+    // `index_writer` to finish processing the documents in the queue,
+    // flush the current index to the disk, and advertise
+    // the existence of new documents.
+    //
+    // This call is blocking.
+    index_writer.commit()?;
+
+    // If `.commit()` returns correctly, then all of the
+    // documents that have been added are guaranteed to be
+    // persistently indexed.
+    //
+    // In the scenario of a crash or a power failure,
+    // tantivy behaves as if it has rolled back to its last
+    // commit.
+
+    // # Searching
+    //
+    // ### Searcher
+    //
+    // A reader is required first in order to search an index.
+    // It acts as a `Searcher` pool that reloads itself,
+    // depending on a `ReloadPolicy`.
+    //
+    // For a search server you will typically create one reader for the entire lifetime of your
+    // program, and acquire a new searcher for every single request.
+    //
+    // In the code below, we rely on the `OnCommit` policy: the reader
+    // will reload the index automatically after each commit.
+    let reader = index
+        .reader_builder()
+        .reload_policy(ReloadPolicy::OnCommit)
+        .try_into()?;
+
+    // We now need to acquire a searcher.
+    //
+    // A searcher points to a snapshotted, immutable version of the index.
+    //
+    // Some search experience might require more than
+    // one query. Using the same searcher ensures that all of these queries will run on the
+    // same version of the index.
+    //
+    // Acquiring a `searcher` is very cheap.
+    //
+    // You should acquire a searcher every time you start processing a request and
+    // release it right after your query is finished.
+    let searcher = reader.searcher();
+
+    // ### Query
+
+    // The query parser can interpret human queries.
+    // Here, if the user does not specify which
+    // field they want to search, tantivy will search
+    // in both title and body.
+    let query_parser = QueryParser::for_index(&index, vec![title, body]);
+
+    // `QueryParser` may fail if the query is not in the right
+    // format. For user facing applications, this can be a problem.
+    // A ticket has been opened regarding this problem.
+    let query = query_parser.parse_query("sea whale")?;
+
+    // A query defines a set of documents, as
+    // well as the way they should be scored.
+    //
+    // A query created by the query parser is scored according
+    // to a metric called Tf-Idf, and will consider
+    // any document matching at least one of our terms.
+
+    // ### Collectors
+    //
+    // We are not interested in all of the documents but
+    // only in the top 10. Keeping track of our top 10 best documents
+    // is the role of the `TopDocs` collector.
+
+    // We can now perform our query.
+    let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
+
+    // The actual documents still need to be
+    // retrieved from Tantivy's store.
+    //
+    // Since the body field was not configured as stored,
+    // the document returned will only contain
+    // a title.
+    for (_score, doc_address) in top_docs {
+        let retrieved_doc = searcher.doc(doc_address)?;
+        println!("{}", schema.to_json(&retrieved_doc));
+    }
+
+    Ok(())
+}
--- a/examples/custom_collector.rs
+++ b/examples/custom_collector.rs
@@ -0,0 +1,191 @@
+// # Custom collector example
+//
+// This example shows how you can implement your own
+// collector. As an example, we will write a collector
+// that computes the standard deviation of a given fast field.
+//
+// Of course, you can have a look at tantivy's built-in collectors
+// such as the `CountCollector` for more examples.
+
+// ---
+// Importing tantivy...
+use tantivy::collector::{Collector, SegmentCollector};
+use tantivy::fastfield::FastFieldReader;
+use tantivy::query::QueryParser;
+use tantivy::schema::Field;
+use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
+use tantivy::{doc, Index, Score, SegmentReader, TantivyError};
+
+#[derive(Default)]
+struct Stats { // running aggregates over the collected fast field values
+    count: usize, // number of values collected
+    sum: f64, // sum of the collected values
+    squared_sum: f64, // sum of the squares of the collected values
+}
+
+impl Stats {
+    pub fn count(&self) -> usize { // number of collected values
+        self.count
+    }
+
+    pub fn mean(&self) -> f64 { // arithmetic mean; NaN (0.0 / 0.0) if nothing was collected
+        self.sum / (self.count as f64)
+    }
+
+    fn square_mean(&self) -> f64 { // mean of the squared values
+        self.squared_sum / (self.count as f64)
+    }
+
+    pub fn standard_deviation(&self) -> f64 { // sqrt(mean of squares - square of mean)
+        let mean = self.mean();
+        (self.square_mean() - mean * mean).sqrt()
+    }
+
+    fn non_zero_count(self) -> Option<Stats> { // `None` when `count` is 0, `Some(self)` otherwise
+        if self.count == 0 {
+            None
+        } else {
+            Some(self)
+        }
+    }
+}
+
+struct StatsCollector { // collector computing `Stats` over one field
+    field: Field, // the field whose u64 fast field values are aggregated
+}
+
+impl StatsCollector {
+    fn with_field(field: Field) -> StatsCollector { // builds a collector targeting `field`
+        StatsCollector { field }
+    }
+}
+
+impl Collector for StatsCollector {
+    // That's the type of our result: the aggregated `Stats`,
+    // or `None` if no document was collected.
+    type Fruit = Option<Stats>;
+
+    type Child = StatsSegmentCollector;
+
+    fn for_segment(
+        &self,
+        _segment_local_id: u32,
+        segment_reader: &SegmentReader,
+    ) -> tantivy::Result<StatsSegmentCollector> {
+        let fast_field_reader = segment_reader
+            .fast_fields()
+            .u64(self.field)
+            .ok_or_else(|| {
+                let field_name = segment_reader.schema().get_field_name(self.field);
+                TantivyError::SchemaError(format!(
+                    "Field {:?} is not a u64 fast field.",
+                    field_name
+                ))
+            })?;
+        Ok(StatsSegmentCollector {
+            fast_field_reader,
+            stats: Stats::default(),
+        })
+    }
+
+    fn requires_scoring(&self) -> bool {
+        // this collector does not care about score.
+        false
+    }
+
+    fn merge_fruits(&self, segment_stats: Vec<Option<Stats>>) -> tantivy::Result<Option<Stats>> {
+        let mut stats = Stats::default(); // sums the per-segment aggregates; `None` entries are skipped
+        for segment_stats_opt in segment_stats {
+            if let Some(segment_stats) = segment_stats_opt {
+                stats.count += segment_stats.count;
+                stats.sum += segment_stats.sum;
+                stats.squared_sum += segment_stats.squared_sum;
+            }
+        }
+        Ok(stats.non_zero_count())
+    }
+}
+
+struct StatsSegmentCollector { // per-segment collector created by `StatsCollector::for_segment`
+    fast_field_reader: FastFieldReader<u64>, // reads the u64 fast field value of a doc
+    stats: Stats, // aggregates accumulated so far for this segment
+}
+
+impl SegmentCollector for StatsSegmentCollector {
+    type Fruit = Option<Stats>;
+
+    fn collect(&mut self, doc: u32, _score: Score) { // folds the fast field value of `doc` into the stats
+        let value = self.fast_field_reader.get(doc) as f64;
+        self.stats.count += 1;
+        self.stats.sum += value;
+        self.stats.squared_sum += value * value;
+    }
+
+    fn harvest(self) -> <Self as SegmentCollector>::Fruit { // `None` if no doc was collected
+        self.stats.non_zero_count()
+    }
+}
+
+fn main() -> tantivy::Result<()> {
+    // # Defining the schema
+    //
+    // The Tantivy index requires a very strict schema.
+    // The schema declares which fields are in the index,
+    // and for each field, its type and "the way it should
+    // be indexed".
+
+    // first we need to define a schema ...
+    let mut schema_builder = Schema::builder();
+
+    // We'll assume a fictional index containing
+    // products, and with a name, a description, and a price.
+    let product_name = schema_builder.add_text_field("name", TEXT);
+    let product_description = schema_builder.add_text_field("description", TEXT);
+    let price = schema_builder.add_u64_field("price", INDEXED | FAST);
+    let schema = schema_builder.build();
+
+    // # Indexing documents
+    //
+    // Let's index a bunch of fake documents for the sake of
+    // this example.
+    let index = Index::create_in_ram(schema.clone());
+
+    let mut index_writer = index.writer(50_000_000)?;
+    index_writer.add_document(doc!(
+        product_name => "Super Broom 2000",
+        product_description => "While it is ok for short distance travel, this broom \
+        was designed quiditch. It will up your game.",
+        price => 30_200u64
+    ));
+    index_writer.add_document(doc!(
+        product_name => "Turbulobroom",
+        product_description => "You might have heard of this broom before : it is the sponsor of the Wales team.\
+            You'll enjoy its sharp turns, and rapid acceleration",
+        price => 29_240u64
+    ));
+    index_writer.add_document(doc!(
+        product_name => "Broomio",
+        product_description => "Great value for the price. This broom is a market favorite",
+        price => 21_240u64
+    ));
+    index_writer.add_document(doc!(
+        product_name => "Whack a Mole",
+        product_description => "Prime quality bat.",
+        price => 5_200u64
+    ));
+    index_writer.commit()?;
+
+    let reader = index.reader()?;
+    let searcher = reader.searcher();
+    let query_parser = QueryParser::for_index(&index, vec![product_name, product_description]);
+
+    // here we compute price statistics over the products matching `broom`
+    let query = query_parser.parse_query("broom")?;
+    if let Some(stats) = searcher.search(&query, &StatsCollector::with_field(price))? {
+        println!("count: {}", stats.count());
+        println!("mean: {}", stats.mean());
+        println!("standard deviation: {}", stats.standard_deviation());
+    }
+
+    Ok(())
+}
--- a/examples/custom_tokenizer.rs
+++ b/examples/custom_tokenizer.rs
@@ -1,27 +1,14 @@
-extern crate tantivy;
-extern crate tempdir;
-
-#[macro_use]
-extern crate serde_json;
-
-use std::path::Path;
-use tantivy::collector::TopCollector;
+// # Defining a tokenizer pipeline
+//
+// In this example, we'll see how to define a tokenizer pipeline
+// by aligning a bunch of `TokenFilter`.
+use tantivy::collector::TopDocs;
 use tantivy::query::QueryParser;
 use tantivy::schema::*;
 use tantivy::tokenizer::NgramTokenizer;
-use tantivy::Index;
-use tempdir::TempDir;
+use tantivy::{doc, Index};

-fn main() {
-    // Let's create a temporary directory for the
-    // sake of this example
-    if let Ok(dir) = TempDir::new("tantivy_token_example_dir") {
-        run_example(dir.path()).unwrap();
-        dir.close().unwrap();
-    }
-}
-
-fn run_example(index_path: &Path) -> tantivy::Result<()> {
+fn main() -> tantivy::Result<()> {
    // # Defining the schema
    //
    // The Tantivy index requires a very strict schema.
@@ -30,7 +17,7 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
    // be indexed".

    // first we need to define a schema ...
-    let mut schema_builder = SchemaBuilder::default();
+    let mut schema_builder = Schema::builder();

    // Our first field is title.
    // In this example we want to use NGram searching
@@ -42,7 +29,7 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
    let text_options = TextOptions::default()
        .set_indexing_options(text_field_indexing)
        .set_stored();
-    schema_builder.add_text_field("title", text_options);
+    let title = schema_builder.add_text_field("title", text_options);

    // Our second field is body.
    // We want full-text search for it, but we do not
@@ -51,17 +38,17 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
    //
    // We can make our index lighter and
    // by omitting `STORED` flag.
-    schema_builder.add_text_field("body", TEXT);
+    let body = schema_builder.add_text_field("body", TEXT);

    let schema = schema_builder.build();

    // # Indexing documents
    //
    // Let's create a brand new index.
-    //
-    // This will actually just save a meta.json
-    // with our schema in the directory.
-    let index = Index::create_in_dir(index_path, schema.clone())?;
+    // To simplify we will work entirely in RAM.
+    // This is not what you want in reality, but it is very useful
+    // for your unit tests... Or this example.
+    let index = Index::create_in_ram(schema.clone());

    // here we are registering our custome tokenizer
    // this will store tokens of 3 characters each
@@ -77,102 +64,33 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
    // Here we use a buffer of 50MB per thread. Using a bigger
    // heap for the indexer can increase its throughput.
    let mut index_writer = index.writer(50_000_000)?;
-
-    // Let's index our documents!
-    // We first need a handle on the title and the body field.
-
-    // ### Create a document "manually".
-    //
-    // We can create a document manually, by setting the fields
-    // one by one in a Document object.
-    let title = schema.get_field("title").unwrap();
-    let body = schema.get_field("body").unwrap();
-
-    let mut old_man_doc = Document::default();
-    old_man_doc.add_text(title, "The Old Man and the Sea");
-    old_man_doc.add_text(
-        body,
-        "He was an old man who fished alone in a skiff in the Gulf Stream and \
-         he had gone eighty-four days now without taking a fish.",
-    );
-
-    // ... and add it to the `IndexWriter`.
-    index_writer.add_document(old_man_doc);
-
-    // ### Create a document directly from json.
-    //
-    // Alternatively, we can use our schema to parse a
-    // document object directly from json.
-    // The document is a string, but we use the `json` macro
-    // from `serde_json` for the convenience of multi-line support.
-    let json = json!({
-       "title": "Of Mice and Men",
-       "body": "A few miles south of Soledad, the Salinas River drops in close to the hillside \
-                bank and runs deep and green. The water is warm too, for it has slipped twinkling \
-                over the yellow sands in the sunlight before reaching the narrow pool. On one \
-                side of the river the golden foothill slopes curve up to the strong and rocky \
-                Gabilan Mountains, but on the valley side the water is lined with trees—willows \
-                fresh and green with every spring, carrying in their lower leaf junctures the \
-                debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
-                limbs and branches that arch over the pool"
-    });
-    let mice_and_men_doc = schema.parse_document(&json.to_string())?;
-
-    index_writer.add_document(mice_and_men_doc);
-
-    // Multi-valued field are allowed, they are
-    // expressed in JSON by an array.
-    // The following document has two titles.
-    let json = json!({
-       "title": ["Frankenstein", "The Modern Prometheus"],
-       "body": "You will rejoice to hear that no disaster has accompanied the commencement of an \
-                enterprise which you have regarded with such evil forebodings.  I arrived here \
-                yesterday, and my first task is to assure my dear sister of my welfare and \
-                increasing confidence in the success of my undertaking."
-    });
-    let frankenstein_doc = schema.parse_document(&json.to_string())?;
-
-    index_writer.add_document(frankenstein_doc);
-
-    // This is an example, so we will only index 3 documents
-    // here. You can check out tantivy's tutorial to index
-    // the English wikipedia. Tantivy's indexing is rather fast.
-    // Indexing 5 million articles of the English wikipedia takes
-    // around 4 minutes on my computer!
-
-    // ### Committing
-    //
-    // At this point our documents are not searchable.
-    //
-    //
-    // We need to call .commit() explicitly to force the
-    // index_writer to finish processing the documents in the queue,
-    // flush the current index to the disk, and advertise
-    // the existence of new documents.
-    //
-    // This call is blocking.
+    index_writer.add_document(doc!(
+    title => "The Old Man and the Sea",
+    body => "He was an old man who fished alone in a skiff in the Gulf Stream and \
+     he had gone eighty-four days now without taking a fish."
+    ));
+    index_writer.add_document(doc!(
+    title => "Of Mice and Men",
+       body => r#"A few miles south of Soledad, the Salinas River drops in close to the hillside
+                bank and runs deep and green. The water is warm too, for it has slipped twinkling
+                over the yellow sands in the sunlight before reaching the narrow pool. On one
+                side of the river the golden foothill slopes curve up to the strong and rocky
+                Gabilan Mountains, but on the valley side the water is lined with trees—willows
+                fresh and green with every spring, carrying in their lower leaf junctures the
+                debris of the winter’s flooding; and sycamores with mottled, white, recumbent
+                limbs and branches that arch over the pool"#
+    ));
+    index_writer.add_document(doc!(
+    title => "Frankenstein",
+        body => r#"You will rejoice to hear that no disaster has accompanied the commencement of an
+                enterprise which you have regarded with such evil forebodings.  I arrived here
+                yesterday, and my first task is to assure my dear sister of my welfare and
+                increasing confidence in the success of my undertaking."#
+    ));
    index_writer.commit()?;

-    // If `.commit()` returns correctly, then all of the
-    // documents that have been added are guaranteed to be
-    // persistently indexed.
-    //
-    // In the scenario of a crash or a power failure,
-    // tantivy behaves as if has rolled back to its last
-    // commit.
-
-    // # Searching
-    //
-    // Let's search our index. Start by reloading
-    // searchers in the index. This should be done
-    // after every commit().
-    index.load_searchers()?;
-
-    // Afterwards create one (or more) searchers.
-    //
-    // You should create a searcher
-    // every time you start a "search query".
-    let searcher = index.searcher();
+    let reader = index.reader()?;
+    let searcher = reader.searcher();

    // The query parser can interpret human queries.
    // Here, if the user does not specify which
@@ -183,44 +101,12 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
    // here we want to get a hit on the 'ken' in Frankenstein
    let query = query_parser.parse_query("ken")?;

-    // A query defines a set of documents, as
-    // well as the way they should be scored.
-    //
-    // A query created by the query parser is scored according
-    // to a metric called Tf-Idf, and will consider
-    // any document matching at least one of our terms.
+    let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;

-    // ### Collectors
-    //
-    // We are not interested in all of the documents but
-    // only in the top 10. Keeping track of our top 10 best documents
-    // is the role of the TopCollector.
-    let mut top_collector = TopCollector::with_limit(10);
-
-    // We can now perform our query.
-    searcher.search(&*query, &mut top_collector)?;
-
-    // Our top collector now contains the 10
-    // most relevant doc ids...
-    let doc_addresses = top_collector.docs();
-
-    // The actual documents still need to be
-    // retrieved from Tantivy's store.
-    //
-    // Since the body field was not configured as stored,
-    // the document returned will only contain
-    // a title.
-
-    for doc_address in doc_addresses {
-        let retrieved_doc = searcher.doc(&doc_address)?;
+    for (_, doc_address) in top_docs {
+        let retrieved_doc = searcher.doc(doc_address)?;
        println!("{}", schema.to_json(&retrieved_doc));
    }

-    // Wait for indexing and merging threads to shut down.
-    // Usually this isn't needed, but in `main` we try to
-    // delete the temporary directory and that fails on
-    // Windows if the files are still open.
-    index_writer.wait_merging_threads()?;
-
    Ok(())
 }
--- a/examples/deleting_updating_documents.rs
+++ b/examples/deleting_updating_documents.rs
@@ -0,0 +1,143 @@
+// # Deleting and Updating (?) documents
+//
+// This example explains how to delete and update documents.
+// In fact there is actually no such thing as an update in tantivy.
+//
+// To update a document, you need to delete a document and then reinsert
+// its new version.
+//
+// ---
+// Importing tantivy...
+use tantivy::collector::TopDocs;
+use tantivy::query::TermQuery;
+use tantivy::schema::*;
+use tantivy::{doc, Index, IndexReader};
+
+// A simple helper function to fetch a single document
+// given its id from our index.
+// It will be helpful to check our work.
+fn extract_doc_given_isbn(
+    reader: &IndexReader,
+    isbn_term: &Term,
+) -> tantivy::Result<Option<Document>> {
+    let searcher = reader.searcher();
+
+    // This is the simplest query you can think of.
+    // It matches all of the documents containing a specific term.
+    //
+    // The second argument is here to tell we don't care about decoding positions,
+    // or term frequencies.
+    let term_query = TermQuery::new(isbn_term.clone(), IndexRecordOption::Basic);
+    let top_docs = searcher.search(&term_query, &TopDocs::with_limit(1))?;
+
+    if let Some((_score, doc_address)) = top_docs.first() {
+        let doc = searcher.doc(*doc_address)?;
+        Ok(Some(doc))
+    } else {
+        // no doc matching this ID.
+        Ok(None)
+    }
+}
+
+fn main() -> tantivy::Result<()> {
+    // # Defining the schema
+    //
+    // Check out the *basic_search* example if this makes
+    // little sense to you.
+    let mut schema_builder = Schema::builder();
+
+    // Tantivy does not really have a notion of primary id.
+    // This may change in the future.
+    //
+    // Still, we can create a `isbn` field and use it as an id. This
+    // field can be `u64` or a `text`, depending on your use case.
+    // It just needs to be indexed.
+    //
+    // If it is `text`, let's make sure to keep it `raw` and let's avoid
+    // running any text processing on it.
+    // This is done by associating this field to the tokenizer named `raw`.
+    // Rather than building our [`TextOptions`](//docs.rs/tantivy/~0/tantivy/schema/struct.TextOptions.html) manually,
+    // we use the `STRING` shortcut. `STRING` stands for indexed (without term frequency or positions)
+    // and untokenized.
+    //
+    // Because we also want to be able to see this `id` in our returned documents,
+    // we also mark the field as stored.
+    let isbn = schema_builder.add_text_field("isbn", STRING | STORED);
+    let title = schema_builder.add_text_field("title", TEXT | STORED);
+    let schema = schema_builder.build();
+
+    let index = Index::create_in_ram(schema.clone());
+
+    let mut index_writer = index.writer(50_000_000)?;
+
+    // Let's add a couple of documents, for the sake of the example.
+    let mut old_man_doc = Document::default();
+    old_man_doc.add_text(title, "The Old Man and the Sea");
+    index_writer.add_document(doc!(
+        isbn => "978-0099908401",
+        title => "The old Man and the see"
+    ));
+    index_writer.add_document(doc!(
+        isbn => "978-0140177398",
+        title => "Of Mice and Men",
+    ));
+    index_writer.add_document(doc!(
+       title => "Frankentein", //< Oops there is a typo here.
+       isbn => "978-9176370711",
+    ));
+    index_writer.commit()?;
+    let reader = index.reader()?;
+
+    let frankenstein_isbn = Term::from_field_text(isbn, "978-9176370711");
+
+    // Oops, our Frankenstein doc seems misspelled.
+    let frankenstein_doc_misspelled = extract_doc_given_isbn(&reader, &frankenstein_isbn)?.unwrap();
+    assert_eq!(
+        schema.to_json(&frankenstein_doc_misspelled),
+        r#"{"isbn":["978-9176370711"],"title":["Frankentein"]}"#,
+    );
+
+    // # Update = Delete + Insert
+    //
+    // Here we will want to update the typo in the `Frankenstein` book.
+    //
+    // Tantivy does not handle updates directly, we need to delete
+    // and reinsert the document.
+    //
+    // This can be complicated as it means you need to have access
+    // to the entire document. It is good practice to integrate tantivy
+    // with a key value store for this reason.
+    //
+    // To remove one of the documents, we just call `delete_term`
+    // on its id.
+    //
+    // Note that `tantivy` does nothing to enforce the idea that
+    // there is only one document associated to this id.
+    //
+    // Also you might have noticed that we apply the delete before
+    // having committed. This does not matter really...
+    index_writer.delete_term(frankenstein_isbn.clone());
+
+    // We now need to reinsert our document without the typo.
+    index_writer.add_document(doc!(
+       title => "Frankenstein",
+       isbn => "978-9176370711",
+    ));
+
+    // You are guaranteed that your clients will only observe your index in
+    // the state it was in after a commit.
+    // In this example, your search engine will at no point be missing the *Frankenstein* document.
+    // Everything happened as if the document was updated.
+    index_writer.commit()?;
+    // We reload our searcher to make our change available to clients.
+    reader.reload()?;
+
+    // No more typo!
+    let frankenstein_new_doc = extract_doc_given_isbn(&reader, &frankenstein_isbn)?.unwrap();
+    assert_eq!(
+        schema.to_json(&frankenstein_new_doc),
+        r#"{"isbn":["978-9176370711"],"title":["Frankenstein"]}"#,
+    );
+
+    Ok(())
+}
--- a/examples/faceted_search.rs
+++ b/examples/faceted_search.rs
@@ -0,0 +1,112 @@
+// # Faceted Search
+//
+// This example covers the basics of faceted search
+// in tantivy.
+//
+// We will :
+// - define our schema, including a facet field
+// - create an index in RAM
+// - index a few documents in our index
+// - count the documents under the facets right below "/Felidae"
+// - restrict a search to a facet using a term query.
+
+// ---
+// Importing tantivy...
+use tantivy::collector::FacetCollector;
+use tantivy::query::{AllQuery, TermQuery};
+use tantivy::schema::*;
+use tantivy::{doc, Index};
+
+fn main() -> tantivy::Result<()> {
+    // Let's define a schema with a text field and a facet field for this example
+    let mut schema_builder = Schema::builder();
+
+    let name = schema_builder.add_text_field("felin_name", TEXT | STORED);
+    // this is our faceted field: its scientific classification
+    let classification = schema_builder.add_facet_field("classification");
+
+    let schema = schema_builder.build();
+    let index = Index::create_in_ram(schema);
+
+    let mut index_writer = index.writer(30_000_000)?;
+
+    // For convenience, tantivy comes with the `doc!` macro to
+    // build documents without boilerplate.
+    index_writer.add_document(doc!(
+        name => "Cat",
+        classification => Facet::from("/Felidae/Felinae/Felis")
+    ));
+    index_writer.add_document(doc!(
+        name => "Canada lynx",
+        classification => Facet::from("/Felidae/Felinae/Lynx")
+    ));
+    index_writer.add_document(doc!(
+        name => "Cheetah",
+        classification => Facet::from("/Felidae/Felinae/Acinonyx")
+    ));
+    index_writer.add_document(doc!(
+        name => "Tiger",
+        classification => Facet::from("/Felidae/Pantherinae/Panthera")
+    ));
+    index_writer.add_document(doc!(
+        name => "Lion",
+        classification => Facet::from("/Felidae/Pantherinae/Panthera")
+    ));
+    index_writer.add_document(doc!(
+        name => "Jaguar",
+        classification => Facet::from("/Felidae/Pantherinae/Panthera")
+    ));
+    index_writer.add_document(doc!(
+        name => "Sunda clouded leopard",
+        classification => Facet::from("/Felidae/Pantherinae/Neofelis")
+    ));
+    index_writer.add_document(doc!(
+        name => "Fossa",
+        classification => Facet::from("/Eupleridae/Cryptoprocta")
+    ));
+    index_writer.commit()?;
+
+    let reader = index.reader()?;
+    let searcher = reader.searcher();
+    {
+        let mut facet_collector = FacetCollector::for_field(classification);
+        facet_collector.add_facet("/Felidae");
+        let facet_counts = searcher.search(&AllQuery, &facet_collector)?;
+        // This lists all of the facet counts, right below "/Felidae".
+        let facets: Vec<(&Facet, u64)> = facet_counts.get("/Felidae").collect();
+        assert_eq!(
+            facets,
+            vec![
+                (&Facet::from("/Felidae/Felinae"), 3),
+                (&Facet::from("/Felidae/Pantherinae"), 4),
+            ]
+        );
+    }
+
+    // Facets are also searchable.
+    //
+    // For instance, a common UI pattern is to allow the user to click on a facet link
+    // (e.g: `Pantherinae`) to drill down and filter the current result set with this subfacet.
+    //
+    // The search would then look as follows.
+
+    // Check the reference doc for different ways to create a `Facet` object.
+    {
+        let facet = Facet::from_text("/Felidae/Pantherinae");
+        let facet_term = Term::from_facet(classification, &facet);
+        let facet_term_query = TermQuery::new(facet_term, IndexRecordOption::Basic);
+        let mut facet_collector = FacetCollector::for_field(classification);
+        facet_collector.add_facet("/Felidae/Pantherinae");
+        let facet_counts = searcher.search(&facet_term_query, &facet_collector)?;
+        let facets: Vec<(&Facet, u64)> = facet_counts.get("/Felidae/Pantherinae").collect();
+        assert_eq!(
+            facets,
+            vec![
+                (&Facet::from("/Felidae/Pantherinae/Neofelis"), 1),
+                (&Facet::from("/Felidae/Pantherinae/Panthera"), 3),
+            ]
+        );
+    }
+
+    Ok(())
+}
--- a/examples/faceted_search_with_tweaked_score.rs
+++ b/examples/faceted_search_with_tweaked_score.rs
@@ -0,0 +1,98 @@
+use std::collections::HashSet;
+use tantivy::collector::TopDocs;
+use tantivy::doc;
+use tantivy::query::BooleanQuery;
+use tantivy::schema::*;
+use tantivy::{DocId, Index, Score, SegmentReader};
+
+fn main() -> tantivy::Result<()> {
+    let mut schema_builder = Schema::builder();
+
+    let title = schema_builder.add_text_field("title", STORED);
+    let ingredient = schema_builder.add_facet_field("ingredient");
+
+    let schema = schema_builder.build();
+    let index = Index::create_in_ram(schema.clone());
+
+    let mut index_writer = index.writer(30_000_000)?;
+
+    index_writer.add_document(doc!(
+        title => "Fried egg",
+        ingredient => Facet::from("/ingredient/egg"),
+        ingredient => Facet::from("/ingredient/oil"),
+    ));
+    index_writer.add_document(doc!(
+        title => "Scrambled egg",
+        ingredient => Facet::from("/ingredient/egg"),
+        ingredient => Facet::from("/ingredient/butter"),
+        ingredient => Facet::from("/ingredient/milk"),
+        ingredient => Facet::from("/ingredient/salt"),
+    ));
+    index_writer.add_document(doc!(
+        title => "Egg rolls",
+        ingredient => Facet::from("/ingredient/egg"),
+        ingredient => Facet::from("/ingredient/garlic"),
+        ingredient => Facet::from("/ingredient/salt"),
+        ingredient => Facet::from("/ingredient/oil"),
+        ingredient => Facet::from("/ingredient/tortilla-wrap"),
+        ingredient => Facet::from("/ingredient/mushroom"),
+    ));
+    index_writer.commit()?;
+
+    let reader = index.reader()?;
+    let searcher = reader.searcher();
+    {
+        let facets = vec![
+            Facet::from("/ingredient/egg"),
+            Facet::from("/ingredient/oil"),
+            Facet::from("/ingredient/garlic"),
+            Facet::from("/ingredient/mushroom"),
+        ];
+        let query = BooleanQuery::new_multiterms_query(
+            facets
+                .iter()
+                .map(|key| Term::from_facet(ingredient, &key))
+                .collect(),
+        );
+        let top_docs_by_custom_score =
+            TopDocs::with_limit(2).tweak_score(move |segment_reader: &SegmentReader| {
+                let ingredient_reader = segment_reader.facet_reader(ingredient).unwrap();
+                let facet_dict = ingredient_reader.facet_dict();
+
+                let query_ords: HashSet<u64> = facets
+                    .iter()
+                    .filter_map(|key| facet_dict.term_ord(key.encoded_str()))
+                    .collect();
+
+                let mut facet_ords_buffer: Vec<u64> = Vec::with_capacity(20);
+
+                move |doc: DocId, original_score: Score| {
+                    ingredient_reader.facet_ords(doc, &mut facet_ords_buffer);
+                    let missing_ingredients = facet_ords_buffer
+                        .iter()
+                        .filter(|ord| !query_ords.contains(ord))
+                        .count();
+                    let tweak = 1.0 / 4_f32.powi(missing_ingredients as i32);
+
+                    original_score * tweak
+                }
+            });
+        let top_docs = searcher.search(&query, &top_docs_by_custom_score)?;
+
+        let titles: Vec<String> = top_docs
+            .iter()
+            .map(|(_, doc_id)| {
+                searcher
+                    .doc(*doc_id)
+                    .unwrap()
+                    .get_first(title)
+                    .unwrap()
+                    .text()
+                    .unwrap()
+                    .to_owned()
+            })
+            .collect();
+        assert_eq!(titles, vec!["Fried egg", "Egg rolls"]);
+    }
+    Ok(())
+}
--- a/examples/generate_html.sh
+++ b/examples/generate_html.sh
@@ -1,2 +0,0 @@
-#!/bin/bash
-docco simple_search.rs -o html
--- a/examples/html/docco.css
+++ b/examples/html/docco.css
@@ -1,518 +0,0 @@
-/*--------------------- Typography ----------------------------*/
-
-@font-face {
-    font-family: 'aller-light';
-    src: url('public/fonts/aller-light.eot');
-    src: url('public/fonts/aller-light.eot?#iefix') format('embedded-opentype'),
-         url('public/fonts/aller-light.woff') format('woff'),
-         url('public/fonts/aller-light.ttf') format('truetype');
-    font-weight: normal;
-    font-style: normal;
-}
-
-@font-face {
-    font-family: 'aller-bold';
-    src: url('public/fonts/aller-bold.eot');
-    src: url('public/fonts/aller-bold.eot?#iefix') format('embedded-opentype'),
-         url('public/fonts/aller-bold.woff') format('woff'),
-         url('public/fonts/aller-bold.ttf') format('truetype');
-    font-weight: normal;
-    font-style: normal;
-}
-
-@font-face {
-    font-family: 'roboto-black';
-    src: url('public/fonts/roboto-black.eot');
-    src: url('public/fonts/roboto-black.eot?#iefix') format('embedded-opentype'),
-         url('public/fonts/roboto-black.woff') format('woff'),
-         url('public/fonts/roboto-black.ttf') format('truetype');
-    font-weight: normal;
-    font-style: normal;
-}
-
-/*--------------------- Layout ----------------------------*/
-html { height: 100%; }
-body {
-  font-family: "aller-light";
-  font-size: 14px;
-  line-height: 18px;
-  color: #30404f;
-  margin: 0; padding: 0;
-  height:100%;
-}
-#container { min-height: 100%; }
-
-a {
-  color: #000;
-}
-
-b, strong {
-  font-weight: normal;
-  font-family: "aller-bold";
-}
-
-p {
-  margin: 15px 0 0px;
-}
-  .annotation ul, .annotation ol {
-    margin: 25px 0;
-  }
-    .annotation ul li, .annotation ol li {
-      font-size: 14px;
-      line-height: 18px;
-      margin: 10px 0;
-    }
-
-h1, h2, h3, h4, h5, h6 {
-  color: #112233;
-  line-height: 1em;
-  font-weight: normal;
-  font-family: "roboto-black";
-  text-transform: uppercase;
-  margin: 30px 0 15px 0;
-}
-
-h1 {
-  margin-top: 40px;
-}
-h2 {
-  font-size: 1.26em;
-}
-
-hr {
-  border: 0;
-  background: 1px #ddd;
-  height: 1px;
-  margin: 20px 0;
-}
-
-pre, tt, code {
-  font-size: 12px; line-height: 16px;
-  font-family: Menlo, Monaco, Consolas, "Lucida Console", monospace;
-  margin: 0; padding: 0;
-}
-  .annotation pre {
-    display: block;
-    margin: 0;
-    padding: 7px 10px;
-    background: #fcfcfc;
-    -moz-box-shadow:    inset 0 0 10px rgba(0,0,0,0.1);
-    -webkit-box-shadow: inset 0 0 10px rgba(0,0,0,0.1);
-    box-shadow:         inset 0 0 10px rgba(0,0,0,0.1);
-    overflow-x: auto;
-  }
-    .annotation pre code {
-      border: 0;
-      padding: 0;
-      background: transparent;
-    }
-
-
-blockquote {
-  border-left: 5px solid #ccc;
-  margin: 0;
-  padding: 1px 0 1px 1em;
-}
-  .sections blockquote p {
-    font-family: Menlo, Consolas, Monaco, monospace;
-    font-size: 12px; line-height: 16px;
-    color: #999;
-    margin: 10px 0 0;
-    white-space: pre-wrap;
-  }
-
-ul.sections {
-  list-style: none;
-  padding:0 0 5px 0;;
-  margin:0;
-}
-
-/*
-  Force border-box so that % widths fit the parent
-  container without overlap because of margin/padding.
-
-  More Info : http://www.quirksmode.org/css/box.html
-*/
-ul.sections > li > div {
-  -moz-box-sizing: border-box;    /* firefox */
-  -ms-box-sizing: border-box;     /* ie */
-  -webkit-box-sizing: border-box; /* webkit */
-  -khtml-box-sizing: border-box;  /* konqueror */
-  box-sizing: border-box;         /* css3 */
-}
-
-
-/*---------------------- Jump Page -----------------------------*/
-#jump_to, #jump_page {
-  margin: 0;
-  background: white;
-  -webkit-box-shadow: 0 0 25px #777; -moz-box-shadow: 0 0 25px #777;
-  -webkit-border-bottom-left-radius: 5px; -moz-border-radius-bottomleft: 5px;
-  font: 16px Arial;
-  cursor: pointer;
-  text-align: right;
-  list-style: none;
-}
-
-#jump_to a {
-  text-decoration: none;
-}
-
-#jump_to a.large {
-  display: none;
-}
-#jump_to a.small {
-  font-size: 22px;
-  font-weight: bold;
-  color: #676767;
-}
-
-#jump_to, #jump_wrapper {
-  position: fixed;
-  right: 0; top: 0;
-  padding: 10px 15px;
-  margin:0;
-}
-
-#jump_wrapper {
-  display: none;
-  padding:0;
-}
-
-#jump_to:hover #jump_wrapper {
-  display: block;
-}
-
-#jump_page_wrapper{
-  position: fixed;
-  right: 0;
-  top: 0;
-  bottom: 0;
-}
-
-#jump_page {
-  padding: 5px 0 3px;
-  margin: 0 0 25px 25px;
-  max-height: 100%;
-  overflow: auto;
-}
-
-#jump_page .source {
-  display: block;
-  padding: 15px;
-  text-decoration: none;
-  border-top: 1px solid #eee;
-}
-
-#jump_page .source:hover {
-  background: #f5f5ff;
-}
-
-#jump_page .source:first-child {
-}
-
-/*---------------------- Low resolutions (> 320px) ---------------------*/
-@media only screen and (min-width: 320px) {
-  .pilwrap { display: none; }
-
-  ul.sections > li > div {
-    display: block;
-    padding:5px 10px 0 10px;
-  }
-
-  ul.sections > li > div.annotation ul, ul.sections > li > div.annotation ol {
-    padding-left: 30px;
-  }
-
-  ul.sections > li > div.content {
-    overflow-x:auto;
-    -webkit-box-shadow: inset 0 0 5px #e5e5ee;
-    box-shadow: inset 0 0 5px #e5e5ee;
-    border: 1px solid #dedede;
-    margin:5px 10px 5px 10px;
-    padding-bottom: 5px;
-  }
-
-  ul.sections > li > div.annotation pre {
-    margin: 7px 0 7px;
-    padding-left: 15px;
-  }
-
-  ul.sections > li > div.annotation p tt, .annotation code {
-    background: #f8f8ff;
-    border: 1px solid #dedede;
-    font-size: 12px;
-    padding: 0 0.2em;
-  }
-}
-
-/*----------------------  (> 481px) ---------------------*/
-@media only screen and (min-width: 481px) {
-  #container {
-    position: relative;
-  }
-  body {
-    background-color: #F5F5FF;
-    font-size: 15px;
-    line-height: 21px;
-  }
-  pre, tt, code {
-    line-height: 18px;
-  }
-  p, ul, ol {
-    margin: 0 0 15px;
-  }
-
-
-  #jump_to {
-    padding: 5px 10px;
-  }
-  #jump_wrapper {
-    padding: 0;
-  }
-  #jump_to, #jump_page {
-    font: 10px Arial;
-    text-transform: uppercase;
-  }
-  #jump_page .source {
-    padding: 5px 10px;
-  }
-  #jump_to a.large {
-    display: inline-block;
-  }
-  #jump_to a.small {
-    display: none;
-  }
-
-
-
-  #background {
-    position: absolute;
-    top: 0; bottom: 0;
-    width: 350px;
-    background: #fff;
-    border-right: 1px solid #e5e5ee;
-    z-index: -1;
-  }
-
-  ul.sections > li > div.annotation ul, ul.sections > li > div.annotation ol {
-    padding-left: 40px;
-  }
-
-  ul.sections > li {
-    white-space: nowrap;
-  }
-
-  ul.sections > li > div {
-    display: inline-block;
-  }
-
-  ul.sections > li > div.annotation {
-    max-width: 350px;
-    min-width: 350px;
-    min-height: 5px;
-    padding: 13px;
-    overflow-x: hidden;
-    white-space: normal;
-    vertical-align: top;
-    text-align: left;
-  }
-  ul.sections > li > div.annotation pre {
-    margin: 15px 0 15px;
-    padding-left: 15px;
-  }
-
-  ul.sections > li > div.content {
-    padding: 13px;
-    vertical-align: top;
-    border: none;
-    -webkit-box-shadow: none;
-    box-shadow: none;
-  }
-
-  .pilwrap {
-    position: relative;
-    display: inline;
-  }
-
-  .pilcrow {
-    font: 12px Arial;
-    text-decoration: none;
-    color: #454545;
-    position: absolute;
-    top: 3px; left: -20px;
-    padding: 1px 2px;
-    opacity: 0;
-    -webkit-transition: opacity 0.2s linear;
-  }
-    .for-h1 .pilcrow {
-      top: 47px;
-    }
-    .for-h2 .pilcrow, .for-h3 .pilcrow, .for-h4 .pilcrow {
-      top: 35px;
-    }
-
-  ul.sections > li > div.annotation:hover .pilcrow {
-    opacity: 1;
-  }
-}
-
-/*---------------------- (> 1025px) ---------------------*/
-@media only screen and (min-width: 1025px) {
-
-  body {
-    font-size: 16px;
-    line-height: 24px;
-  }
-
-  #background {
-    width: 525px;
-  }
-  ul.sections > li > div.annotation {
-    max-width: 525px;
-    min-width: 525px;
-    padding: 10px 25px 1px 50px;
-  }
-  ul.sections > li > div.content {
-    padding: 9px 15px 16px 25px;
-  }
-}
-
-/*---------------------- Syntax Highlighting -----------------------------*/
-
-td.linenos { background-color: #f0f0f0; padding-right: 10px; }
-span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; }
-/*
-
-github.com style (c) Vasily Polovnyov <vast@whiteants.net>
-
-*/
-
-pre code {
-  display: block; padding: 0.5em;
-  color: #000;
-  background: #f8f8ff
-}
-
-pre .hljs-comment,
-pre .hljs-template_comment,
-pre .hljs-diff .hljs-header,
-pre .hljs-javadoc {
-  color: #408080;
-  font-style: italic
-}
-
-pre .hljs-keyword,
-pre .hljs-assignment,
-pre .hljs-literal,
-pre .hljs-css .hljs-rule .hljs-keyword,
-pre .hljs-winutils,
-pre .hljs-javascript .hljs-title,
-pre .hljs-lisp .hljs-title,
-pre .hljs-subst {
-  color: #954121;
-  /*font-weight: bold*/
-}
-
-pre .hljs-number,
-pre .hljs-hexcolor {
-  color: #40a070
-}
-
-pre .hljs-string,
-pre .hljs-tag .hljs-value,
-pre .hljs-phpdoc,
-pre .hljs-tex .hljs-formula {
-  color: #219161;
-}
-
-pre .hljs-title,
-pre .hljs-id {
-  color: #19469D;
-}
-pre .hljs-params {
-  color: #00F;
-}
-
-pre .hljs-javascript .hljs-title,
-pre .hljs-lisp .hljs-title,
-pre .hljs-subst {
-  font-weight: normal
-}
-
-pre .hljs-class .hljs-title,
-pre .hljs-haskell .hljs-label,
-pre .hljs-tex .hljs-command {
-  color: #458;
-  font-weight: bold
-}
-
-pre .hljs-tag,
-pre .hljs-tag .hljs-title,
-pre .hljs-rules .hljs-property,
-pre .hljs-django .hljs-tag .hljs-keyword {
-  color: #000080;
-  font-weight: normal
-}
-
-pre .hljs-attribute,
-pre .hljs-variable,
-pre .hljs-instancevar,
-pre .hljs-lisp .hljs-body {
-  color: #008080
-}
-
-pre .hljs-regexp {
-  color: #B68
-}
-
-pre .hljs-class {
-  color: #458;
-  font-weight: bold
-}
-
-pre .hljs-symbol,
-pre .hljs-ruby .hljs-symbol .hljs-string,
-pre .hljs-ruby .hljs-symbol .hljs-keyword,
-pre .hljs-ruby .hljs-symbol .hljs-keymethods,
-pre .hljs-lisp .hljs-keyword,
-pre .hljs-tex .hljs-special,
-pre .hljs-input_number {
-  color: #990073
-}
-
-pre .hljs-builtin,
-pre .hljs-constructor,
-pre .hljs-built_in,
-pre .hljs-lisp .hljs-title {
-  color: #0086b3
-}
-
-pre .hljs-preprocessor,
-pre .hljs-pi,
-pre .hljs-doctype,
-pre .hljs-shebang,
-pre .hljs-cdata {
-  color: #999;
-  font-weight: bold
-}
-
-pre .hljs-deletion {
-  background: #fdd
-}
-
-pre .hljs-addition {
-  background: #dfd
-}
-
-pre .hljs-diff .hljs-change {
-  background: #0086b3
-}
-
-pre .hljs-chunk {
-  color: #aaa
-}
-
-pre .hljs-tex .hljs-formula {
-  opacity: 0.5;
-}
--- a/examples/html/public/fonts/aller-bold.eot
+++ b/examples/html/public/fonts/aller-bold.eot
--- a/examples/html/public/fonts/aller-bold.ttf
+++ b/examples/html/public/fonts/aller-bold.ttf
--- a/examples/html/public/fonts/aller-bold.woff
+++ b/examples/html/public/fonts/aller-bold.woff
--- a/examples/html/public/fonts/aller-light.eot
+++ b/examples/html/public/fonts/aller-light.eot
--- a/examples/html/public/fonts/aller-light.ttf
+++ b/examples/html/public/fonts/aller-light.ttf
--- a/examples/html/public/fonts/aller-light.woff
+++ b/examples/html/public/fonts/aller-light.woff
--- a/examples/html/public/fonts/fleurons.eot
+++ b/examples/html/public/fonts/fleurons.eot
--- a/examples/html/public/fonts/fleurons.ttf
+++ b/examples/html/public/fonts/fleurons.ttf
--- a/examples/html/public/fonts/fleurons.woff
+++ b/examples/html/public/fonts/fleurons.woff
--- a/examples/html/public/fonts/roboto-black.eot
+++ b/examples/html/public/fonts/roboto-black.eot
--- a/examples/html/public/fonts/roboto-black.ttf
+++ b/examples/html/public/fonts/roboto-black.ttf
--- a/examples/html/public/fonts/roboto-black.woff
+++ b/examples/html/public/fonts/roboto-black.woff
--- a/examples/html/public/images/gray.png
+++ b/examples/html/public/images/gray.png
--- a/examples/html/public/stylesheets/normalize.css
+++ b/examples/html/public/stylesheets/normalize.css
@@ -1,375 +0,0 @@
-/*! normalize.css v2.0.1 | MIT License | git.io/normalize */
-
-/* ==========================================================================
-   HTML5 display definitions
-   ========================================================================== */
-
-/*
- * Corrects `block` display not defined in IE 8/9.
- */
-
-article,
-aside,
-details,
-figcaption,
-figure,
-footer,
-header,
-hgroup,
-nav,
-section,
-summary {
-    display: block;
-}
-
-/*
- * Corrects `inline-block` display not defined in IE 8/9.
- */
-
-audio,
-canvas,
-video {
-    display: inline-block;
-}
-
-/*
- * Prevents modern browsers from displaying `audio` without controls.
- * Remove excess height in iOS 5 devices.
- */
-
-audio:not([controls]) {
-    display: none;
-    height: 0;
-}
-
-/*
- * Addresses styling for `hidden` attribute not present in IE 8/9.
- */
-
-[hidden] {
-    display: none;
-}
-
-/* ==========================================================================
-   Base
-   ========================================================================== */
-
-/*
- * 1. Sets default font family to sans-serif.
- * 2. Prevents iOS text size adjust after orientation change, without disabling
- *    user zoom.
- */
-
-html {
-    font-family: sans-serif; /* 1 */
-    -webkit-text-size-adjust: 100%; /* 2 */
-    -ms-text-size-adjust: 100%; /* 2 */
-}
-
-/*
- * Removes default margin.
- */
-
-body {
-    margin: 0;
-}
-
-/* ==========================================================================
-   Links
-   ========================================================================== */
-
-/*
- * Addresses `outline` inconsistency between Chrome and other browsers.
- */
-
-a:focus {
-    outline: thin dotted;
-}
-
-/*
- * Improves readability when focused and also mouse hovered in all browsers.
- */
-
-a:active,
-a:hover {
-    outline: 0;
-}
-
-/* ==========================================================================
-   Typography
-   ========================================================================== */
-
-/*
- * Addresses `h1` font sizes within `section` and `article` in Firefox 4+,
- * Safari 5, and Chrome.
- */
-
-h1 {
-    font-size: 2em;
-}
-
-/*
- * Addresses styling not present in IE 8/9, Safari 5, and Chrome.
- */
-
-abbr[title] {
-    border-bottom: 1px dotted;
-}
-
-/*
- * Addresses style set to `bolder` in Firefox 4+, Safari 5, and Chrome.
- */
-
-b,
-strong {
-    font-weight: bold;
-}
-
-/*
- * Addresses styling not present in Safari 5 and Chrome.
- */
-
-dfn {
-    font-style: italic;
-}
-
-/*
- * Addresses styling not present in IE 8/9.
- */
-
-mark {
-    background: #ff0;
-    color: #000;
-}
-
-
-/*
- * Corrects font family set oddly in Safari 5 and Chrome.
- */
-
-code,
-kbd,
-pre,
-samp {
-    font-family: monospace, serif;
-    font-size: 1em;
-}
-
-/*
- * Improves readability of pre-formatted text in all browsers.
- */
-
-pre {
-    white-space: pre;
-    white-space: pre-wrap;
-    word-wrap: break-word;
-}
-
-/*
- * Sets consistent quote types.
- */
-
-q {
-    quotes: "\201C" "\201D" "\2018" "\2019";
-}
-
-/*
- * Addresses inconsistent and variable font size in all browsers.
- */
-
-small {
-    font-size: 80%;
-}
-
-/*
- * Prevents `sub` and `sup` affecting `line-height` in all browsers.
- */
-
-sub,
-sup {
-    font-size: 75%;
-    line-height: 0;
-    position: relative;
-    vertical-align: baseline;
-}
-
-sup {
-    top: -0.5em;
-}
-
-sub {
-    bottom: -0.25em;
-}
-
-/* ==========================================================================
-   Embedded content
-   ========================================================================== */
-
-/*
- * Removes border when inside `a` element in IE 8/9.
- */
-
-img {
-    border: 0;
-}
-
-/*
- * Corrects overflow displayed oddly in IE 9.
- */
-
-svg:not(:root) {
-    overflow: hidden;
-}
-
-/* ==========================================================================
-   Figures
-   ========================================================================== */
-
-/*
- * Addresses margin not present in IE 8/9 and Safari 5.
- */
-
-figure {
-    margin: 0;
-}
-
-/* ==========================================================================
-   Forms
-   ========================================================================== */
-
-/*
- * Define consistent border, margin, and padding.
- */
-
-fieldset {
-    border: 1px solid #c0c0c0;
-    margin: 0 2px;
-    padding: 0.35em 0.625em 0.75em;
-}
-
-/*
- * 1. Corrects color not being inherited in IE 8/9.
- * 2. Remove padding so people aren't caught out if they zero out fieldsets.
- */
-
-legend {
-    border: 0; /* 1 */
-    padding: 0; /* 2 */
-}
-
-/*
- * 1. Corrects font family not being inherited in all browsers.
- * 2. Corrects font size not being inherited in all browsers.
- * 3. Addresses margins set differently in Firefox 4+, Safari 5, and Chrome
- */
-
-button,
-input,
-select,
-textarea {
-    font-family: inherit; /* 1 */
-    font-size: 100%; /* 2 */
-    margin: 0; /* 3 */
-}
-
-/*
- * Addresses Firefox 4+ setting `line-height` on `input` using `!important` in
- * the UA stylesheet.
- */
-
-button,
-input {
-    line-height: normal;
-}
-
-/*
- * 1. Avoid the WebKit bug in Android 4.0.* where (2) destroys native `audio`
- *    and `video` controls.
- * 2. Corrects inability to style clickable `input` types in iOS.
- * 3. Improves usability and consistency of cursor style between image-type
- *    `input` and others.
- */
-
-button,
-html input[type="button"], /* 1 */
-input[type="reset"],
-input[type="submit"] {
-    -webkit-appearance: button; /* 2 */
-    cursor: pointer; /* 3 */
-}
-
-/*
- * Re-set default cursor for disabled elements.
- */
-
-button[disabled],
-input[disabled] {
-    cursor: default;
-}
-
-/*
- * 1. Addresses box sizing set to `content-box` in IE 8/9.
- * 2. Removes excess padding in IE 8/9.
- */
-
-input[type="checkbox"],
-input[type="radio"] {
-    box-sizing: border-box; /* 1 */
-    padding: 0; /* 2 */
-}
-
-/*
- * 1. Addresses `appearance` set to `searchfield` in Safari 5 and Chrome.
- * 2. Addresses `box-sizing` set to `border-box` in Safari 5 and Chrome
- *    (include `-moz` to future-proof).
- */
-
-input[type="search"] {
-    -webkit-appearance: textfield; /* 1 */
-    -moz-box-sizing: content-box;
-    -webkit-box-sizing: content-box; /* 2 */
-    box-sizing: content-box;
-}
-
-/*
- * Removes inner padding and search cancel button in Safari 5 and Chrome
- * on OS X.
- */
-
-input[type="search"]::-webkit-search-cancel-button,
-input[type="search"]::-webkit-search-decoration {
-    -webkit-appearance: none;
-}
-
-/*
- * Removes inner padding and border in Firefox 4+.
- */
-
-button::-moz-focus-inner,
-input::-moz-focus-inner {
-    border: 0;
-    padding: 0;
-}
-
-/*
- * 1. Removes default vertical scrollbar in IE 8/9.
- * 2. Improves readability and alignment in all browsers.
- */
-
-textarea {
-    overflow: auto; /* 1 */
-    vertical-align: top; /* 2 */
-}
-
-/* ==========================================================================
-   Tables
-   ========================================================================== */
-
-/*
- * Remove most spacing between table cells.
- */
-
-table {
-    border-collapse: collapse;
-    border-spacing: 0;
-}
--- a/examples/html/simple_search.html
+++ b/examples/html/simple_search.html
@@ -1,542 +0,0 @@
-<!DOCTYPE html>
-
-<html>
-<head>
-  <title>simple_search.rs</title>
-  <meta http-equiv="content-type" content="text/html; charset=UTF-8">
-  <meta name="viewport" content="width=device-width, target-densitydpi=160dpi, initial-scale=1.0; maximum-scale=1.0; user-scalable=0;">
-  <link rel="stylesheet" media="all" href="docco.css" />
-</head>
-<body>
-  <div id="container">
-    <div id="background"></div>
-    
-    <ul class="sections">
-        
-          <li id="title">
-              <div class="annotation">
-                  <h1>simple_search.rs</h1>
-              </div>
-          </li>
-        
-        
-        
-        <li id="section-1">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-1">&#182;</a>
-              </div>
-              
-            </div>
-            
-            <div class="content"><div class='highlight'><pre><span class="hljs-keyword">extern</span> <span class="hljs-keyword">crate</span> tantivy;
-<span class="hljs-keyword">extern</span> <span class="hljs-keyword">crate</span> tempdir;
-
-<span class="hljs-meta">#[macro_use]</span>
-<span class="hljs-keyword">extern</span> <span class="hljs-keyword">crate</span> serde_json;
-
-<span class="hljs-keyword">use</span> std::path::Path;
-<span class="hljs-keyword">use</span> tempdir::TempDir;
-<span class="hljs-keyword">use</span> tantivy::Index;
-<span class="hljs-keyword">use</span> tantivy::schema::*;
-<span class="hljs-keyword">use</span> tantivy::collector::TopCollector;
-<span class="hljs-keyword">use</span> tantivy::query::QueryParser;
-
-<span class="hljs-function"><span class="hljs-keyword">fn</span> <span class="hljs-title">main</span></span>() {</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-2">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-2">&#182;</a>
-              </div>
-              <p>Let’s create a temporary directory for the
-sake of this example</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">if</span> <span class="hljs-keyword">let</span> <span class="hljs-literal">Ok</span>(dir) = TempDir::new(<span class="hljs-string">"tantivy_example_dir"</span>) {
-        run_example(dir.path()).unwrap();
-        dir.close().unwrap();
-    }
-}
-
-
-<span class="hljs-function"><span class="hljs-keyword">fn</span> <span class="hljs-title">run_example</span></span>(index_path: &amp;Path) -&gt; tantivy::<span class="hljs-built_in">Result</span>&lt;()&gt; {</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-3">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-3">&#182;</a>
-              </div>
-              <h1 id="defining-the-schema">Defining the schema</h1>
-<p>The Tantivy index requires a very strict schema.
-The schema declares which fields are in the index,
-and for each field, its type and “the way it should
-be indexed”.</p>
-
-            </div>
-            
-        </li>
-        
-        
-        <li id="section-4">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-4">&#182;</a>
-              </div>
-              <p>first we need to define a schema …</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> <span class="hljs-keyword">mut</span> schema_builder = SchemaBuilder::<span class="hljs-keyword">default</span>();</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-5">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-5">&#182;</a>
-              </div>
-              <p>Our first field is title.
-We want full-text search for it, and we also want 
-to be able to retrieve the document after the search.</p>
-<p>TEXT | STORED is some syntactic sugar to describe
-that.</p>
-<p><code>TEXT</code> means the field should be tokenized and indexed,
-along with its term frequency and term positions.</p>
-<p><code>STORED</code> means that the field will also be saved
-in a compressed, row-oriented key-value store.
-This store is useful to reconstruct the
-documents that were selected during the search phase.</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    schema_builder.add_text_field(<span class="hljs-string">"title"</span>, TEXT | STORED);</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-6">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-6">&#182;</a>
-              </div>
-              <p>Our second field is body.
-We want full-text search for it, but we do not 
-need to be able to be able to retrieve it
-for our application. </p>
-<p>We can make our index lighter and 
-by omitting <code>STORED</code> flag.</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    schema_builder.add_text_field(<span class="hljs-string">"body"</span>, TEXT);
-
-    <span class="hljs-keyword">let</span> schema = schema_builder.build();</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-7">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-7">&#182;</a>
-              </div>
-              <h1 id="indexing-documents">Indexing documents</h1>
-<p>Let’s create a brand new index.</p>
-<p>This will actually just save a meta.json
-with our schema in the directory.</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> index = Index::create(index_path, schema.clone())?;</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-8">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-8">&#182;</a>
-              </div>
-              <p>To insert document we need an index writer.
-There must be only one writer at a time.
-This single <code>IndexWriter</code> is already
-multithreaded.</p>
-<p>Here we use a buffer of 50MB per thread. Using a bigger
-heap for the indexer can increase its throughput.</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> <span class="hljs-keyword">mut</span> index_writer = index.writer(<span class="hljs-number">50_000_000</span>)?;</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-9">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-9">&#182;</a>
-              </div>
-              <p>Let’s index our documents!
-We first need a handle on the title and the body field.</p>
-
-            </div>
-            
-        </li>
-        
-        
-        <li id="section-10">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-10">&#182;</a>
-              </div>
-              <h3 id="create-a-document-manually-">Create a document “manually”.</h3>
-<p>We can create a document manually, by setting the fields
-one by one in a Document object.</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> title = schema.get_field(<span class="hljs-string">"title"</span>).unwrap();
-    <span class="hljs-keyword">let</span> body = schema.get_field(<span class="hljs-string">"body"</span>).unwrap();
-
-    <span class="hljs-keyword">let</span> <span class="hljs-keyword">mut</span> old_man_doc = Document::<span class="hljs-keyword">default</span>();
-    old_man_doc.add_text(title, <span class="hljs-string">"The Old Man and the Sea"</span>);
-    old_man_doc.add_text(
-        body,
-        <span class="hljs-string">"He was an old man who fished alone in a skiff in the Gulf Stream and \
-                          he had gone eighty-four days now without taking a fish."</span>,
-    );</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-11">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-11">&#182;</a>
-              </div>
-              <p>… and add it to the <code>IndexWriter</code>.</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    index_writer.add_document(old_man_doc);</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-12">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-12">&#182;</a>
-              </div>
-              <h3 id="create-a-document-directly-from-json-">Create a document directly from json.</h3>
-<p>Alternatively, we can use our schema to parse a
-document object directly from json.
-The document is a string, but we use the <code>json</code> macro
-from <code>serde_json</code> for the convenience of multi-line support.</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> json = json!({
-       <span class="hljs-string">"title"</span>: <span class="hljs-string">"Of Mice and Men"</span>,
-       <span class="hljs-string">"body"</span>: <span class="hljs-string">"A few miles south of Soledad, the Salinas River drops in close to the hillside \
-                bank and runs deep and green. The water is warm too, for it has slipped twinkling \
-                over the yellow sands in the sunlight before reaching the narrow pool. On one \
-                side of the river the golden foothill slopes curve up to the strong and rocky \
-                Gabilan Mountains, but on the valley side the water is lined with trees—willows \
-                fresh and green with every spring, carrying in their lower leaf junctures the \
-                debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
-                limbs and branches that arch over the pool"</span>
-    });
-    <span class="hljs-keyword">let</span> mice_and_men_doc = schema.parse_document(&amp;json.to_string())?;
-
-    index_writer.add_document(mice_and_men_doc);</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-13">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-13">&#182;</a>
-              </div>
-              <p>Multi-valued field are allowed, they are
-expressed in JSON by an array.
-The following document has two titles.</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> json = json!({
-       <span class="hljs-string">"title"</span>: [<span class="hljs-string">"Frankenstein"</span>, <span class="hljs-string">"The Modern Prometheus"</span>],
-       <span class="hljs-string">"body"</span>: <span class="hljs-string">"You will rejoice to hear that no disaster has accompanied the commencement of an \
-                enterprise which you have regarded with such evil forebodings.  I arrived here \
-                yesterday, and my first task is to assure my dear sister of my welfare and \
-                increasing confidence in the success of my undertaking."</span>
-    });
-    <span class="hljs-keyword">let</span> frankenstein_doc = schema.parse_document(&amp;json.to_string())?;
-
-    index_writer.add_document(frankenstein_doc);</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-14">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-14">&#182;</a>
-              </div>
-              <p>This is an example, so we will only index 3 documents
-here. You can check out tantivy’s tutorial to index
-the English wikipedia. Tantivy’s indexing is rather fast.
-Indexing 5 million articles of the English wikipedia takes
-around 4 minutes on my computer!</p>
-
-            </div>
-            
-        </li>
-        
-        
-        <li id="section-15">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-15">&#182;</a>
-              </div>
-              <h3 id="committing">Committing</h3>
-<p>At this point our documents are not searchable.</p>
-<p>We need to call .commit() explicitly to force the
-index_writer to finish processing the documents in the queue,
-flush the current index to the disk, and advertise
-the existence of new documents.</p>
-<p>This call is blocking.</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    index_writer.commit()?;</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-16">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-16">&#182;</a>
-              </div>
-              <p>If <code>.commit()</code> returns correctly, then all of the
-documents that have been added are guaranteed to be
-persistently indexed.</p>
-<p>In the scenario of a crash or a power failure,
-tantivy behaves as if has rolled back to its last
-commit.</p>
-
-            </div>
-            
-        </li>
-        
-        
-        <li id="section-17">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-17">&#182;</a>
-              </div>
-              <h1 id="searching">Searching</h1>
-<p>Let’s search our index. Start by reloading
-searchers in the index. This should be done
-after every commit().</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    index.load_searchers()?;</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-18">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-18">&#182;</a>
-              </div>
-              <p>Afterwards create one (or more) searchers.</p>
-<p>You should create a searcher
-every time you start a “search query”.</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> searcher = index.searcher();</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-19">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-19">&#182;</a>
-              </div>
-              <p>The query parser can interpret human queries.
-Here, if the user does not specify which
-field they want to search, tantivy will search
-in both title and body.</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> <span class="hljs-keyword">mut</span> query_parser = QueryParser::for_index(index, <span class="hljs-built_in">vec!</span>[title, body]);</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-20">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-20">&#182;</a>
-              </div>
-              <p>QueryParser may fail if the query is not in the right
-format. For user facing applications, this can be a problem.
-A ticket has been opened regarding this problem.</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> query = query_parser.parse_query(<span class="hljs-string">"sea whale"</span>)?;</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-21">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-21">&#182;</a>
-              </div>
-              <p>A query defines a set of documents, as
-well as the way they should be scored.</p>
-<p>A query created by the query parser is scored according
-to a metric called Tf-Idf, and will consider
-any document matching at least one of our terms.</p>
-
-            </div>
-            
-        </li>
-        
-        
-        <li id="section-22">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-22">&#182;</a>
-              </div>
-              <h3 id="collectors">Collectors</h3>
-<p>We are not interested in all of the documents but
-only in the top 10. Keeping track of our top 10 best documents
-is the role of the TopCollector.</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> <span class="hljs-keyword">mut</span> top_collector = TopCollector::with_limit(<span class="hljs-number">10</span>);</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-23">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-23">&#182;</a>
-              </div>
-              <p>We can now perform our query.</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    searcher.search(&amp;*query, &amp;<span class="hljs-keyword">mut</span> top_collector)?;</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-24">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-24">&#182;</a>
-              </div>
-              <p>Our top collector now contains the 10
-most relevant doc ids…</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> doc_addresses = top_collector.docs();</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-25">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-25">&#182;</a>
-              </div>
-              <p>The actual documents still need to be
-retrieved from Tantivy’s store.</p>
-<p>Since the body field was not configured as stored,
-the document returned will only contain
-a title.</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>
-    <span class="hljs-keyword">for</span> doc_address <span class="hljs-keyword">in</span> doc_addresses {
-        <span class="hljs-keyword">let</span> retrieved_doc = searcher.doc(&amp;doc_address)?;
-        <span class="hljs-built_in">println!</span>(<span class="hljs-string">"{}"</span>, schema.to_json(&amp;retrieved_doc));
-    }</pre></div></div>
-            
-        </li>
-        
-        
-        <li id="section-26">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-26">&#182;</a>
-              </div>
-              <p>Wait for indexing and merging threads to shut down.
-Usually this isn’t needed, but in <code>main</code> we try to
-delete the temporary directory and that fails on
-Windows if the files are still open.</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    index_writer.wait_merging_threads()?;
-
-    <span class="hljs-literal">Ok</span>(())
-}</pre></div></div>
-            
-        </li>
-        
-    </ul>
-  </div>
-</body>
-</html>
--- a/examples/integer_range_search.rs
+++ b/examples/integer_range_search.rs
@@ -0,0 +1,39 @@
+// # Searching a range on an indexed int field.
+//
+// Below is an example of creating an indexed integer field in your schema
+// You can use RangeQuery to get a Count of all occurrences in a given range.
+use tantivy::collector::Count;
+use tantivy::query::RangeQuery;
+use tantivy::schema::{Schema, INDEXED};
+use tantivy::{doc, Index, Result};
+
+fn run() -> Result<()> {
+    // For the sake of simplicity, this schema only has one field: `year`.
+    let mut schema_builder = Schema::builder();
+
+    // `INDEXED` is a short-hand to indicate that our field should be "searchable".
+    let year_field = schema_builder.add_u64_field("year", INDEXED);
+    let schema = schema_builder.build();
+    let index = Index::create_in_ram(schema);
+    let reader = index.reader()?;
+    {
+        let mut index_writer = index.writer_with_num_threads(1, 6_000_000)?;
+        for year in 1950u64..2019u64 {
+            index_writer.add_document(doc!(year_field => year));
+        }
+        index_writer.commit()?;
+        // The index now holds one document per year, from 1950 to 2018 inclusive.
+    }
+    reader.reload()?;
+    let searcher = reader.searcher();
+    // The end of the range is excluded, i.e. here we are searching from 1960 up to 1969.
+    let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960..1970);
+    // Uses the `Count` collector to get the number of docs matching the range query.
+    let num_60s_books = searcher.search(&docs_in_the_sixties, &Count)?;
+    assert_eq!(num_60s_books, 10);
+    Ok(())
+}
+
+fn main() {
+    run().unwrap() // panics with the error if any step of the example failed
+}
--- a/examples/iterating_docs_and_positions.rs
+++ b/examples/iterating_docs_and_positions.rs
@@ -0,0 +1,135 @@
+// # Iterating docs and positions.
+//
+// At its core, tantivy relies on a data structure
+// called an inverted index.
+//
+// This example shows how to manually iterate through
+// the list of documents containing a term, getting
+// its term frequency, and accessing its positions.
+
+// ---
+// Importing tantivy...
+use tantivy::schema::*;
+use tantivy::{doc, DocSet, Index, Postings, TERMINATED};
+
+fn main() -> tantivy::Result<()> {
+    // We first create a schema for the sake of the
+    // example. Check the `basic_search` example for more information.
+    let mut schema_builder = Schema::builder();
+
+    // For this example, we need to make sure to index positions for our title
+    // field. `TEXT` precisely does this.
+    let title = schema_builder.add_text_field("title", TEXT | STORED);
+    let schema = schema_builder.build();
+
+    let index = Index::create_in_ram(schema.clone());
+
+    let mut index_writer = index.writer_with_num_threads(1, 50_000_000)?;
+    index_writer.add_document(doc!(title => "The Old Man and the Sea"));
+    index_writer.add_document(doc!(title => "Of Mice and Men"));
+    index_writer.add_document(doc!(title => "The modern Promotheus"));
+    index_writer.commit()?;
+
+    let reader = index.reader()?;
+
+    let searcher = reader.searcher();
+
+    // A tantivy index is actually a collection of segments.
+    // Similarly, a searcher just wraps a list of `segment_reader`s.
+    //
+    // (Because we indexed a very small number of documents over one thread
+    // there is actually only one segment here, but let's iterate through the list
+    // anyway)
+    for segment_reader in searcher.segment_readers() {
+        // A segment contains different data structures.
+        // Inverted index stands for the combination of
+        // - the term dictionary
+        // - the inverted lists associated with each term and their positions
+        let inverted_index = segment_reader.inverted_index(title);
+
+        // A `Term` is a text token associated with a field.
+        // Let's go through all docs containing the term `title:the` and access their positions.
+        let term_the = Term::from_field_text(title, "the");
+
+        // This segment posting object is like a cursor over the documents matching the term.
+        // The `IndexRecordOption` argument tells tantivy we will be interested in both term frequencies
+        // and positions.
+        //
+        // If you don't need all this information, you may get better performance by decompressing less
+        // information.
+        if let Some(mut segment_postings) =
+            inverted_index.read_postings(&term_the, IndexRecordOption::WithFreqsAndPositions)
+        {
+            // This buffer will be used to request positions.
+            let mut positions: Vec<u32> = Vec::with_capacity(100);
+            let mut doc_id = segment_postings.doc();
+            while doc_id != TERMINATED {
+                // The postings MAY contain deleted documents as well.
+                if segment_reader.is_deleted(doc_id) {
+                    doc_id = segment_postings.advance();
+                    continue;
+                }
+
+                // The number of times the term appears in the document.
+                let term_freq: u32 = segment_postings.term_freq();
+                // Accessing positions is slightly expensive and lazy; do not request
+                // them for documents where you don't need them.
+                segment_postings.positions(&mut positions);
+
+                // By definition we should have `term_freq` positions.
+                assert_eq!(positions.len(), term_freq as usize);
+
+                // This prints:
+                // ```
+                // Doc 0: TermFreq 2: [0, 4]
+                // Doc 2: TermFreq 1: [0]
+                // ```
+                println!("Doc {}: TermFreq {}: {:?}", doc_id, term_freq, positions);
+                doc_id = segment_postings.advance();
+            }
+        }
+    }
+
+    // A `Term` is a text token associated with a field.
+    // Let's go through all docs containing the term `title:the` once more, block by block this time.
+    let term_the = Term::from_field_text(title, "the");
+
+    // Some other powerful operations (especially `.skip_to`) may be useful to consume these
+    // posting lists rapidly.
+    // You can check for them in the [`DocSet`](https://docs.rs/tantivy/~0/tantivy/trait.DocSet.html) trait
+    // and the [`Postings`](https://docs.rs/tantivy/~0/tantivy/trait.Postings.html) trait
+
+    // Also, for some VERY specific high performance use cases like an OLAP analysis of logs,
+    // you can get better performance by accessing directly the blocks of doc ids.
+    for segment_reader in searcher.segment_readers() {
+        // A segment contains different data structures.
+        // Inverted index stands for the combination of
+        // - the term dictionary
+        // - the inverted lists associated with each term and their positions
+        let inverted_index = segment_reader.inverted_index(title);
+
+        // This block segment posting object is like a cursor over blocks of documents matching the term.
+        // Unlike the first loop, we pass `IndexRecordOption::Basic` here: only the doc ids are read
+        // below, so term frequencies and positions are not requested.
+        //
+        // If you don't need all this information, you may get better performance by decompressing less
+        // information.
+        if let Some(mut block_segment_postings) =
+            inverted_index.read_block_postings(&term_the, IndexRecordOption::Basic)
+        {
+            loop {
+                let docs = block_segment_postings.docs();
+                if docs.is_empty() {
+                    break;
+                }
+                // Once again these docs MAY contain deleted documents as well.
+                let docs = block_segment_postings.docs();
+                // Prints `Docs [0, 2].`
+                println!("Docs {:?}", docs);
+                block_segment_postings.advance();
+            }
+        }
+    }
+
+    Ok(())
+}
--- a/examples/multiple_producer.rs
+++ b/examples/multiple_producer.rs
@@ -0,0 +1,100 @@
+// # Indexing from different threads.
+//
+// It is fairly common to have to index from different threads.
+// Tantivy forbids creating more than one `IndexWriter` at a time.
+//
+// This `IndexWriter` itself has its own multithreaded layer, so managing your own
+// indexing threads will not help. However, it can still be useful for some applications.
+//
+// For instance, if preparing documents to send to tantivy before indexing is the bottleneck of
+// your application, it is reasonable to have multiple threads.
+//
+// Another very common reason to want to index from multiple threads, is implementing a webserver
+// with CRUD capabilities. The server framework will most likely handle requests from
+// different threads.
+//
+// The recommended way to address both of these use cases is to wrap your `IndexWriter` into a
+// `Arc<RwLock<IndexWriter>>`.
+//
+// While this is counterintuitive, adding and deleting documents do not require mutability
+// over the `IndexWriter`, so several threads will be able to do this operation concurrently.
+//
+// The example below does not represent an actual real-life use case (who would spawn a thread to
+// index a single document?), but aims at demonstrating the mechanism that makes indexing
+// from several threads possible.
+
+// ---
+// Importing tantivy...
+use std::sync::{Arc, RwLock};
+use std::thread;
+use std::time::Duration;
+use tantivy::schema::{Schema, STORED, TEXT};
+use tantivy::{doc, Index, IndexWriter, Opstamp};
+
+fn main() -> tantivy::Result<()> {
+    // # Defining the schema
+    let mut schema_builder = Schema::builder();
+    let title = schema_builder.add_text_field("title", TEXT | STORED);
+    let body = schema_builder.add_text_field("body", TEXT);
+    let schema = schema_builder.build();
+
+    let index = Index::create_in_ram(schema);
+    let index_writer: Arc<RwLock<IndexWriter>> = Arc::new(RwLock::new(index.writer(50_000_000)?));
+
+    // # First indexing thread.
+    let index_writer_clone_1 = index_writer.clone();
+    thread::spawn(move || {
+        // We index the same document 100 times... for the sake of the example.
+        for i in 0..100 {
+            let opstamp = index_writer_clone_1
+                .read().unwrap() //< A read lock is sufficient here.
+                .add_document(
+                    doc!(
+                        title => "Of Mice and Men",
+                        body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \
+                        bank and runs deep and green. The water is warm too, for it has slipped twinkling \
+                        over the yellow sands in the sunlight before reaching the narrow pool. On one \
+                        side of the river the golden foothill slopes curve up to the strong and rocky \
+                        Gabilan Mountains, but on the valley side the water is lined with trees—willows \
+                        fresh and green with every spring, carrying in their lower leaf junctures the \
+                        debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
+                        limbs and branches that arch over the pool"
+                    ));
+            println!("add doc {} from thread 1 - opstamp {}", i, opstamp);
+            thread::sleep(Duration::from_millis(20));
+        }
+    });
+
+    // # Second indexing thread.
+    let index_writer_clone_2 = index_writer.clone();
+    // For convenience, tantivy also comes with a macro to
+    // reduce the boilerplate above.
+    thread::spawn(move || {
+        // We index the same document 100 times... for the sake of the example.
+        for i in 0..100 {
+            // A read lock is sufficient here.
+            let opstamp = {
+                let index_writer_rlock = index_writer_clone_2.read().unwrap();
+                index_writer_rlock.add_document(doc!(
+                    title => "Manufacturing consent",
+                    body => "Some great book description..."
+                ))
+            };
+            println!("add doc {} from thread 2 - opstamp {}", i, opstamp);
+            thread::sleep(Duration::from_millis(10));
+        }
+    });
+
+    // # In the main thread, we commit 10 times, once every 500ms.
+    for _ in 0..10 {
+        let opstamp: Opstamp = {
+            // Committing or rolling back, on the other hand, requires a write lock. This will block other threads.
+            let mut index_writer_wlock = index_writer.write().unwrap();
+            index_writer_wlock.commit().unwrap()
+        };
+        println!("committed with opstamp {}", opstamp);
+        thread::sleep(Duration::from_millis(500));
+    }
+
+    Ok(())
+}
--- a/examples/pre_tokenized_text.rs
+++ b/examples/pre_tokenized_text.rs
@@ -0,0 +1,139 @@
+// # Pre-tokenized text example
+//
+// This example shows how to use pre-tokenized text. Sometimes you might
+// want to index and search through text which is already split into
+// tokens by some external tool.
+//
+// In this example we will:
+// - use tantivy tokenizer to create tokens and load them directly into tantivy,
+// - import tokenized text straight from json,
+// - perform a search on documents with pre-tokenized text
+
+use tantivy::collector::{Count, TopDocs};
+use tantivy::query::TermQuery;
+use tantivy::schema::*;
+use tantivy::tokenizer::{PreTokenizedString, SimpleTokenizer, Token, Tokenizer};
+use tantivy::{doc, Index, ReloadPolicy};
+use tempfile::TempDir;
+
+fn pre_tokenize_text(text: &str) -> Vec<Token> {
+    let mut token_stream = SimpleTokenizer.token_stream(text);
+    let mut tokens = vec![];
+    while token_stream.advance() {
+        tokens.push(token_stream.token().clone());
+    }
+    tokens
+}
+
+fn main() -> tantivy::Result<()> {
+    let index_path = TempDir::new()?;
+
+    let mut schema_builder = Schema::builder();
+
+    schema_builder.add_text_field("title", TEXT | STORED);
+    schema_builder.add_text_field("body", TEXT);
+
+    let schema = schema_builder.build();
+
+    let index = Index::create_in_dir(&index_path, schema.clone())?;
+
+    let mut index_writer = index.writer(50_000_000)?;
+
+    // Grab handles on the `title` and `body` fields: they are needed
+    // to build the documents and the term queries below.
+    let title = schema.get_field("title").unwrap();
+    let body = schema.get_field("body").unwrap();
+
+    let title_text = "The Old Man and the Sea";
+    let body_text = "He was an old man who fished alone in a skiff in the Gulf Stream";
+
+    // Content of our first document
+    // We create `PreTokenizedString` which contains original text and vector of tokens
+    let title_tok = PreTokenizedString {
+        text: String::from(title_text),
+        tokens: pre_tokenize_text(title_text),
+    };
+
+    println!(
+        "Original text: \"{}\" and tokens: {:?}",
+        title_tok.text, title_tok.tokens
+    );
+
+    let body_tok = PreTokenizedString {
+        text: String::from(body_text),
+        tokens: pre_tokenize_text(body_text),
+    };
+
+    // Now let's create a document and add our `PreTokenizedString`
+    let old_man_doc = doc!(title => title_tok, body => body_tok);
+
+    // ... now let's just add it to the IndexWriter
+    index_writer.add_document(old_man_doc);
+
+    // Pretokenized text can also be fed as JSON
+    let short_man_json = r#"{
+        "title":[{
+            "text":"The Old Man",
+            "tokens":[
+                {"offset_from":0,"offset_to":3,"position":0,"text":"The","position_length":1},
+                {"offset_from":4,"offset_to":7,"position":1,"text":"Old","position_length":1},
+                {"offset_from":8,"offset_to":11,"position":2,"text":"Man","position_length":1}
+            ]
+        }]
+    }"#;
+
+    let short_man_doc = schema.parse_document(&short_man_json)?;
+
+    index_writer.add_document(short_man_doc);
+
+    // Let's commit changes
+    index_writer.commit()?;
+
+    // ... and now is the time to query our index
+
+    let reader = index
+        .reader_builder()
+        .reload_policy(ReloadPolicy::OnCommit)
+        .try_into()?;
+
+    let searcher = reader.searcher();
+
+    // We want to get documents with token "Man", we will use TermQuery to do it
+    // Using PreTokenizedString means the tokens are stored as is avoiding stemming
+    // and lowercasing, which preserves full words in their original form
+    let query = TermQuery::new(
+        Term::from_field_text(title, "Man"),
+        IndexRecordOption::Basic,
+    );
+
+    let (top_docs, count) = searcher
+        .search(&query, &(TopDocs::with_limit(2), Count))
+        .unwrap();
+
+    assert_eq!(count, 2);
+
+    // Now let's print out the results.
+    // Note that the tokens are not stored along with the original text
+    // in the document store
+    for (_score, doc_address) in top_docs {
+        let retrieved_doc = searcher.doc(doc_address)?;
+        println!("Document: {}", schema.to_json(&retrieved_doc));
+    }
+
+    // Contrary to the previous query, when we search for the "man" term we
+    // should get no results, as it's not one of the indexed tokens. SimpleTokenizer
+    // only splits text on whitespace / punctuation.
+
+    let query = TermQuery::new(
+        Term::from_field_text(title, "man"),
+        IndexRecordOption::Basic,
+    );
+
+    let (_top_docs, count) = searcher
+        .search(&query, &(TopDocs::with_limit(2), Count))
+        .unwrap();
+
+    assert_eq!(count, 0);
+
+    Ok(())
+}
--- a/examples/simple_search.rs
+++ b/examples/simple_search.rs
@@ -1,225 +0,0 @@
-extern crate tantivy;
-extern crate tempdir;
-
-#[macro_use]
-extern crate serde_json;
-
-use std::path::Path;
-use tantivy::collector::TopCollector;
-use tantivy::query::QueryParser;
-use tantivy::schema::*;
-use tantivy::Index;
-use tempdir::TempDir;
-
-fn main() {
-    // Let's create a temporary directory for the
-    // sake of this example
-    if let Ok(dir) = TempDir::new("tantivy_example_dir") {
-        run_example(dir.path()).unwrap();
-        dir.close().unwrap();
-    }
-}
-
-fn run_example(index_path: &Path) -> tantivy::Result<()> {
-    // # Defining the schema
-    //
-    // The Tantivy index requires a very strict schema.
-    // The schema declares which fields are in the index,
-    // and for each field, its type and "the way it should
-    // be indexed".
-
-    // first we need to define a schema ...
-    let mut schema_builder = SchemaBuilder::default();
-
-    // Our first field is title.
-    // We want full-text search for it, and we also want
-    // to be able to retrieve the document after the search.
-    //
-    // TEXT | STORED is some syntactic sugar to describe
-    // that.
-    //
-    // `TEXT` means the field should be tokenized and indexed,
-    // along with its term frequency and term positions.
-    //
-    // `STORED` means that the field will also be saved
-    // in a compressed, row-oriented key-value store.
-    // This store is useful to reconstruct the
-    // documents that were selected during the search phase.
-    schema_builder.add_text_field("title", TEXT | STORED);
-
-    // Our second field is body.
-    // We want full-text search for it, but we do not
-    // need to be able to be able to retrieve it
-    // for our application.
-    //
-    // We can make our index lighter and
-    // by omitting `STORED` flag.
-    schema_builder.add_text_field("body", TEXT);
-
-    let schema = schema_builder.build();
-
-    // # Indexing documents
-    //
-    // Let's create a brand new index.
-    //
-    // This will actually just save a meta.json
-    // with our schema in the directory.
-    let index = Index::create_in_dir(index_path, schema.clone())?;
-
-    // To insert document we need an index writer.
-    // There must be only one writer at a time.
-    // This single `IndexWriter` is already
-    // multithreaded.
-    //
-    // Here we use a buffer of 50MB per thread. Using a bigger
-    // heap for the indexer can increase its throughput.
-    let mut index_writer = index.writer(50_000_000)?;
-
-    // Let's index our documents!
-    // We first need a handle on the title and the body field.
-
-    // ### Create a document "manually".
-    //
-    // We can create a document manually, by setting the fields
-    // one by one in a Document object.
-    let title = schema.get_field("title").unwrap();
-    let body = schema.get_field("body").unwrap();
-
-    let mut old_man_doc = Document::default();
-    old_man_doc.add_text(title, "The Old Man and the Sea");
-    old_man_doc.add_text(
-        body,
-        "He was an old man who fished alone in a skiff in the Gulf Stream and \
-         he had gone eighty-four days now without taking a fish.",
-    );
-
-    // ... and add it to the `IndexWriter`.
-    index_writer.add_document(old_man_doc);
-
-    // ### Create a document directly from json.
-    //
-    // Alternatively, we can use our schema to parse a
-    // document object directly from json.
-    // The document is a string, but we use the `json` macro
-    // from `serde_json` for the convenience of multi-line support.
-    let json = json!({
-       "title": "Of Mice and Men",
-       "body": "A few miles south of Soledad, the Salinas River drops in close to the hillside \
-                bank and runs deep and green. The water is warm too, for it has slipped twinkling \
-                over the yellow sands in the sunlight before reaching the narrow pool. On one \
-                side of the river the golden foothill slopes curve up to the strong and rocky \
-                Gabilan Mountains, but on the valley side the water is lined with trees—willows \
-                fresh and green with every spring, carrying in their lower leaf junctures the \
-                debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
-                limbs and branches that arch over the pool"
-    });
-    let mice_and_men_doc = schema.parse_document(&json.to_string())?;
-
-    index_writer.add_document(mice_and_men_doc);
-
-    // Multi-valued field are allowed, they are
-    // expressed in JSON by an array.
-    // The following document has two titles.
-    let json = json!({
-       "title": ["Frankenstein", "The Modern Prometheus"],
-       "body": "You will rejoice to hear that no disaster has accompanied the commencement of an \
-                enterprise which you have regarded with such evil forebodings.  I arrived here \
-                yesterday, and my first task is to assure my dear sister of my welfare and \
-                increasing confidence in the success of my undertaking."
-    });
-    let frankenstein_doc = schema.parse_document(&json.to_string())?;
-
-    index_writer.add_document(frankenstein_doc);
-
-    // This is an example, so we will only index 3 documents
-    // here. You can check out tantivy's tutorial to index
-    // the English wikipedia. Tantivy's indexing is rather fast.
-    // Indexing 5 million articles of the English wikipedia takes
-    // around 4 minutes on my computer!
-
-    // ### Committing
-    //
-    // At this point our documents are not searchable.
-    //
-    //
-    // We need to call .commit() explicitly to force the
-    // index_writer to finish processing the documents in the queue,
-    // flush the current index to the disk, and advertise
-    // the existence of new documents.
-    //
-    // This call is blocking.
-    index_writer.commit()?;
-
-    // If `.commit()` returns correctly, then all of the
-    // documents that have been added are guaranteed to be
-    // persistently indexed.
-    //
-    // In the scenario of a crash or a power failure,
-    // tantivy behaves as if has rolled back to its last
-    // commit.
-
-    // # Searching
-    //
-    // Let's search our index. Start by reloading
-    // searchers in the index. This should be done
-    // after every commit().
-    index.load_searchers()?;
-
-    // Afterwards create one (or more) searchers.
-    //
-    // You should create a searcher
-    // every time you start a "search query".
-    let searcher = index.searcher();
-
-    // The query parser can interpret human queries.
-    // Here, if the user does not specify which
-    // field they want to search, tantivy will search
-    // in both title and body.
-    let query_parser = QueryParser::for_index(&index, vec![title, body]);
-
-    // QueryParser may fail if the query is not in the right
-    // format. For user facing applications, this can be a problem.
-    // A ticket has been opened regarding this problem.
-    let query = query_parser.parse_query("sea whale")?;
-
-    // A query defines a set of documents, as
-    // well as the way they should be scored.
-    //
-    // A query created by the query parser is scored according
-    // to a metric called Tf-Idf, and will consider
-    // any document matching at least one of our terms.
-
-    // ### Collectors
-    //
-    // We are not interested in all of the documents but
-    // only in the top 10. Keeping track of our top 10 best documents
-    // is the role of the TopCollector.
-    let mut top_collector = TopCollector::with_limit(10);
-
-    // We can now perform our query.
-    searcher.search(&*query, &mut top_collector)?;
-
-    // Our top collector now contains the 10
-    // most relevant doc ids...
-    let doc_addresses = top_collector.docs();
-
-    // The actual documents still need to be
-    // retrieved from Tantivy's store.
-    //
-    // Since the body field was not configured as stored,
-    // the document returned will only contain
-    // a title.
-
-    for doc_address in doc_addresses {
-        let retrieved_doc = searcher.doc(&doc_address)?;
-        println!("{}", schema.to_json(&retrieved_doc));
-    }
-
-    // Wait for indexing and merging threads to shut down.
-    // Usually this isn't needed, but in `main` we try to
-    // delete the temporary directory and that fails on
-    // Windows if the files are still open.
-    index_writer.wait_merging_threads()?;
-
-    Ok(())
-}
--- a/examples/snippet.rs
+++ b/examples/snippet.rs
@@ -0,0 +1,82 @@
+// # Snippet example
+//
+// This example shows how to return a representative snippet of
+// your hit result.
+// Snippets are extracts of a target document, returned in HTML format.
+// The keywords searched by the user are highlighted with a `<b>` tag.
+
+// ---
+// Importing tantivy...
+use tantivy::collector::TopDocs;
+use tantivy::query::QueryParser;
+use tantivy::schema::*;
+use tantivy::{doc, Index, Snippet, SnippetGenerator};
+use tempfile::TempDir;
+
+fn main() -> tantivy::Result<()> {
+    // Let's create a temporary directory for the
+    // sake of this example
+    let index_path = TempDir::new()?;
+
+    // # Defining the schema
+    let mut schema_builder = Schema::builder();
+    let title = schema_builder.add_text_field("title", TEXT | STORED);
+    let body = schema_builder.add_text_field("body", TEXT | STORED);
+    let schema = schema_builder.build();
+
+    // # Indexing documents
+    let index = Index::create_in_dir(&index_path, schema.clone())?;
+
+    let mut index_writer = index.writer(50_000_000)?;
+
+    // we'll only need one doc for this example.
+    index_writer.add_document(doc!(
+    title => "Of Mice and Men",
+    body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \
+            bank and runs deep and green. The water is warm too, for it has slipped twinkling \
+            over the yellow sands in the sunlight before reaching the narrow pool. On one \
+            side of the river the golden foothill slopes curve up to the strong and rocky \
+            Gabilan Mountains, but on the valley side the water is lined with trees—willows \
+            fresh and green with every spring, carrying in their lower leaf junctures the \
+            debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
+            limbs and branches that arch over the pool"
+    ));
+    // ...
+    index_writer.commit()?;
+
+    let reader = index.reader()?;
+    let searcher = reader.searcher();
+    let query_parser = QueryParser::for_index(&index, vec![title, body]);
+    let query = query_parser.parse_query("sycamore spring")?;
+
+    let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
+
+    let snippet_generator = SnippetGenerator::create(&searcher, &*query, body)?;
+
+    for (score, doc_address) in top_docs {
+        let doc = searcher.doc(doc_address)?;
+        let snippet = snippet_generator.snippet_from_doc(&doc);
+        println!("Document score {}:", score);
+        println!("title: {}", doc.get_first(title).unwrap().text().unwrap());
+        println!("snippet: {}", snippet.to_html());
+        println!("custom highlighting: {}", highlight(snippet));
+    }
+
+    Ok(())
+}
+
+fn highlight(snippet: Snippet) -> String {
+    let mut result = String::new();
+    let mut start_from = 0;
+
+    for (start, end) in snippet.highlighted().iter().map(|h| h.bounds()) {
+        result.push_str(&snippet.fragments()[start_from..start]);
+        result.push_str(" --> ");
+        result.push_str(&snippet.fragments()[start..end]);
+        result.push_str(" <-- ");
+        start_from = end;
+    }
+
+    result.push_str(&snippet.fragments()[start_from..]);
+    result
+}
--- a/examples/stop_words.rs
+++ b/examples/stop_words.rs
@@ -0,0 +1,113 @@
+// # Stop Words Example
+//
+// This example covers the basic usage of stop words
+// with tantivy
+//
+// We will :
+// - define our schema
+// - create an index in a directory
+// - add a few stop words
+// - index a few documents in our index
+
+// ---
+// Importing tantivy...
+use tantivy::collector::TopDocs;
+use tantivy::query::QueryParser;
+use tantivy::schema::*;
+use tantivy::tokenizer::*;
+use tantivy::{doc, Index};
+
+fn main() -> tantivy::Result<()> {
+    // this example assumes you understand the content in `basic_search`
+    let mut schema_builder = Schema::builder();
+
+    // This configures your custom options for how tantivy will
+    // store and process your content in the index; The key
+    // to note is that we are setting the tokenizer to `stoppy`
+    // which will be defined and registered below.
+    let text_field_indexing = TextFieldIndexing::default()
+        .set_tokenizer("stoppy")
+        .set_index_option(IndexRecordOption::WithFreqsAndPositions);
+    let text_options = TextOptions::default()
+        .set_indexing_options(text_field_indexing)
+        .set_stored();
+
+    // Our first field is title.
+    schema_builder.add_text_field("title", text_options);
+
+    // Our second field is body.
+    let text_field_indexing = TextFieldIndexing::default()
+        .set_tokenizer("stoppy")
+        .set_index_option(IndexRecordOption::WithFreqsAndPositions);
+    let text_options = TextOptions::default()
+        .set_indexing_options(text_field_indexing)
+        .set_stored();
+    schema_builder.add_text_field("body", text_options);
+
+    let schema = schema_builder.build();
+
+    let index = Index::create_in_ram(schema.clone());
+
+    // This tokenizer lowers all of the text (to help with stop word matching)
+    // then removes all instances of `the` and `and` from the corpus
+    let tokenizer = TextAnalyzer::from(SimpleTokenizer)
+        .filter(LowerCaser)
+        .filter(StopWordFilter::remove(vec![
+            "the".to_string(),
+            "and".to_string(),
+        ]));
+
+    index.tokenizers().register("stoppy", tokenizer);
+
+    let mut index_writer = index.writer(50_000_000)?;
+
+    let title = schema.get_field("title").unwrap();
+    let body = schema.get_field("body").unwrap();
+
+    index_writer.add_document(doc!(
+    title => "The Old Man and the Sea",
+    body => "He was an old man who fished alone in a skiff in the Gulf Stream and \
+     he had gone eighty-four days now without taking a fish."
+    ));
+
+    index_writer.add_document(doc!(
+    title => "Of Mice and Men",
+    body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \
+            bank and runs deep and green. The water is warm too, for it has slipped twinkling \
+            over the yellow sands in the sunlight before reaching the narrow pool. On one \
+            side of the river the golden foothill slopes curve up to the strong and rocky \
+            Gabilan Mountains, but on the valley side the water is lined with trees—willows \
+            fresh and green with every spring, carrying in their lower leaf junctures the \
+            debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
+            limbs and branches that arch over the pool"
+    ));
+
+    index_writer.add_document(doc!(
+    title => "Frankenstein",
+    body => "You will rejoice to hear that no disaster has accompanied the commencement of an \
+             enterprise which you have regarded with such evil forebodings.  I arrived here \
+             yesterday, and my first task is to assure my dear sister of my welfare and \
+             increasing confidence in the success of my undertaking."
+    ));
+
+    index_writer.commit()?;
+
+    let reader = index.reader()?;
+
+    let searcher = reader.searcher();
+
+    let query_parser = QueryParser::for_index(&index, vec![title, body]);
+
+    // stop words are applied on the query as well.
+    // The following will be equivalent to `title:frankenstein`
+    let query = query_parser.parse_query("title:\"the Frankenstein\"")?;
+    let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
+
+    for (score, doc_address) in top_docs {
+        let retrieved_doc = searcher.doc(doc_address)?;
+        println!("\n==\nDocument score {}:", score);
+        println!("{}", schema.to_json(&retrieved_doc));
+    }
+
+    Ok(())
+}
--- a/examples/working_with_json.rs
+++ b/examples/working_with_json.rs
@@ -0,0 +1,41 @@
+use tantivy;
+use tantivy::schema::*;
+
+// # Document from json
+//
+// For convenience, `Document` can be parsed directly from json.
+fn main() -> tantivy::Result<()> {
+    // Let's first define a schema and an index.
+    // Check out the basic example if this is confusing to you.
+    //
+    // first we need to define a schema ...
+    let mut schema_builder = Schema::builder();
+    schema_builder.add_text_field("title", TEXT | STORED);
+    schema_builder.add_text_field("body", TEXT);
+    schema_builder.add_u64_field("year", INDEXED);
+    let schema = schema_builder.build();
+
+    // Let's assume we have a json-serialized document.
+    let mice_and_men_doc_json = r#"{
+       "title": "Of Mice and Men",
+       "year": 1937
+    }"#;
+
+    // We can parse our document
+    let _mice_and_men_doc = schema.parse_document(&mice_and_men_doc_json)?;
+
+    // Multi-valued fields are allowed, they are
+    // expressed in JSON by an array.
+    // The following document has two titles.
+    let frankenstein_json = r#"{
+       "title": ["Frankenstein", "The Modern Prometheus"],
+       "year": 1818
+    }"#;
+    let _frankenstein_doc = schema.parse_document(&frankenstein_json)?;
+
+    // Note that the schema is saved in your index directory.
+    //
+    // As a result, Indexes are aware of their schema, and you can use this feature
+    // just by opening an existing `Index`, and calling `index.schema().parse_document(json)`.
+    Ok(())
+}
--- a/query-grammar/Cargo.toml
+++ b/query-grammar/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "tantivy-query-grammar"
+version = "0.13.0"
+authors = ["Paul Masurel <paul.masurel@gmail.com>"]
+license = "MIT"
+categories = ["database-implementations", "data-structures"]
+description = """Search engine library"""
+documentation = "https://tantivy-search.github.io/tantivy/tantivy/index.html"
+homepage = "https://github.com/tantivy-search/tantivy"
+repository = "https://github.com/tantivy-search/tantivy"
+readme = "README.md"
+keywords = ["search", "information", "retrieval"]
+edition = "2018"
+
+[dependencies]
+combine = {version="4", default-features=false, features=[] }
--- a/query-grammar/README.md
+++ b/query-grammar/README.md
@@ -0,0 +1,3 @@
+# Tantivy Query Grammar
+
+This crate is used by tantivy to parse queries.
--- a/query-grammar/src/lib.rs
+++ b/query-grammar/src/lib.rs
@@ -0,0 +1,15 @@
+mod occur;
+mod query_grammar;
+mod user_input_ast;
+use combine::parser::Parser;
+
+pub use crate::occur::Occur;
+use crate::query_grammar::parse_to_ast;
+pub use crate::user_input_ast::{UserInputAST, UserInputBound, UserInputLeaf, UserInputLiteral};
+
+pub struct Error;
+
+pub fn parse_query(query: &str) -> Result<UserInputAST, Error> {
+    let (user_input_ast, _remaining) = parse_to_ast().parse(query).map_err(|_| Error)?;
+    Ok(user_input_ast)
+}
--- a/query-grammar/src/occur.rs
+++ b/query-grammar/src/occur.rs
@@ -0,0 +1,72 @@
+use std::fmt;
+use std::fmt::Write;
+
+/// Defines whether a term in a query must be present,
+/// should be present or must not be present.
+#[derive(Debug, Clone, Hash, Copy, Eq, PartialEq)]
+pub enum Occur {
+    /// For a given document to be considered for scoring,
+    /// at least one of the terms with the `Should` or the `Must`
+    /// `Occur` constraint must be within the document.
+    Should,
+    /// Documents without the term are excluded from the search.
+    Must,
+    /// Documents that contain the term are excluded from the
+    /// search.
+    MustNot,
+}
+
+impl Occur {
+    /// Returns the one-char prefix symbol for this `Occur`.
+    /// - `Should` => '?',
+    /// - `Must` => '+'
+    /// - `MustNot` => '-'
+    fn to_char(self) -> char {
+        match self {
+            Occur::Should => '?',
+            Occur::Must => '+',
+            Occur::MustNot => '-',
+        }
+    }
+
+    /// Compose two occur values (`Should` defers to `right`; two `MustNot` cancel into `Must`).
+    pub fn compose(left: Occur, right: Occur) -> Occur {
+        match (left, right) {
+            (Occur::Should, _) => right,
+            (Occur::Must, Occur::MustNot) => Occur::MustNot,
+            (Occur::Must, _) => Occur::Must,
+            (Occur::MustNot, Occur::MustNot) => Occur::Must,
+            (Occur::MustNot, _) => Occur::MustNot,
+        }
+    }
+}
+
+impl fmt::Display for Occur {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_char(self.to_char())
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use crate::Occur;
+
+    #[test]
+    fn test_occur_compose() {
+        assert_eq!(Occur::compose(Occur::Should, Occur::Should), Occur::Should);
+        assert_eq!(Occur::compose(Occur::Should, Occur::Must), Occur::Must);
+        assert_eq!(
+            Occur::compose(Occur::Should, Occur::MustNot),
+            Occur::MustNot
+        );
+        assert_eq!(Occur::compose(Occur::Must, Occur::Should), Occur::Must);
+        assert_eq!(Occur::compose(Occur::Must, Occur::Must), Occur::Must);
+        assert_eq!(Occur::compose(Occur::Must, Occur::MustNot), Occur::MustNot);
+        assert_eq!(
+            Occur::compose(Occur::MustNot, Occur::Should),
+            Occur::MustNot
+        );
+        assert_eq!(Occur::compose(Occur::MustNot, Occur::Must), Occur::MustNot);
+        assert_eq!(Occur::compose(Occur::MustNot, Occur::MustNot), Occur::Must);
+    }
+}
--- a/query-grammar/src/query_grammar.rs
+++ b/query-grammar/src/query_grammar.rs
@@ -0,0 +1,510 @@
+use super::user_input_ast::{UserInputAST, UserInputBound, UserInputLeaf, UserInputLiteral};
+use crate::Occur;
+use combine::error::StringStreamError;
+use combine::parser::char::{char, digit, letter, space, spaces, string};
+use combine::parser::Parser;
+use combine::{
+    attempt, choice, eof, many, many1, one_of, optional, parser, satisfy, skip_many1, value,
+};
+
+fn field<'a>() -> impl Parser<&'a str, Output = String> {
+    (
+        letter(),
+        many(satisfy(|c: char| c.is_alphanumeric() || c == '_')),
+    )
+        .skip(char(':'))
+        .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
+}
+
+fn word<'a>() -> impl Parser<&'a str, Output = String> {
+    (
+        satisfy(|c: char| {
+            !c.is_whitespace()
+                && !['-', '^', '`', ':', '{', '}', '"', '[', ']', '(', ')'].contains(&c)
+        }),
+        many(satisfy(|c: char| {
+            !c.is_whitespace() && ![':', '^', '{', '}', '"', '[', ']', '(', ')'].contains(&c)
+        })),
+    )
+        .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
+        .and_then(|s: String| match s.as_str() { // bare operator keywords are not words
+            "OR" | "AND" | "NOT" => Err(StringStreamError::UnexpectedParse),
+            _ => Ok(s),
+        })
+}
+
+fn term_val<'a>() -> impl Parser<&'a str, Output = String> {
+    let phrase = char('"').with(many1(satisfy(|c| c != '"'))).skip(char('"'));
+    phrase.or(word())
+}
+
+fn term_query<'a>() -> impl Parser<&'a str, Output = UserInputLiteral> {
+    let term_val_with_field = negative_number().or(term_val());
+    (field(), term_val_with_field).map(|(field_name, phrase)| UserInputLiteral {
+        field_name: Some(field_name),
+        phrase,
+    })
+}
+
+fn literal<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
+    let term_default_field = term_val().map(|phrase| UserInputLiteral {
+        field_name: None,
+        phrase,
+    });
+    attempt(term_query())
+        .or(term_default_field)
+        .map(UserInputLeaf::from)
+}
+
+fn negative_number<'a>() -> impl Parser<&'a str, Output = String> {
+    (
+        char('-'),
+        many1(digit()),
+        optional((char('.'), many1(digit()))),
+    )
+        .map(|(s1, s2, s3): (char, String, Option<(char, String)>)| {
+            if let Some(('.', s3)) = s3 {
+                format!("{}{}.{}", s1, s2, s3)
+            } else {
+                format!("{}{}", s1, s2)
+            }
+        })
+}
+
+fn spaces1<'a>() -> impl Parser<&'a str, Output = ()> {
+    skip_many1(space())
+}
+
+/// Function that parses a range out of a Stream
+/// Supports ranges like:
+/// [5 TO 10], {5 TO 10}, [* TO 10], [10 TO *], {10 TO *], >5, <=10
+/// [a TO *], [a TO c], [abc TO bcd}
+fn range<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
+    let range_term_val = || {
+        word()
+            .or(negative_number())
+            .or(char('*').with(value("*".to_string())))
+    };
+
+    // check for unbounded range in the form of <5, <=10, >5, >=5
+    let elastic_unbounded_range = (
+        choice([
+            attempt(string(">=")),
+            attempt(string("<=")),
+            attempt(string("<")),
+            attempt(string(">")),
+        ])
+        .skip(spaces()),
+        range_term_val(),
+    )
+        .map(
+            |(comparison_sign, bound): (&str, String)| match comparison_sign {
+                ">=" => (UserInputBound::Inclusive(bound), UserInputBound::Unbounded),
+                "<=" => (UserInputBound::Unbounded, UserInputBound::Inclusive(bound)),
+                "<" => (UserInputBound::Unbounded, UserInputBound::Exclusive(bound)),
+                ">" => (UserInputBound::Exclusive(bound), UserInputBound::Unbounded),
+                // default case
+                _ => (UserInputBound::Unbounded, UserInputBound::Unbounded),
+            },
+        );
+    let lower_bound = (one_of("{[".chars()), range_term_val()).map(
+        |(boundary_char, lower_bound): (char, String)| {
+            if lower_bound == "*" {
+                UserInputBound::Unbounded
+            } else if boundary_char == '{' {
+                UserInputBound::Exclusive(lower_bound)
+            } else {
+                UserInputBound::Inclusive(lower_bound)
+            }
+        },
+    );
+    let upper_bound = (range_term_val(), one_of("}]".chars())).map(
+        |(higher_bound, boundary_char): (String, char)| {
+            if higher_bound == "*" {
+                UserInputBound::Unbounded
+            } else if boundary_char == '}' {
+                UserInputBound::Exclusive(higher_bound)
+            } else {
+                UserInputBound::Inclusive(higher_bound)
+            }
+        },
+    );
+    // return only lower and upper
+    let lower_to_upper = (
+        lower_bound.skip((spaces(), string("TO"), spaces())),
+        upper_bound,
+    );
+
+    (
+        optional(field()).skip(spaces()),
+        // try elastic first, if it matches, the range is unbounded
+        attempt(elastic_unbounded_range).or(lower_to_upper),
+    )
+        .map(|(field, (lower, upper))|
+             // Construct the leaf from extracted field (optional)
+             // and bounds
+             UserInputLeaf::Range {
+                 field,
+                 lower,
+                 upper
+    })
+}
+
+fn negate(expr: UserInputAST) -> UserInputAST {
+    expr.unary(Occur::MustNot)
+}
+
+fn leaf<'a>() -> impl Parser<&'a str, Output = UserInputAST> {
+    parser(|input| {
+        char('(')
+            .with(ast())
+            .skip(char(')'))
+            .or(char('*').map(|_| UserInputAST::from(UserInputLeaf::All)))
+            .or(attempt(
+                string("NOT").skip(spaces1()).with(leaf()).map(negate),
+            ))
+            .or(attempt(range().map(UserInputAST::from)))
+            .or(literal().map(UserInputAST::from))
+            .parse_stream(input)
+            .into_result()
+    })
+}
+
+fn occur_symbol<'a>() -> impl Parser<&'a str, Output = Occur> {
+    char('-')
+        .map(|_| Occur::MustNot)
+        .or(char('+').map(|_| Occur::Must))
+}
+
+fn occur_leaf<'a>() -> impl Parser<&'a str, Output = (Option<Occur>, UserInputAST)> {
+    (optional(occur_symbol()), boosted_leaf())
+}
+
+fn positive_float_number<'a>() -> impl Parser<&'a str, Output = f64> {
+    (many1(digit()), optional((char('.'), many1(digit())))).map(
+        |(int_part, decimal_part_opt): (String, Option<(char, String)>)| {
+            let mut float_str = int_part;
+            if let Some((chr, decimal_str)) = decimal_part_opt {
+                float_str.push(chr);
+                float_str.push_str(&decimal_str);
+            }
+            float_str.parse::<f64>().unwrap()
+        },
+    )
+}
+
+fn boost<'a>() -> impl Parser<&'a str, Output = f64> {
+    (char('^'), positive_float_number()).map(|(_, boost)| boost)
+}
+
+fn boosted_leaf<'a>() -> impl Parser<&'a str, Output = UserInputAST> {
+    (leaf(), optional(boost())).map(|(leaf, boost_opt)| match boost_opt {
+        Some(boost) if (boost - 1.0).abs() > std::f64::EPSILON => {
+            UserInputAST::Boost(Box::new(leaf), boost)
+        }
+        _ => leaf,
+    })
+}
+
+#[derive(Clone, Copy)]
+enum BinaryOperand {
+    Or,
+    And,
+}
+
+fn binary_operand<'a>() -> impl Parser<&'a str, Output = BinaryOperand> {
+    string("AND")
+        .with(value(BinaryOperand::And))
+        .or(string("OR").with(value(BinaryOperand::Or)))
+}
+
+fn aggregate_binary_expressions(
+    left: UserInputAST,
+    others: Vec<(BinaryOperand, UserInputAST)>,
+) -> UserInputAST {
+    let mut dnf: Vec<Vec<UserInputAST>> = vec![vec![left]];
+    for (operator, operand_ast) in others {
+        match operator {
+            BinaryOperand::And => {
+                if let Some(last) = dnf.last_mut() {
+                    last.push(operand_ast);
+                }
+            }
+            BinaryOperand::Or => {
+                dnf.push(vec![operand_ast]);
+            }
+        }
+    }
+    if dnf.len() == 1 {
+        UserInputAST::and(dnf.into_iter().next().unwrap()) //< safe
+    } else {
+        let conjunctions = dnf.into_iter().map(UserInputAST::and).collect();
+        UserInputAST::or(conjunctions)
+    }
+}
+
+fn operand_leaf<'a>() -> impl Parser<&'a str, Output = (BinaryOperand, UserInputAST)> {
+    (
+        binary_operand().skip(spaces()),
+        boosted_leaf().skip(spaces()),
+    )
+}
+
+pub fn ast<'a>() -> impl Parser<&'a str, Output = UserInputAST> {
+    let boolean_expr = (boosted_leaf().skip(spaces()), many1(operand_leaf()))
+        .map(|(left, right)| aggregate_binary_expressions(left, right));
+    let whitespace_separated_leaves = many1(occur_leaf().skip(spaces().silent())).map(
+        |subqueries: Vec<(Option<Occur>, UserInputAST)>| {
+            if subqueries.len() == 1 {
+                let (occur_opt, ast) = subqueries.into_iter().next().unwrap();
+                match occur_opt.unwrap_or(Occur::Should) {
+                    Occur::Must | Occur::Should => ast,
+                    Occur::MustNot => UserInputAST::Clause(vec![(Some(Occur::MustNot), ast)]),
+                }
+            } else {
+                UserInputAST::Clause(subqueries.into_iter().collect())
+            }
+        },
+    );
+    let expr = attempt(boolean_expr).or(whitespace_separated_leaves);
+    spaces().with(expr).skip(spaces())
+}
+
+pub fn parse_to_ast<'a>() -> impl Parser<&'a str, Output = UserInputAST> {
+    spaces()
+        .with(optional(ast()).skip(eof()))
+        .map(|opt_ast| opt_ast.unwrap_or_else(UserInputAST::empty_query))
+}
+
+#[cfg(test)]
+mod test {
+
+    use super::*;
+    use combine::parser::Parser;
+
+    pub fn nearly_equals(a: f64, b: f64) -> bool {
+        (a - b).abs() < 0.0005 * (a + b).abs()
+    }
+
+    fn assert_nearly_equals(expected: f64, val: f64) {
+        assert!(
+            nearly_equals(val, expected),
+            "Got {}, expected {}.",
+            val,
+            expected
+        );
+    }
+
+    #[test]
+    fn test_occur_symbol() {
+        assert_eq!(super::occur_symbol().parse("-"), Ok((Occur::MustNot, "")));
+        assert_eq!(super::occur_symbol().parse("+"), Ok((Occur::Must, "")));
+    }
+
+    #[test]
+    fn test_positive_float_number() {
+        fn valid_parse(float_str: &str, expected_val: f64, expected_remaining: &str) {
+            let (val, remaining) = positive_float_number().parse(float_str).unwrap();
+            assert_eq!(remaining, expected_remaining);
+            assert_nearly_equals(expected_val, val); // helper signature is (expected, val)
+        }
+        fn error_parse(float_str: &str) {
+            assert!(positive_float_number().parse(float_str).is_err());
+        }
+        valid_parse("1.0", 1.0, "");
+        valid_parse("1", 1.0, "");
+        valid_parse("0.234234 aaa", 0.234234f64, " aaa");
+        error_parse(".3332"); // no integer part
+        error_parse("1."); // decimal point without digits after it
+        error_parse("-1."); // input does not start with a digit
+    }
+
+    fn test_parse_query_to_ast_helper(query: &str, expected: &str) {
+        let query = parse_to_ast().parse(query).unwrap().0;
+        let query_str = format!("{:?}", query);
+        assert_eq!(query_str, expected);
+    }
+
+    fn test_is_parse_err(query: &str) {
+        assert!(parse_to_ast().parse(query).is_err());
+    }
+
+    #[test]
+    fn test_parse_empty_to_ast() {
+        test_parse_query_to_ast_helper("", "<emptyclause>");
+    }
+
+    #[test]
+    fn test_parse_query_to_ast_hyphen() {
+        test_parse_query_to_ast_helper("\"www-form-encoded\"", "\"www-form-encoded\"");
+        test_parse_query_to_ast_helper("www-form-encoded", "\"www-form-encoded\"");
+        test_parse_query_to_ast_helper("www-form-encoded", "\"www-form-encoded\"");
+    }
+
+    #[test]
+    fn test_parse_query_to_ast_not_op() {
+        assert_eq!(
+            format!("{:?}", parse_to_ast().parse("NOT")),
+            "Err(UnexpectedParse)"
+        );
+        test_parse_query_to_ast_helper("NOTa", "\"NOTa\"");
+        test_parse_query_to_ast_helper("NOT a", "(-\"a\")");
+    }
+
+    #[test]
+    fn test_boosting() {
+        assert!(parse_to_ast().parse("a^2^3").is_err());
+        assert!(parse_to_ast().parse("a^2^").is_err());
+        test_parse_query_to_ast_helper("a^3", "(\"a\")^3");
+        test_parse_query_to_ast_helper("a^3 b^2", "(*(\"a\")^3 *(\"b\")^2)");
+        test_parse_query_to_ast_helper("a^1", "\"a\"");
+    }
+
+    #[test]
+    fn test_parse_query_to_ast_binary_op() {
+        test_parse_query_to_ast_helper("a AND b", "(+\"a\" +\"b\")");
+        test_parse_query_to_ast_helper("a OR b", "(?\"a\" ?\"b\")");
+        test_parse_query_to_ast_helper("a OR b AND c", "(?\"a\" ?(+\"b\" +\"c\"))");
+        test_parse_query_to_ast_helper("a AND b         AND c", "(+\"a\" +\"b\" +\"c\")");
+        assert_eq!(
+            format!("{:?}", parse_to_ast().parse("a OR b aaa")),
+            "Err(UnexpectedParse)"
+        );
+        assert_eq!(
+            format!("{:?}", parse_to_ast().parse("a AND b aaa")),
+            "Err(UnexpectedParse)"
+        );
+        assert_eq!(
+            format!("{:?}", parse_to_ast().parse("aaa a OR b ")),
+            "Err(UnexpectedParse)"
+        );
+        assert_eq!(
+            format!("{:?}", parse_to_ast().parse("aaa ccc a OR b ")),
+            "Err(UnexpectedParse)"
+        );
+    }
+
+    #[test]
+    fn test_parse_elastic_query_ranges() {
+        test_parse_query_to_ast_helper("title: >a", "title:{\"a\" TO \"*\"}");
+        test_parse_query_to_ast_helper("title:>=a", "title:[\"a\" TO \"*\"}");
+        test_parse_query_to_ast_helper("title: <a", "title:{\"*\" TO \"a\"}");
+        test_parse_query_to_ast_helper("title:<=a", "title:{\"*\" TO \"a\"]");
+        test_parse_query_to_ast_helper("title:<=bsd", "title:{\"*\" TO \"bsd\"]");
+
+        test_parse_query_to_ast_helper("weight: >70", "weight:{\"70\" TO \"*\"}");
+        test_parse_query_to_ast_helper("weight:>=70", "weight:[\"70\" TO \"*\"}");
+        test_parse_query_to_ast_helper("weight: <70", "weight:{\"*\" TO \"70\"}");
+        test_parse_query_to_ast_helper("weight:<=70", "weight:{\"*\" TO \"70\"]");
+        test_parse_query_to_ast_helper("weight: >60.7", "weight:{\"60.7\" TO \"*\"}");
+
+        test_parse_query_to_ast_helper("weight: <= 70", "weight:{\"*\" TO \"70\"]");
+
+        test_parse_query_to_ast_helper("weight: <= 70.5", "weight:{\"*\" TO \"70.5\"]");
+    }
+
+    #[test]
+    fn test_occur_leaf() {
+        let ((occur, ast), _) = super::occur_leaf().parse("+abc").unwrap();
+        assert_eq!(occur, Some(Occur::Must));
+        assert_eq!(format!("{:?}", ast), "\"abc\"");
+    }
+
+    #[test]
+    fn test_range_parser() {
+        // testing the range() parser separately
+        let res = range().parse("title: <hello").unwrap().0;
+        let expected = UserInputLeaf::Range {
+            field: Some("title".to_string()),
+            lower: UserInputBound::Unbounded,
+            upper: UserInputBound::Exclusive("hello".to_string()),
+        };
+        let res2 = range().parse("title:{* TO hello}").unwrap().0;
+        assert_eq!(res, expected);
+        assert_eq!(res2, expected);
+        let expected_weight = UserInputLeaf::Range {
+            field: Some("weight".to_string()),
+            lower: UserInputBound::Inclusive("71.2".to_string()),
+            upper: UserInputBound::Unbounded,
+        };
+
+        let res3 = range().parse("weight: >=71.2").unwrap().0;
+        let res4 = range().parse("weight:[71.2 TO *}").unwrap().0;
+        assert_eq!(res3, expected_weight);
+        assert_eq!(res4, expected_weight);
+    }
+
+    #[test]
+    fn test_parse_query_to_triming_spaces() {
+        test_parse_query_to_ast_helper("   abc", "\"abc\"");
+        test_parse_query_to_ast_helper("abc ", "\"abc\"");
+        test_parse_query_to_ast_helper("(  a OR abc)", "(?\"a\" ?\"abc\")");
+        test_parse_query_to_ast_helper("(a  OR abc)", "(?\"a\" ?\"abc\")");
+        test_parse_query_to_ast_helper("(a OR  abc)", "(?\"a\" ?\"abc\")");
+        test_parse_query_to_ast_helper("a OR abc ", "(?\"a\" ?\"abc\")");
+        test_parse_query_to_ast_helper("(a OR abc )", "(?\"a\" ?\"abc\")");
+        test_parse_query_to_ast_helper("(a OR  abc) ", "(?\"a\" ?\"abc\")");
+    }
+
+    #[test]
+    fn test_parse_query_single_term() {
+        test_parse_query_to_ast_helper("abc", "\"abc\"");
+    }
+
+    #[test]
+    fn test_parse_query_default_clause() {
+        test_parse_query_to_ast_helper("a b", "(*\"a\" *\"b\")");
+    }
+
+    #[test]
+    fn test_parse_query_must_default_clause() {
+        test_parse_query_to_ast_helper("+(a b)", "(*\"a\" *\"b\")");
+    }
+
+    #[test]
+    fn test_parse_query_must_single_term() {
+        test_parse_query_to_ast_helper("+d", "\"d\"");
+    }
+
+    #[test]
+    fn test_single_term_with_field() {
+        test_parse_query_to_ast_helper("abc:toto", "abc:\"toto\"");
+    }
+
+    #[test]
+    fn test_single_term_with_float() {
+        test_parse_query_to_ast_helper("abc:1.1", "abc:\"1.1\"");
+    }
+
+    #[test]
+    fn test_must_clause() {
+        test_parse_query_to_ast_helper("(+a +b)", "(+\"a\" +\"b\")");
+    }
+
+    #[test]
+    fn test_parse_test_query_plus_a_b_plus_d() {
+        test_parse_query_to_ast_helper("+(a b) +d", "(+(*\"a\" *\"b\") +\"d\")");
+    }
+
+    #[test]
+    fn test_parse_test_query_other() {
+        test_parse_query_to_ast_helper("(+a +b) d", "(*(+\"a\" +\"b\") *\"d\")");
+        test_parse_query_to_ast_helper("+abc:toto", "abc:\"toto\"");
+        test_parse_query_to_ast_helper("(+abc:toto -titi)", "(+abc:\"toto\" -\"titi\")");
+        test_parse_query_to_ast_helper("-abc:toto", "(-abc:\"toto\")");
+        test_parse_query_to_ast_helper("abc:a b", "(*abc:\"a\" *\"b\")");
+        test_parse_query_to_ast_helper("abc:\"a b\"", "abc:\"a b\"");
+        test_parse_query_to_ast_helper("foo:[1 TO 5]", "foo:[\"1\" TO \"5\"]");
+    }
+
+    #[test]
+    fn test_parse_query_with_range() {
+        test_parse_query_to_ast_helper("[1 TO 5]", "[\"1\" TO \"5\"]");
+        test_parse_query_to_ast_helper("foo:{a TO z}", "foo:{\"a\" TO \"z\"}");
+        test_parse_query_to_ast_helper("foo:[1 TO toto}", "foo:[\"1\" TO \"toto\"}");
+        test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:{\"*\" TO \"toto\"}");
+        test_parse_query_to_ast_helper("foo:[1 TO *}", "foo:[\"1\" TO \"*\"}");
+        test_parse_query_to_ast_helper("foo:[1.1 TO *}", "foo:[\"1.1\" TO \"*\"}");
+        test_is_parse_err("abc +    ");
+    }
+}
--- a/query-grammar/src/user_input_ast.rs
+++ b/query-grammar/src/user_input_ast.rs
@@ -0,0 +1,171 @@
+use std::fmt;
+use std::fmt::{Debug, Formatter};
+
+use crate::Occur;
+
+#[derive(PartialEq)]
+pub enum UserInputLeaf {
+    Literal(UserInputLiteral),
+    All,
+    Range {
+        field: Option<String>,
+        lower: UserInputBound,
+        upper: UserInputBound,
+    },
+}
+
+impl Debug for UserInputLeaf {
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> Result<(), fmt::Error> {
+        match self {
+            UserInputLeaf::Literal(literal) => literal.fmt(formatter),
+            UserInputLeaf::Range {
+                ref field,
+                ref lower,
+                ref upper,
+            } => {
+                if let Some(ref field) = field {
+                    write!(formatter, "{}:", field)?;
+                }
+                lower.display_lower(formatter)?;
+                write!(formatter, " TO ")?;
+                upper.display_upper(formatter)?;
+                Ok(())
+            }
+            UserInputLeaf::All => write!(formatter, "*"),
+        }
+    }
+}
+
+#[derive(PartialEq)]
+pub struct UserInputLiteral {
+    pub field_name: Option<String>,
+    pub phrase: String,
+}
+
+impl fmt::Debug for UserInputLiteral {
+    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+        match self.field_name {
+            Some(ref field_name) => write!(formatter, "{}:\"{}\"", field_name, self.phrase),
+            None => write!(formatter, "\"{}\"", self.phrase),
+        }
+    }
+}
+
+#[derive(PartialEq)]
+pub enum UserInputBound {
+    Inclusive(String),
+    Exclusive(String),
+    Unbounded,
+}
+
+impl UserInputBound {
+    fn display_lower(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+        match *self {
+            UserInputBound::Inclusive(ref word) => write!(formatter, "[\"{}\"", word),
+            UserInputBound::Exclusive(ref word) => write!(formatter, "{{\"{}\"", word),
+            UserInputBound::Unbounded => write!(formatter, "{{\"*\""),
+        }
+    }
+
+    fn display_upper(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+        match *self {
+            UserInputBound::Inclusive(ref word) => write!(formatter, "\"{}\"]", word),
+            UserInputBound::Exclusive(ref word) => write!(formatter, "\"{}\"}}", word),
+            UserInputBound::Unbounded => write!(formatter, "\"*\"}}"),
+        }
+    }
+
+    pub fn term_str(&self) -> &str { // the bound's text, or "*" when unbounded
+        match *self {
+            UserInputBound::Inclusive(ref contents) => contents,
+            UserInputBound::Exclusive(ref contents) => contents,
+            UserInputBound::Unbounded => "*",
+        }
+    }
+}
+
+pub enum UserInputAST {
+    Clause(Vec<(Option<Occur>, UserInputAST)>),
+    Leaf(Box<UserInputLeaf>),
+    Boost(Box<UserInputAST>, f64),
+}
+
+impl UserInputAST {
+    pub fn unary(self, occur: Occur) -> UserInputAST {
+        UserInputAST::Clause(vec![(Some(occur), self)])
+    }
+
+    fn compose(occur: Occur, asts: Vec<UserInputAST>) -> UserInputAST {
+        assert_ne!(occur, Occur::MustNot);
+        assert!(!asts.is_empty());
+        if asts.len() == 1 {
+            asts.into_iter().next().unwrap() //< safe
+        } else {
+            UserInputAST::Clause(
+                asts.into_iter()
+                    .map(|ast: UserInputAST| (Some(occur), ast))
+                    .collect::<Vec<_>>(),
+            )
+        }
+    }
+
+    pub fn empty_query() -> UserInputAST {
+        UserInputAST::Clause(Vec::default())
+    }
+
+    pub fn and(asts: Vec<UserInputAST>) -> UserInputAST {
+        UserInputAST::compose(Occur::Must, asts)
+    }
+
+    pub fn or(asts: Vec<UserInputAST>) -> UserInputAST {
+        UserInputAST::compose(Occur::Should, asts)
+    }
+}
+
+impl From<UserInputLiteral> for UserInputLeaf {
+    fn from(literal: UserInputLiteral) -> UserInputLeaf {
+        UserInputLeaf::Literal(literal)
+    }
+}
+
+impl From<UserInputLeaf> for UserInputAST {
+    fn from(leaf: UserInputLeaf) -> UserInputAST {
+        UserInputAST::Leaf(Box::new(leaf))
+    }
+}
+
+fn print_occur_ast(
+    occur_opt: Option<Occur>,
+    ast: &UserInputAST,
+    formatter: &mut fmt::Formatter,
+) -> fmt::Result {
+    if let Some(occur) = occur_opt {
+        write!(formatter, "{}{:?}", occur, ast)?;
+    } else {
+        write!(formatter, "*{:?}", ast)?;
+    }
+    Ok(())
+}
+
+impl fmt::Debug for UserInputAST {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            UserInputAST::Clause(ref subqueries) => {
+                if subqueries.is_empty() {
+                    write!(formatter, "<emptyclause>")?;
+                } else {
+                    write!(formatter, "(")?;
+                    print_occur_ast(subqueries[0].0, &subqueries[0].1, formatter)?;
+                    for subquery in &subqueries[1..] {
+                        write!(formatter, " ")?;
+                        print_occur_ast(subquery.0, &subquery.1, formatter)?;
+                    }
+                    write!(formatter, ")")?;
+                }
+                Ok(())
+            }
+            UserInputAST::Leaf(ref subquery) => write!(formatter, "{:?}", subquery),
+            UserInputAST::Boost(ref leaf, boost) => write!(formatter, "({:?})^{}", leaf, boost),
+        }
+    }
+}
--- a/run-tests.sh
+++ b/run-tests.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+cargo test
--- a/src/collector/chained_collector.rs
+++ b/src/collector/chained_collector.rs
@@ -1,142 +0,0 @@
-use collector::Collector;
-use DocId;
-use Result;
-use Score;
-use SegmentLocalId;
-use SegmentReader;
-
-/// Collector that does nothing.
-/// This is used in the chain Collector and will hopefully
-/// be optimized away by the compiler.
-pub struct DoNothingCollector;
-impl Collector for DoNothingCollector {
-    #[inline]
-    fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> Result<()> {
-        Ok(())
-    }
-    #[inline]
-    fn collect(&mut self, _doc: DocId, _score: Score) {}
-    #[inline]
-    fn requires_scoring(&self) -> bool {
-        false
-    }
-}
-
-/// Zero-cost abstraction used to collect on multiple collectors.
-/// This contraption is only usable if the type of your collectors
-/// are known at compile time.
-///
-/// ```rust
-/// #[macro_use]
-/// extern crate tantivy;
-/// use tantivy::schema::{SchemaBuilder, TEXT};
-/// use tantivy::{Index, Result};
-/// use tantivy::collector::{CountCollector, TopCollector, chain};
-/// use tantivy::query::QueryParser;
-///
-/// # fn main() { example().unwrap(); }
-/// fn example() -> Result<()> {
-///     let mut schema_builder = SchemaBuilder::new();
-///     let title = schema_builder.add_text_field("title", TEXT);
-///     let schema = schema_builder.build();
-///     let index = Index::create_in_ram(schema);
-///     {
-///         let mut index_writer = index.writer(3_000_000)?;
-///         index_writer.add_document(doc!(
-///             title => "The Name of the Wind",
-///         ));
-///         index_writer.add_document(doc!(
-///             title => "The Diary of Muadib",
-///         ));
-///         index_writer.add_document(doc!(
-///             title => "A Dairy Cow",
-///         ));
-///         index_writer.add_document(doc!(
-///             title => "The Diary of a Young Girl",
-///         ));
-///         index_writer.commit().unwrap();
-///     }
-///
-///     index.load_searchers()?;
-///     let searcher = index.searcher();
-///
-///     {
-///         let mut top_collector = TopCollector::with_limit(2);
-///         let mut count_collector = CountCollector::default();
-///         {
-///             let mut collectors = chain().push(&mut top_collector).push(&mut count_collector);
-///             let query_parser = QueryParser::for_index(&index, vec![title]);
-///             let query = query_parser.parse_query("diary")?;
-///             searcher.search(&*query, &mut collectors).unwrap();
-///         }
-///         assert_eq!(count_collector.count(), 2);
-///         assert!(top_collector.at_capacity());
-///     }
-///
-///     Ok(())
-/// }
-/// ```
-pub struct ChainedCollector<Left: Collector, Right: Collector> {
-    left: Left,
-    right: Right,
-}
-
-impl<Left: Collector, Right: Collector> ChainedCollector<Left, Right> {
-    /// Adds a collector
-    pub fn push<C: Collector>(self, new_collector: &mut C) -> ChainedCollector<Self, &mut C> {
-        ChainedCollector {
-            left: self,
-            right: new_collector,
-        }
-    }
-}
-
-impl<Left: Collector, Right: Collector> Collector for ChainedCollector<Left, Right> {
-    fn set_segment(
-        &mut self,
-        segment_local_id: SegmentLocalId,
-        segment: &SegmentReader,
-    ) -> Result<()> {
-        self.left.set_segment(segment_local_id, segment)?;
-        self.right.set_segment(segment_local_id, segment)?;
-        Ok(())
-    }
-
-    fn collect(&mut self, doc: DocId, score: Score) {
-        self.left.collect(doc, score);
-        self.right.collect(doc, score);
-    }
-
-    fn requires_scoring(&self) -> bool {
-        self.left.requires_scoring() || self.right.requires_scoring()
-    }
-}
-
-/// Creates a `ChainedCollector`
-pub fn chain() -> ChainedCollector<DoNothingCollector, DoNothingCollector> {
-    ChainedCollector {
-        left: DoNothingCollector,
-        right: DoNothingCollector,
-    }
-}
-
-#[cfg(test)]
-mod tests {
-
-    use super::*;
-    use collector::{Collector, CountCollector, TopCollector};
-
-    #[test]
-    fn test_chained_collector() {
-        let mut top_collector = TopCollector::with_limit(2);
-        let mut count_collector = CountCollector::default();
-        {
-            let mut collectors = chain().push(&mut top_collector).push(&mut count_collector);
-            collectors.collect(1, 0.2);
-            collectors.collect(2, 0.1);
-            collectors.collect(3, 0.5);
-        }
-        assert_eq!(count_collector.count(), 3);
-        assert!(top_collector.at_capacity());
-    }
-}
--- a/src/collector/count_collector.rs
+++ b/src/collector/count_collector.rs
@@ -1,101 +1,114 @@
 use super::Collector;
-use DocId;
-use Result;
-use Score;
-use SegmentLocalId;
-use SegmentReader;
+use crate::collector::SegmentCollector;
+use crate::DocId;
+use crate::Score;
+use crate::SegmentLocalId;
+use crate::SegmentReader;

 /// `CountCollector` collector only counts how many
 /// documents match the query.
 ///
 /// ```rust
-/// #[macro_use]
-/// extern crate tantivy;
-/// use tantivy::schema::{SchemaBuilder, TEXT};
-/// use tantivy::{Index, Result};
-/// use tantivy::collector::CountCollector;
+/// use tantivy::collector::Count;
 /// use tantivy::query::QueryParser;
+/// use tantivy::schema::{Schema, TEXT};
+/// use tantivy::{doc, Index};
 ///
-/// # fn main() { example().unwrap(); }
-/// fn example() -> Result<()> {
-///     let mut schema_builder = SchemaBuilder::new();
-///     let title = schema_builder.add_text_field("title", TEXT);
-///     let schema = schema_builder.build();
-///     let index = Index::create_in_ram(schema);
-///     {
-///         let mut index_writer = index.writer(3_000_000)?;
-///         index_writer.add_document(doc!(
-///             title => "The Name of the Wind",
-///         ));
-///         index_writer.add_document(doc!(
-///             title => "The Diary of Muadib",
-///         ));
-///         index_writer.add_document(doc!(
-///             title => "A Dairy Cow",
-///         ));
-///         index_writer.add_document(doc!(
-///             title => "The Diary of a Young Girl",
-///         ));
-///         index_writer.commit().unwrap();
-///     }
+/// let mut schema_builder = Schema::builder();
+/// let title = schema_builder.add_text_field("title", TEXT);
+/// let schema = schema_builder.build();
+/// let index = Index::create_in_ram(schema);
 ///
-///     index.load_searchers()?;
-///     let searcher = index.searcher();
+/// let mut index_writer = index.writer(3_000_000).unwrap();
+/// index_writer.add_document(doc!(title => "The Name of the Wind"));
+/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
+/// index_writer.add_document(doc!(title => "A Dairy Cow"));
+/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
+/// assert!(index_writer.commit().is_ok());
 ///
-///     {
-///	        let mut count_collector = CountCollector::default();
-///         let query_parser = QueryParser::for_index(&index, vec![title]);
-///         let query = query_parser.parse_query("diary")?;
-///         searcher.search(&*query, &mut count_collector).unwrap();
+/// let reader = index.reader().unwrap();
+/// let searcher = reader.searcher();
 ///
-///         assert_eq!(count_collector.count(), 2);
-///     }
+/// // Here comes the important part
+/// let query_parser = QueryParser::for_index(&index, vec![title]);
+/// let query = query_parser.parse_query("diary").unwrap();
+/// let count = searcher.search(&query, &Count).unwrap();
 ///
-///     Ok(())
-/// }
+/// assert_eq!(count, 2);
 /// ```
-#[derive(Default)]
-pub struct CountCollector {
-    count: usize,
-}
+pub struct Count;

-impl CountCollector {
-    /// Returns the count of documents that were
-    /// collected.
-    pub fn count(&self) -> usize {
-        self.count
-    }
-}
+impl Collector for Count {
+    type Fruit = usize;

-impl Collector for CountCollector {
-    fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> Result<()> {
-        Ok(())
-    }
+    type Child = SegmentCountCollector;

-    fn collect(&mut self, _: DocId, _: Score) {
-        self.count += 1;
+    fn for_segment(
+        &self,
+        _: SegmentLocalId,
+        _: &SegmentReader,
+    ) -> crate::Result<SegmentCountCollector> {
+        Ok(SegmentCountCollector::default())
    }

    fn requires_scoring(&self) -> bool {
        false
    }
+
+    fn merge_fruits(&self, segment_counts: Vec<usize>) -> crate::Result<usize> {
+        Ok(segment_counts.into_iter().sum())
+    }
+}
+
+#[derive(Default)]
+pub struct SegmentCountCollector {
+    count: usize,
+}
+
+impl SegmentCollector for SegmentCountCollector {
+    type Fruit = usize;
+
+    fn collect(&mut self, _: DocId, _: Score) {
+        self.count += 1;
+    }
+
+    fn harvest(self) -> usize {
+        self.count
+    }
 }

 #[cfg(test)]
 mod tests {
-
-    use collector::{Collector, CountCollector};
+    use super::{Count, SegmentCountCollector};
+    use crate::collector::Collector;
+    use crate::collector::SegmentCollector;

    #[test]
-    fn test_count_collector() {
-        let mut count_collector = CountCollector::default();
-        assert_eq!(count_collector.count(), 0);
-        count_collector.collect(0u32, 1f32);
-        assert_eq!(count_collector.count(), 1);
-        assert_eq!(count_collector.count(), 1);
-        count_collector.collect(1u32, 1f32);
-        assert_eq!(count_collector.count(), 2);
-        assert!(!count_collector.requires_scoring());
+    fn test_count_collect_does_not_requires_scoring() {
+        assert!(!Count.requires_scoring());
    }

+    #[test]
+    fn test_segment_count_collector() {
+        {
+            let count_collector = SegmentCountCollector::default();
+            assert_eq!(count_collector.harvest(), 0);
+        }
+        {
+            let mut count_collector = SegmentCountCollector::default();
+            count_collector.collect(0u32, 1.0);
+            assert_eq!(count_collector.harvest(), 1);
+        }
+        {
+            let mut count_collector = SegmentCountCollector::default();
+            count_collector.collect(0u32, 1.0);
+            assert_eq!(count_collector.harvest(), 1);
+        }
+        {
+            let mut count_collector = SegmentCountCollector::default();
+            count_collector.collect(0u32, 1.0);
+            count_collector.collect(1u32, 1.0);
+            assert_eq!(count_collector.harvest(), 2);
+        }
+    }
 }
--- a/src/collector/custom_score_top_collector.rs
+++ b/src/collector/custom_score_top_collector.rs
@@ -0,0 +1,125 @@
+use crate::collector::top_collector::{TopCollector, TopSegmentCollector};
+use crate::collector::{Collector, SegmentCollector};
+use crate::{DocAddress, DocId, Score, SegmentReader};
+
+pub(crate) struct CustomScoreTopCollector<TCustomScorer, TScore = Score> {
+    custom_scorer: TCustomScorer, // builds one segment scorer per segment (see `for_segment`)
+    collector: TopCollector<TScore>, // wrapped collector; also merges the per-segment fruits
+}
+
+impl<TCustomScorer, TScore> CustomScoreTopCollector<TCustomScorer, TScore>
+where
+    TScore: Clone + PartialOrd,
+{
+    pub(crate) fn new(
+        custom_scorer: TCustomScorer, // source of the per-segment scorers
+        collector: TopCollector<TScore>, // collector that receives the custom scores
+    ) -> CustomScoreTopCollector<TCustomScorer, TScore> {
+        CustomScoreTopCollector {
+            custom_scorer,
+            collector,
+        }
+    }
+}
+
+/// A custom segment scorer makes it possible to define any kind of score
+/// for a given document belonging to a specific segment.
+///
+/// It is the segment-local counterpart of [`CustomScorer`](./trait.CustomScorer.html).
+pub trait CustomSegmentScorer<TScore>: 'static {
+    /// Computes the score of a specific `doc`.
+    fn score(&mut self, doc: DocId) -> TScore;
+}
+
+/// `CustomScorer` makes it possible to define any kind of score.
+///
+/// The `CustomScorer` itself does not perform much of the computation.
+/// Instead, it builds the `Self::Child` instances that compute
+/// the score at the segment level.
+pub trait CustomScorer<TScore>: Sync {
+    /// Type of the associated [`CustomSegmentScorer`](./trait.CustomSegmentScorer.html).
+    type Child: CustomSegmentScorer<TScore>;
+    /// Builds the child scorer for one segment; the returned scorer is tied to that
+    /// segment. Returns an error if the scorer cannot be built.
+    fn segment_scorer(&self, segment_reader: &SegmentReader) -> crate::Result<Self::Child>;
+}
+
+impl<TCustomScorer, TScore> Collector for CustomScoreTopCollector<TCustomScorer, TScore>
+where
+    TCustomScorer: CustomScorer<TScore> + Send + Sync,
+    TScore: 'static + PartialOrd + Clone + Send + Sync,
+{
+    type Fruit = Vec<(TScore, DocAddress)>; // (custom score, document address) pairs
+
+    type Child = CustomScoreTopSegmentCollector<TCustomScorer::Child, TScore>;
+
+    fn for_segment(
+        &self,
+        segment_local_id: u32,
+        segment_reader: &SegmentReader,
+    ) -> crate::Result<Self::Child> {
+        // Both the scorer and the wrapped collector are specialized for this segment.
+        let segment_scorer = self.custom_scorer.segment_scorer(segment_reader)?;
+        let segment_collector = self
+            .collector
+            .for_segment(segment_local_id, segment_reader)?;
+        Ok(CustomScoreTopSegmentCollector {
+            segment_collector,
+            segment_scorer,
+        })
+    }
+
+    fn requires_scoring(&self) -> bool {
+        false // the child collector ignores the query score and re-scores each doc itself
+    }
+
+    fn merge_fruits(&self, segment_fruits: Vec<Self::Fruit>) -> crate::Result<Self::Fruit> {
+        self.collector.merge_fruits(segment_fruits) // merging is delegated to the wrapped collector
+    }
+}
+
+pub struct CustomScoreTopSegmentCollector<T, TScore>
+where
+    TScore: 'static + PartialOrd + Clone + Send + Sync + Sized,
+    T: CustomSegmentScorer<TScore>,
+{
+    segment_collector: TopSegmentCollector<TScore>, // receives each doc with its custom score
+    segment_scorer: T, // computes the custom score of each collected doc
+}
+
+impl<T, TScore> SegmentCollector for CustomScoreTopSegmentCollector<T, TScore>
+where
+    TScore: 'static + PartialOrd + Clone + Send + Sync,
+    T: 'static + CustomSegmentScorer<TScore>,
+{
+    type Fruit = Vec<(TScore, DocAddress)>;
+
+    fn collect(&mut self, doc: DocId, _score: Score) {
+        let score = self.segment_scorer.score(doc); // replaces the (ignored) query score
+        self.segment_collector.collect(doc, score);
+    }
+
+    fn harvest(self) -> Vec<(TScore, DocAddress)> {
+        self.segment_collector.harvest() // the fruit is the wrapped collector's fruit
+    }
+}
+
+impl<F, TScore, T> CustomScorer<TScore> for F
+where
+    F: 'static + Send + Sync + Fn(&SegmentReader) -> T,
+    T: CustomSegmentScorer<TScore>,
+{
+    type Child = T; // the closure's return value is the segment scorer
+
+    fn segment_scorer(&self, segment_reader: &SegmentReader) -> crate::Result<Self::Child> {
+        Ok((self)(segment_reader)) // a closure-based scorer never reports an error
+    }
+}
+
+impl<F, TScore> CustomSegmentScorer<TScore> for F
+where
+    F: 'static + FnMut(DocId) -> TScore,
+{
+    fn score(&mut self, doc: DocId) -> TScore {
+        (self)(doc) // the closure itself is the scoring function
+    }
+}
--- a/src/collector/facet_collector.rs
+++ b/src/collector/facet_collector.rs
@@ -1,25 +1,21 @@
-use collector::Collector;
-use docset::SkipResult;
-use fastfield::FacetReader;
-use schema::Facet;
-use schema::Field;
-use std::cell::UnsafeCell;
+use crate::collector::Collector;
+use crate::collector::SegmentCollector;
+use crate::fastfield::FacetReader;
+use crate::schema::Facet;
+use crate::schema::Field;
+use crate::DocId;
+use crate::Score;
+use crate::SegmentLocalId;
+use crate::SegmentReader;
+use crate::TantivyError;
+use std::cmp::Ordering;
 use std::collections::btree_map;
 use std::collections::BTreeMap;
 use std::collections::BTreeSet;
 use std::collections::BinaryHeap;
 use std::collections::Bound;
 use std::iter::Peekable;
-use std::mem;
 use std::{u64, usize};
-use termdict::TermMerger;
-
-use std::cmp::Ordering;
-use DocId;
-use Result;
-use Score;
-use SegmentLocalId;
-use SegmentReader;

 struct Hit<'a> {
    count: u64,
@@ -29,13 +25,13 @@ struct Hit<'a> {
 impl<'a> Eq for Hit<'a> {}

 impl<'a> PartialEq<Hit<'a>> for Hit<'a> {
-    fn eq(&self, other: &Hit) -> bool {
+    fn eq(&self, other: &Hit<'_>) -> bool {
        self.count == other.count
    }
 }

 impl<'a> PartialOrd<Hit<'a>> for Hit<'a> {
-    fn partial_cmp(&self, other: &Hit) -> Option<Ordering> {
+    fn partial_cmp(&self, other: &Hit<'_>) -> Option<Ordering> {
        Some(self.cmp(other))
    }
 }
@@ -46,12 +42,6 @@ impl<'a> Ord for Hit<'a> {
    }
 }

-struct SegmentFacetCounter {
-    pub facet_reader: FacetReader,
-    pub facet_ords: Vec<u64>,
-    pub facet_counts: Vec<u64>,
-}
-
 fn facet_depth(facet_bytes: &[u8]) -> usize {
    if facet_bytes.is_empty() {
        0
@@ -89,16 +79,13 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
 ///
 ///
 /// ```rust
-/// #[macro_use]
-/// extern crate tantivy;
-/// use tantivy::schema::{Facet, SchemaBuilder, TEXT};
-/// use tantivy::{Index, Result};
 /// use tantivy::collector::FacetCollector;
 /// use tantivy::query::AllQuery;
+/// use tantivy::schema::{Facet, Schema, TEXT};
+/// use tantivy::{doc, Index};
 ///
-/// # fn main() { example().unwrap(); }
-/// fn example() -> Result<()> {
-///     let mut schema_builder = SchemaBuilder::new();
+/// fn example() -> tantivy::Result<()> {
+///     let mut schema_builder = Schema::builder();
 ///
 ///     // Facet have their own specific type.
 ///     // It is not a bad practise to put all of your
@@ -131,23 +118,19 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
 ///             facet => Facet::from("/lang/en"),
 ///             facet => Facet::from("/category/biography")
 ///         ));
-///         index_writer.commit().unwrap();
+///         index_writer.commit()?;
 ///     }
-///
-///     index.load_searchers()?;
-///     let searcher = index.searcher();
+///     let reader = index.reader()?;
+///     let searcher = reader.searcher();
 ///
 ///     {
-///			let mut facet_collector = FacetCollector::for_field(facet);
+///         let mut facet_collector = FacetCollector::for_field(facet);
 ///         facet_collector.add_facet("/lang");
 ///         facet_collector.add_facet("/category");
-///         searcher.search(&AllQuery, &mut facet_collector).unwrap();
-///
-///         // this object contains count aggregate for all of the facets.
-///         let counts = facet_collector.harvest();
+///         let facet_counts = searcher.search(&AllQuery, &facet_collector)?;
 ///
 ///         // This lists all of the facet counts
-///         let facets: Vec<(&Facet, u64)> = counts
+///         let facets: Vec<(&Facet, u64)> = facet_counts
 ///             .get("/category")
 ///             .collect();
 ///         assert_eq!(facets, vec![
@@ -157,15 +140,12 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
 ///     }
 ///
 ///     {
-///			let mut facet_collector = FacetCollector::for_field(facet);
+///         let mut facet_collector = FacetCollector::for_field(facet);
 ///         facet_collector.add_facet("/category/fiction");
-///         searcher.search(&AllQuery, &mut facet_collector).unwrap();
-///
-///         // this object contains count aggregate for all of the facets.
-///         let counts = facet_collector.harvest();
+///         let facet_counts = searcher.search(&AllQuery, &facet_collector)?;
 ///
 ///         // This lists all of the facet counts
-///         let facets: Vec<(&Facet, u64)> = counts
+///         let facets: Vec<(&Facet, u64)> = facet_counts
 ///             .get("/category/fiction")
 ///             .collect();
 ///         assert_eq!(facets, vec![
@@ -175,16 +155,13 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
 ///         ]);
 ///     }
 ///
-///    {
-///			let mut facet_collector = FacetCollector::for_field(facet);
+///     {
+///         let mut facet_collector = FacetCollector::for_field(facet);
 ///         facet_collector.add_facet("/category/fiction");
-///         searcher.search(&AllQuery, &mut facet_collector).unwrap();
-///
-///         // this object contains count aggregate for all of the facets.
-///         let counts = facet_collector.harvest();
+///         let facet_counts = searcher.search(&AllQuery, &facet_collector)?;
 ///
 ///         // This lists all of the facet counts
-///         let facets: Vec<(&Facet, u64)> = counts.top_k("/category/fiction", 1);
+///         let facets: Vec<(&Facet, u64)> = facet_counts.top_k("/category/fiction", 1);
 ///         assert_eq!(facets, vec![
 ///             (&Facet::from("/category/fiction/fantasy"), 2)
 ///         ]);
@@ -192,40 +169,46 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
 ///
 ///     Ok(())
 /// }
+/// # assert!(example().is_ok());
 /// ```
 pub struct FacetCollector {
-    facet_ords: Vec<u64>,
    field: Field,
-    ff_reader: Option<UnsafeCell<FacetReader>>,
-    segment_counters: Vec<SegmentFacetCounter>,
-
-    // facet_ord -> collapse facet_id
-    current_segment_collapse_mapping: Vec<usize>,
-    // collapse facet_id -> count
-    current_segment_counts: Vec<u64>,
-    // collapse facet_id -> facet_ord
-    current_collapse_facet_ords: Vec<u64>,
-
    facets: BTreeSet<Facet>,
 }

+pub struct FacetSegmentCollector {
+    reader: FacetReader, // facet reader of the segment being collected
+    facet_ords_buf: Vec<u64>, // scratch buffer refilled with the facet ords of each doc
+    // facet_ord -> collapsed facet id (id 0 is the catch-all bucket)
+    collapse_mapping: Vec<usize>,
+    // collapsed facet id -> number of docs counted for it
+    counts: Vec<u64>,
+    // collapsed facet id -> facet_ord of the facet it stands for
+    collapse_facet_ords: Vec<u64>,
+}
+
+enum SkipResult {
+    Found, // the target is one of the facets in the iterator
+    NotFound, // a greater facet was reached, or the iterator is exhausted
+}
+
 fn skip<'a, I: Iterator<Item = &'a Facet>>(
    target: &[u8],
    collapse_it: &mut Peekable<I>,
 ) -> SkipResult {
    loop {
        match collapse_it.peek() {
-            Some(facet_bytes) => match facet_bytes.encoded_bytes().cmp(target) {
+            Some(facet_bytes) => match facet_bytes.encoded_str().as_bytes().cmp(target) {
                Ordering::Less => {}
                Ordering::Greater => {
-                    return SkipResult::OverStep;
+                    return SkipResult::NotFound;
                }
                Ordering::Equal => {
-                    return SkipResult::Reached;
+                    return SkipResult::Found;
                }
            },
            None => {
-                return SkipResult::End;
+                return SkipResult::NotFound;
            }
        }
        collapse_it.next();
@@ -240,15 +223,8 @@ impl FacetCollector {
    /// is of the proper type.
    pub fn for_field(field: Field) -> FacetCollector {
        FacetCollector {
-            facet_ords: Vec::with_capacity(255),
-            segment_counters: Vec::new(),
            field,
-            ff_reader: None,
-            facets: BTreeSet::new(),
-
-            current_segment_collapse_mapping: Vec::new(),
-            current_collapse_facet_ords: Vec::new(),
-            current_segment_counts: Vec::new(),
+            facets: BTreeSet::default(),
        }
    }

@@ -278,141 +254,103 @@ impl FacetCollector {
        }
        self.facets.insert(facet);
    }
-
-    fn set_collapse_mapping(&mut self, facet_reader: &FacetReader) {
-        self.current_segment_collapse_mapping.clear();
-        self.current_collapse_facet_ords.clear();
-        self.current_segment_counts.clear();
-        let mut collapse_facet_it = self.facets.iter().peekable();
-        self.current_collapse_facet_ords.push(0);
-        let mut facet_streamer = facet_reader.facet_dict().range().into_stream();
-        if !facet_streamer.advance() {
-            return;
-        }
-        'outer: loop {
-            // at the begining of this loop, facet_streamer
-            // is positionned on a term that has not been processed yet.
-            let skip_result = skip(facet_streamer.key(), &mut collapse_facet_it);
-            match skip_result {
-                SkipResult::Reached => {
-                    // we reach a facet we decided to collapse.
-                    let collapse_depth = facet_depth(facet_streamer.key());
-                    let mut collapsed_id = 0;
-                    self.current_segment_collapse_mapping.push(0);
-                    while facet_streamer.advance() {
-                        let depth = facet_depth(facet_streamer.key());
-                        if depth <= collapse_depth {
-                            continue 'outer;
-                        }
-                        if depth == collapse_depth + 1 {
-                            collapsed_id = self.current_collapse_facet_ords.len();
-                            self.current_collapse_facet_ords
-                                .push(facet_streamer.term_ord());
-                            self.current_segment_collapse_mapping.push(collapsed_id);
-                        } else {
-                            self.current_segment_collapse_mapping.push(collapsed_id);
-                        }
-                    }
-                    break;
-                }
-                SkipResult::End | SkipResult::OverStep => {
-                    self.current_segment_collapse_mapping.push(0);
-                    if !facet_streamer.advance() {
-                        break;
-                    }
-                }
-            }
-        }
-    }
-
-    fn finalize_segment(&mut self) {
-        if self.ff_reader.is_some() {
-            self.segment_counters.push(SegmentFacetCounter {
-                facet_reader: self.ff_reader.take().unwrap().into_inner(),
-                facet_ords: mem::replace(&mut self.current_collapse_facet_ords, Vec::new()),
-                facet_counts: mem::replace(&mut self.current_segment_counts, Vec::new()),
-            });
-        }
-    }
-
-    /// Returns the results of the collection.
-    ///
-    /// This method does not just return the counters,
-    /// it also translates the facet ordinals of the last segment.
-    pub fn harvest(mut self) -> FacetCounts {
-        self.finalize_segment();
-
-        let collapsed_facet_ords: Vec<&[u64]> = self.segment_counters
-            .iter()
-            .map(|segment_counter| &segment_counter.facet_ords[..])
-            .collect();
-        let collapsed_facet_counts: Vec<&[u64]> = self.segment_counters
-            .iter()
-            .map(|segment_counter| &segment_counter.facet_counts[..])
-            .collect();
-
-        let facet_streams = self.segment_counters
-            .iter()
-            .map(|seg_counts| seg_counts.facet_reader.facet_dict().range().into_stream())
-            .collect::<Vec<_>>();
-
-        let mut facet_merger = TermMerger::new(facet_streams);
-        let mut facet_counts = BTreeMap::new();
-
-        while facet_merger.advance() {
-            let count = facet_merger
-                .current_kvs()
-                .iter()
-                .map(|it| {
-                    let seg_ord = it.segment_ord;
-                    let term_ord = it.streamer.term_ord();
-                    collapsed_facet_ords[seg_ord]
-                        .binary_search(&term_ord)
-                        .map(|collapsed_term_id| {
-                            if collapsed_term_id == 0 {
-                                0
-                            } else {
-                                collapsed_facet_counts[seg_ord][collapsed_term_id]
-                            }
-                        })
-                        .unwrap_or(0)
-                })
-                .sum();
-            if count > 0u64 {
-                let bytes: Vec<u8> = facet_merger.key().to_owned();
-                // may create an corrupted facet if the term dicitonary is corrupted
-                let facet = unsafe { Facet::from_encoded(bytes) };
-                facet_counts.insert(facet, count);
-            }
-        }
-        FacetCounts { facet_counts }
-    }
 }

 impl Collector for FacetCollector {
-    fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
-        self.finalize_segment();
-        let facet_reader = reader.facet_reader(self.field)?;
-        self.set_collapse_mapping(&facet_reader);
-        self.current_segment_counts
-            .resize(self.current_collapse_facet_ords.len(), 0);
-        self.ff_reader = Some(UnsafeCell::new(facet_reader));
-        Ok(())
+    type Fruit = FacetCounts;
+
+    type Child = FacetSegmentCollector;
+
+    fn for_segment(
+        &self,
+        _: SegmentLocalId,
+        reader: &SegmentReader,
+    ) -> crate::Result<FacetSegmentCollector> {
+        let field_name = reader.schema().get_field_name(self.field);
+        let facet_reader = reader.facet_reader(self.field).ok_or_else(|| {
+            TantivyError::SchemaError(format!("Field {:?} is not a facet field.", field_name))
+        })?;
+
+        let mut collapse_mapping = Vec::new();
+        let mut counts = Vec::new();
+        let mut collapse_facet_ords = Vec::new();
+
+        let mut collapse_facet_it = self.facets.iter().peekable();
+        collapse_facet_ords.push(0); // reserve collapsed id 0 as the catch-all bucket
+        {
+            let mut facet_streamer = facet_reader.facet_dict().range().into_stream();
+            if facet_streamer.advance() {
+                'outer: loop {
+                    // at the beginning of this loop, facet_streamer
+                    // is positioned on a term that has not been processed yet.
+                    let skip_result = skip(facet_streamer.key(), &mut collapse_facet_it);
+                    match skip_result {
+                        SkipResult::Found => {
+                            // we reached a facet we decided to collapse.
+                            let collapse_depth = facet_depth(facet_streamer.key());
+                            let mut collapsed_id = 0;
+                            collapse_mapping.push(0); // the collapsed facet itself goes to bucket 0
+                            while facet_streamer.advance() {
+                                let depth = facet_depth(facet_streamer.key());
+                                if depth <= collapse_depth {
+                                    continue 'outer;
+                                }
+                                if depth == collapse_depth + 1 {
+                                    collapsed_id = collapse_facet_ords.len(); // direct child: new id
+                                    collapse_facet_ords.push(facet_streamer.term_ord());
+                                    collapse_mapping.push(collapsed_id);
+                                } else {
+                                    collapse_mapping.push(collapsed_id); // deeper: reuse current id
+                                }
+                            }
+                            break;
+                        }
+                        SkipResult::NotFound => {
+                            collapse_mapping.push(0); // not a requested facet: bucket 0
+                            if !facet_streamer.advance() {
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        counts.resize(collapse_facet_ords.len(), 0); // one zeroed counter per collapsed id
+
+        Ok(FacetSegmentCollector {
+            reader: facet_reader,
+            facet_ords_buf: Vec::with_capacity(255),
+            collapse_mapping,
+            counts,
+            collapse_facet_ords,
+        })
+    }

+    fn requires_scoring(&self) -> bool {
+        false // facet counting never looks at the score
+    }
+
+    fn merge_fruits(&self, segments_facet_counts: Vec<FacetCounts>) -> crate::Result<FacetCounts> {
+        let mut facet_counts: BTreeMap<Facet, u64> = BTreeMap::new();
+        for segment_facet_counts in segments_facet_counts {
+            for (facet, count) in segment_facet_counts.facet_counts {
+                // a facet present in several segments gets the sum of its per-segment counts
+                *(facet_counts.entry(facet).or_insert(0)) += count;
+            }
+        }
+        Ok(FacetCounts { facet_counts })
+    }
+}
+
+impl SegmentCollector for FacetSegmentCollector {
+    type Fruit = FacetCounts;
+
    fn collect(&mut self, doc: DocId, _: Score) {
-        let facet_reader: &mut FacetReader = unsafe {
-            &mut *self.ff_reader
-                .as_ref()
-                .expect("collect() was called before set_segment. This should never happen.")
-                .get()
-        };
-        facet_reader.facet_ords(doc, &mut self.facet_ords);
+        self.reader.facet_ords(doc, &mut self.facet_ords_buf);
        let mut previous_collapsed_ord: usize = usize::MAX;
-        for &facet_ord in &self.facet_ords {
-            let collapsed_ord = self.current_segment_collapse_mapping[facet_ord as usize];
-            self.current_segment_counts[collapsed_ord] += if collapsed_ord == previous_collapsed_ord
-            {
+        for &facet_ord in &self.facet_ords_buf {
+            let collapsed_ord = self.collapse_mapping[facet_ord as usize];
+            self.counts[collapsed_ord] += if collapsed_ord == previous_collapsed_ord {
                0
            } else {
                1
@@ -421,8 +359,24 @@ impl Collector for FacetCollector {
        }
    }

-    fn requires_scoring(&self) -> bool {
-        false
+    /// Returns the facet counts collected for this segment.
+    ///
+    /// This method does not just return the counters: it also translates the
+    /// collapsed facet ordinals of this segment back into `Facet` keys.
+    fn harvest(self) -> FacetCounts {
+        let mut facet_counts = BTreeMap::new();
+        let facet_dict = self.reader.facet_dict();
+        for (collapsed_facet_ord, count) in self.counts.iter().cloned().enumerate() {
+            if count == 0 {
+                continue;
+            }
+            let mut facet = vec![];
+            let facet_ord = self.collapse_facet_ords[collapsed_facet_ord];
+            facet_dict.ord_to_term(facet_ord as u64, &mut facet);
+            // TODO: handle the failure instead of unwrapping (panics if `from_encoded` fails).
+            facet_counts.insert(Facet::from_encoded(facet).unwrap(), count);
+        }
+        FacetCounts { facet_counts }
     }
 }

@@ -445,7 +399,7 @@ impl<'a> Iterator for FacetChildIterator<'a> {
 }

 impl FacetCounts {
-    pub fn get<T>(&self, facet_from: T) -> FacetChildIterator
+    pub fn get<T>(&self, facet_from: T) -> FacetChildIterator<'_>
    where
        Facet: From<T>,
    {
@@ -454,12 +408,13 @@ impl FacetCounts {
        let right_bound = if facet.is_root() {
            Bound::Unbounded
        } else {
-            let mut facet_after_bytes: Vec<u8> = facet.encoded_bytes().to_owned();
-            facet_after_bytes.push(1u8);
-            let facet_after = unsafe { Facet::from_encoded(facet_after_bytes) }; // ok logic
+            let mut facet_after_bytes: String = facet.encoded_str().to_owned();
+            facet_after_bytes.push('\u{1}');
+            let facet_after = Facet::from_encoded_string(facet_after_bytes);
            Bound::Excluded(facet_after)
        };
-        let underlying: btree_map::Range<_, _> = self.facet_counts.range((left_bound, right_bound));
+        let underlying: btree_map::Range<'_, _, _> =
+            self.facet_counts.range((left_bound, right_bound));
        FacetChildIterator { underlying }
    }

@@ -470,17 +425,24 @@ impl FacetCounts {
        let mut heap = BinaryHeap::with_capacity(k);
        let mut it = self.get(facet);

+        // push the first k elements to first bring the heap
+        // to capacity
        for (facet, count) in (&mut it).take(k) {
            heap.push(Hit { count, facet });
        }

-        let mut lowest_count: u64 = heap.peek().map(|hit| hit.count).unwrap_or(u64::MIN);
+        let mut lowest_count: u64 = heap.peek().map(|hit| hit.count).unwrap_or(u64::MIN); //< the `unwrap_or` case may be triggered but the value
+                                                                                          // is never used in that case.
+
        for (facet, count) in it {
            if count > lowest_count {
-                lowest_count = count;
                if let Some(mut head) = heap.peek_mut() {
                    *head = Hit { count, facet };
                }
+                // the heap gets reconstructed at this point
+                if let Some(head) = heap.peek() {
+                    lowest_count = head.count;
+                }
            }
        }
        heap.into_sorted_vec()
@@ -493,16 +455,19 @@ impl FacetCounts {
 #[cfg(test)]
 mod tests {
    use super::{FacetCollector, FacetCounts};
-    use core::Index;
-    use query::AllQuery;
+    use crate::collector::Count;
+    use crate::core::Index;
+    use crate::query::{AllQuery, QueryParser, TermQuery};
+    use crate::schema::{Document, Facet, Field, IndexRecordOption, Schema};
+    use crate::Term;
+    use rand::distributions::Uniform;
+    use rand::prelude::SliceRandom;
    use rand::{thread_rng, Rng};
-    use schema::Field;
-    use schema::{Document, Facet, SchemaBuilder};
    use std::iter;

    #[test]
    fn test_facet_collector_drilldown() {
-        let mut schema_builder = SchemaBuilder::new();
+        let mut schema_builder = Schema::builder();
        let facet_field = schema_builder.add_facet_field("facet");
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);
@@ -525,14 +490,12 @@ mod tests {
            index_writer.add_document(doc);
        }
        index_writer.commit().unwrap();
-        index.load_searchers().unwrap();
-        let searcher = index.searcher();
-
+        let reader = index.reader().unwrap();
+        let searcher = reader.searcher();
        let mut facet_collector = FacetCollector::for_field(facet_field);
        facet_collector.add_facet(Facet::from("/top1"));
-        searcher.search(&AllQuery, &mut facet_collector).unwrap();
+        let counts = searcher.search(&AllQuery, &facet_collector).unwrap();

-        let counts: FacetCounts = facet_collector.harvest();
        {
            let facets: Vec<(String, u64)> = counts
                .get("/top1")
@@ -545,91 +508,168 @@ mod tests {
                    ("/top1/mid1", 50),
                    ("/top1/mid2", 50),
                    ("/top1/mid3", 50),
-                ].iter()
-                    .map(|&(facet_str, count)| (String::from(facet_str), count))
-                    .collect::<Vec<_>>()
+                ]
+                .iter()
+                .map(|&(facet_str, count)| (String::from(facet_str), count))
+                .collect::<Vec<_>>()
            );
        }
    }

    #[test]
-    #[should_panic(
-        expected = "Tried to add a facet which is a descendant of \
-                    an already added facet."
-    )]
+    #[should_panic(expected = "Tried to add a facet which is a descendant of \
+                               an already added facet.")]
    fn test_misused_facet_collector() {
-        let mut facet_collector = FacetCollector::for_field(Field(0));
+        let mut facet_collector = FacetCollector::for_field(Field::from_field_id(0));
        facet_collector.add_facet(Facet::from("/country"));
        facet_collector.add_facet(Facet::from("/country/europe"));
    }

+    #[test]
+    fn test_doc_unsorted_multifacet() {
+        let mut schema_builder = Schema::builder();
+        let facet_field = schema_builder.add_facet_field("facets");
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+        let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
+        index_writer.add_document(doc!( // one doc; its A and B facets are added interleaved
+            facet_field => Facet::from_text(&"/subjects/A/a"),
+            facet_field => Facet::from_text(&"/subjects/B/a"),
+            facet_field => Facet::from_text(&"/subjects/A/b"),
+            facet_field => Facet::from_text(&"/subjects/B/b"),
+        ));
+        index_writer.commit().unwrap();
+        let reader = index.reader().unwrap();
+        let searcher = reader.searcher();
+        assert_eq!(searcher.num_docs(), 1);
+        let mut facet_collector = FacetCollector::for_field(facet_field);
+        facet_collector.add_facet("/subjects");
+        let counts = searcher.search(&AllQuery, &facet_collector).unwrap();
+        let facets: Vec<(&Facet, u64)> = counts.get("/subjects").collect();
+        assert_eq!(facets[0].1, 1); // presumably guards against counting the single doc twice
+    }
+
+    #[test]
+    fn test_doc_search_by_facet() {
+        let mut schema_builder = Schema::builder();
+        let facet_field = schema_builder.add_facet_field("facet");
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+        let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
+        index_writer.add_document(doc!(
+            facet_field => Facet::from_text(&"/A/A"),
+        ));
+        index_writer.add_document(doc!(
+            facet_field => Facet::from_text(&"/A/B"),
+        ));
+        index_writer.add_document(doc!(
+            facet_field => Facet::from_text(&"/A/C/A"),
+        ));
+        index_writer.add_document(doc!(
+            facet_field => Facet::from_text(&"/D/C/A"),
+        ));
+        index_writer.commit().unwrap();
+        let reader = index.reader().unwrap();
+        let searcher = reader.searcher();
+        assert_eq!(searcher.num_docs(), 4);
+
+        // Counts the docs matched by a term query on the given facet.
+        let count_facet = |facet_str: &str| {
+            let term = Term::from_facet(facet_field, &Facet::from_text(facet_str));
+            searcher
+                .search(&TermQuery::new(term, IndexRecordOption::Basic), &Count)
+                .unwrap()
+        };
+
+        assert_eq!(count_facet("/"), 4); // the root facet matches every doc
+        assert_eq!(count_facet("/A"), 3); // a parent facet matches the docs of its descendants
+        assert_eq!(count_facet("/A/B"), 1);
+        assert_eq!(count_facet("/A/C"), 1);
+        assert_eq!(count_facet("/A/C/A"), 1);
+        assert_eq!(count_facet("/C/A"), 0); // `/C/A` only occurs as a suffix, never as a prefix
+        {
+            let query_parser = QueryParser::for_index(&index, vec![]);
+            {
+                let query = query_parser.parse_query("facet:/A/B").unwrap();
+                assert_eq!(1, searcher.search(&query, &Count).unwrap()); // same count as above
+            }
+            {
+                let query = query_parser.parse_query("facet:/A").unwrap();
+                assert_eq!(3, searcher.search(&query, &Count).unwrap()); // same count as above
+            }
+        }
+    }
+
    #[test]
    fn test_non_used_facet_collector() {
-        let mut facet_collector = FacetCollector::for_field(Field(0));
+        let mut facet_collector = FacetCollector::for_field(Field::from_field_id(0));
        facet_collector.add_facet(Facet::from("/country"));
        facet_collector.add_facet(Facet::from("/countryeurope"));
    }

    #[test]
    fn test_facet_collector_topk() {
-        let mut schema_builder = SchemaBuilder::new();
+        let mut schema_builder = Schema::builder();
        let facet_field = schema_builder.add_facet_field("facet");
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);

+        let uniform = Uniform::new_inclusive(1, 100_000);
        let mut docs: Vec<Document> = vec![("a", 10), ("b", 100), ("c", 7), ("d", 12), ("e", 21)]
            .into_iter()
            .flat_map(|(c, count)| {
-                let facet = Facet::from(&format!("/facet_{}", c));
+                let facet = Facet::from(&format!("/facet/{}", c));
                let doc = doc!(facet_field => facet);
                iter::repeat(doc).take(count)
            })
+            .map(|mut doc| {
+                doc.add_facet(
+                    facet_field,
+                    &format!("/facet/{}", thread_rng().sample(&uniform)),
+                );
+                doc
+            })
            .collect();
-        thread_rng().shuffle(&mut docs[..]);
+        docs[..].shuffle(&mut thread_rng());

        let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
        for doc in docs {
            index_writer.add_document(doc);
        }
        index_writer.commit().unwrap();
-        index.load_searchers().unwrap();
-
-        let searcher = index.searcher();
+        let searcher = index.reader().unwrap().searcher();

        let mut facet_collector = FacetCollector::for_field(facet_field);
-        facet_collector.add_facet("/");
-        searcher.search(&AllQuery, &mut facet_collector).unwrap();
+        facet_collector.add_facet("/facet");
+        let counts: FacetCounts = searcher.search(&AllQuery, &facet_collector).unwrap();

-        let counts: FacetCounts = facet_collector.harvest();
        {
-            let facets: Vec<(&Facet, u64)> = counts.top_k("/", 3);
+            let facets: Vec<(&Facet, u64)> = counts.top_k("/facet", 3);
            assert_eq!(
                facets,
                vec![
-                    (&Facet::from("/facet_b"), 100),
-                    (&Facet::from("/facet_e"), 21),
-                    (&Facet::from("/facet_d"), 12),
+                    (&Facet::from("/facet/b"), 100),
+                    (&Facet::from("/facet/e"), 21),
+                    (&Facet::from("/facet/d"), 12),
                ]
            );
        }
    }
-
 }

 #[cfg(all(test, feature = "unstable"))]
 mod bench {

-    use collector::FacetCollector;
-    use query::AllQuery;
-    use rand::{thread_rng, Rng};
-    use schema::Facet;
-    use schema::SchemaBuilder;
+    use crate::collector::FacetCollector;
+    use crate::query::AllQuery;
+    use crate::schema::{Facet, Schema};
+    use crate::Index;
+    use rand::seq::SliceRandom;
+    use rand::thread_rng;
    use test::Bencher;
-    use Index;

    #[bench]
    fn bench_facet_collector(b: &mut Bencher) {
-        let mut schema_builder = SchemaBuilder::new();
+        let mut schema_builder = Schema::builder();
        let facet_field = schema_builder.add_facet_field("facet");
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);
@@ -642,19 +682,18 @@ mod bench {
            }
        }
        // 40425 docs
-        thread_rng().shuffle(&mut docs[..]);
+        docs[..].shuffle(&mut thread_rng());

        let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
        for doc in docs {
            index_writer.add_document(doc);
        }
        index_writer.commit().unwrap();
-        index.load_searchers().unwrap();
-
+        let reader = index.reader().unwrap();
        b.iter(|| {
-            let searcher = index.searcher();
-            let mut facet_collector = FacetCollector::for_field(facet_field);
-            searcher.search(&AllQuery, &mut facet_collector).unwrap();
+            let searcher = reader.searcher();
+            let facet_collector = FacetCollector::for_field(facet_field);
+            searcher.search(&AllQuery, &facet_collector).unwrap();
        });
    }
 }
--- a/src/collector/int_facet_collector.rs
+++ b/src/collector/int_facet_collector.rs
@@ -79,21 +79,23 @@ mod tests {
    // make sure we have facet counters correctly filled
    fn test_facet_collector_results() {

-        let mut schema_builder = schema::SchemaBuilder::new();
+        let mut schema_builder = schema::Schema::builder();
        let num_field_i64 = schema_builder.add_i64_field("num_i64", FAST);
        let num_field_u64 = schema_builder.add_u64_field("num_u64", FAST);
+        let num_field_f64 = schema_builder.add_f64_field("num_f64", FAST);
        let text_field = schema_builder.add_text_field("text", STRING);
        let schema = schema_builder.build();

        let index = Index::create_in_ram(schema.clone());

        {
-            let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
+            let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
            {
                for i in 0u64..10u64 {
                    index_writer.add_document(doc!(
                        num_field_i64 => ((i as i64) % 3i64) as i64,
                        num_field_u64 => (i % 2u64) as u64,
+                        num_field_f64 => (i % 4u64) as f64,
                        text_field => "text"
                    ));
                }
@@ -101,14 +103,14 @@ mod tests {
            assert_eq!(index_writer.commit().unwrap(), 10u64);
        }

-        index.load_searchers().unwrap();
-        let searcher = index.searcher();
+        let searcher = index.reader().searcher();
        let mut ffvf_i64: IntFacetCollector<I64FastFieldReader> = IntFacetCollector::new(num_field_i64);
        let mut ffvf_u64: IntFacetCollector<U64FastFieldReader> = IntFacetCollector::new(num_field_u64);
+        let mut ffvf_f64: IntFacetCollector<F64FastFieldReader> = IntFacetCollector::new(num_field_f64);

        {
            // perform the query
-            let mut facet_collectors = chain().push(&mut ffvf_i64).push(&mut ffvf_u64);
+            let mut facet_collectors = chain().push(&mut ffvf_i64).push(&mut ffvf_u64).push(&mut ffvf_f64);
            let mut query_parser = QueryParser::for_index(index, vec![text_field]);
            let query = query_parser.parse_query("text:text").unwrap();
            query.search(&searcher, &mut facet_collectors).unwrap();
@@ -118,6 +120,8 @@ mod tests {
        assert_eq!(ffvf_u64.counters[&1], 5);
        assert_eq!(ffvf_i64.counters[&0], 4);
        assert_eq!(ffvf_i64.counters[&1], 3);
+        assert_eq!(ffvf_f64.counters[&0.0], 3);
+        assert_eq!(ffvf_f64.counters[&2.0], 2);

    }
 }
--- a/src/collector/mod.rs
+++ b/src/collector/mod.rs
@@ -1,246 +1,401 @@
 /*!
-Defines how the documents matching a search query should be processed.
+
+# Collectors
+
+Collectors define the information you want to extract from the documents matching the queries.
+In tantivy jargon, we call this information your search "fruit".
+
+Your fruit could for instance be:
+- [the count of matching documents](./struct.Count.html)
+- [the top 10 documents, by relevancy or by a fast field](./struct.TopDocs.html)
+- [facet counts](./struct.FacetCollector.html)
+
+At one point in your code, you will trigger the actual search operation by calling
+[the `search(...)` method of your `Searcher` object](../struct.Searcher.html#method.search).
+This call will look like this.
+
+```verbatim
+let fruit = searcher.search(&query, &collector)?;
+```
+
+Here the type of fruit is actually determined as an associated type of the collector (`Collector::Fruit`).
+
+
+# Combining several collectors
+
+A rich search experience often requires running several collectors on your search query.
+For instance,
+- selecting the top-K products matching your query
+- counting the matching documents
+- computing several facets
+- computing statistics about the matching product prices
+
+A simple and efficient way to do that is to pass your collectors as one tuple.
+The resulting `Fruit` will then be a typed tuple with each collector's original fruits
+in their respective position.
+
+```rust
+# use tantivy::schema::*;
+# use tantivy::*;
+# use tantivy::query::*;
+use tantivy::collector::{Count, TopDocs};
+#
+# fn main() -> tantivy::Result<()> {
+# let mut schema_builder = Schema::builder();
+#     let title = schema_builder.add_text_field("title", TEXT);
+#     let schema = schema_builder.build();
+#     let index = Index::create_in_ram(schema);
+#     let mut index_writer = index.writer(3_000_000)?;
+#       index_writer.add_document(doc!(
+#       title => "The Name of the Wind",
+#      ));
+#     index_writer.add_document(doc!(
+#        title => "The Diary of Muadib",
+#     ));
+#     index_writer.commit()?;
+#     let reader = index.reader()?;
+#     let searcher = reader.searcher();
+#     let query_parser = QueryParser::for_index(&index, vec![title]);
+#     let query = query_parser.parse_query("diary")?;
+let (doc_count, top_docs): (usize, Vec<(Score, DocAddress)>) =
+    searcher.search(&query, &(Count, TopDocs::with_limit(2)))?;
+#     Ok(())
+# }
+```
+
+The `Collector` trait is implemented for tuples of up to 4 collectors.
+If you have more than 4 collectors, you can either group them into
+tuples of tuples `(a,(b,(c,d)))`, or rely on [`MultiCollector`](./struct.MultiCollector.html).
+
+# Combining several collectors dynamically
+
+Combining collectors into a tuple is a zero-cost abstraction: everything
+happens as if you had manually implemented a single collector
+combining all of our features.
+
+Unfortunately it requires you to know at compile time your collector types.
+If on the other hand, the collectors depend on some query parameter,
+you can rely on a `MultiCollector`.
+
+
+# Implementing your own collectors.
+
+See the `custom_collector` example.
+
 */

-use DocId;
-use Result;
-use Score;
-use SegmentLocalId;
-use SegmentReader;
+use crate::DocId;
+use crate::Score;
+use crate::SegmentLocalId;
+use crate::SegmentReader;
+use downcast_rs::impl_downcast;

 mod count_collector;
-pub use self::count_collector::CountCollector;
+pub use self::count_collector::Count;

 mod multi_collector;
 pub use self::multi_collector::MultiCollector;

 mod top_collector;
-pub use self::top_collector::TopCollector;
+
+mod top_score_collector;
+pub use self::top_score_collector::TopDocs;
+
+mod custom_score_top_collector;
+pub use self::custom_score_top_collector::{CustomScorer, CustomSegmentScorer};
+
+mod tweak_score_top_collector;
+pub use self::tweak_score_top_collector::{ScoreSegmentTweaker, ScoreTweaker};

 mod facet_collector;
 pub use self::facet_collector::FacetCollector;
+use crate::query::Weight;

-mod chained_collector;
-pub use self::chained_collector::{chain, ChainedCollector};
+/// `Fruit` is the type for the result of our collection.
+/// e.g. `usize` for the `Count` collector.
+pub trait Fruit: Send + downcast_rs::Downcast {}
+
+impl<T> Fruit for T where T: Send + downcast_rs::Downcast {}

 /// Collectors are in charge of collecting and retaining relevant
 /// information from the document found and scored by the query.
 ///
-///
 /// For instance,
 ///
 /// - keeping track of the top 10 best documents
 /// - computing a breakdown over a fast field
 /// - computing the number of documents matching the query
 ///
-/// Queries are in charge of pushing the `DocSet` to the collector.
+/// Our search index is in fact a collection of segments, so
+/// a `Collector` trait is actually more of a factory to instantiate
+/// `SegmentCollector`s for each segment.
 ///
-/// As they work on multiple segments, they first inform
-/// the collector of a change in a segment and then
-/// call the `collect` method to push the document to the collector.
-///
-/// Temporally, our collector will receive calls
-/// - `.set_segment(0, segment_reader_0)`
-/// - `.collect(doc0_of_segment_0)`
-/// - `.collect(...)`
-/// - `.collect(last_doc_of_segment_0)`
-/// - `.set_segment(1, segment_reader_1)`
-/// - `.collect(doc0_of_segment_1)`
-/// - `.collect(...)`
-/// - `.collect(last_doc_of_segment_1)`
-/// - `...`
-/// - `.collect(last_doc_of_last_segment)`
+/// The collection logic itself is in the `SegmentCollector`.
 ///
 /// Segments are not guaranteed to be visited in any specific order.
-pub trait Collector {
+pub trait Collector: Sync + Send {
+    /// `Fruit` is the type for the result of our collection.
+    /// e.g. `usize` for the `Count` collector.
+    type Fruit: Fruit;
+
+    /// Type of the `SegmentCollector` associated to this collector.
+    type Child: SegmentCollector<Fruit = Self::Fruit>;
+
    /// `set_segment` is called before beginning to enumerate
    /// on this segment.
-    fn set_segment(
-        &mut self,
+    fn for_segment(
+        &self,
        segment_local_id: SegmentLocalId,
        segment: &SegmentReader,
-    ) -> Result<()>;
-    /// The query pushes the scored document to the collector via this method.
-    fn collect(&mut self, doc: DocId, score: Score);
+    ) -> crate::Result<Self::Child>;

    /// Returns true iff the collector requires to compute scores for documents.
    fn requires_scoring(&self) -> bool;
+
+    /// Combines the fruit associated to the collection of each segment
+    /// into one fruit.
+    fn merge_fruits(&self, segment_fruits: Vec<Self::Fruit>) -> crate::Result<Self::Fruit>;
+
+    /// Creates a segment collector, collects every live doc matched by `weight`, and harvests its fruit.
+    fn collect_segment(
+        &self,
+        weight: &dyn Weight,
+        segment_ord: u32,
+        reader: &SegmentReader,
+    ) -> crate::Result<<Self::Child as SegmentCollector>::Fruit> {
+        let mut segment_collector = self.for_segment(segment_ord as u32, reader)?;
+
+        if let Some(delete_bitset) = reader.delete_bitset() {
+            weight.for_each(reader, &mut |doc, score| {
+                if delete_bitset.is_alive(doc) {
+                    segment_collector.collect(doc, score);
+                }
+            })?;
+        } else {
+            weight.for_each(reader, &mut |doc, score| {
+                segment_collector.collect(doc, score);
+            })?;
+        }
+        Ok(segment_collector.harvest())
+    }
 }

-impl<'a, C: Collector> Collector for &'a mut C {
-    fn set_segment(
-        &mut self,
-        segment_local_id: SegmentLocalId,
-        segment: &SegmentReader,
-    ) -> Result<()> {
-        (*self).set_segment(segment_local_id, segment)
-    }
+/// The `SegmentCollector` is the trait in charge of defining the
+/// collect operation at the scale of the segment.
+///
+/// `.collect(doc, score)` will be called for every document
+/// matching the query.
+pub trait SegmentCollector: 'static {
+    /// `Fruit` is the type for the result of our collection.
+    /// e.g. `usize` for the `Count` collector.
+    type Fruit: Fruit;
+
    /// The query pushes the scored document to the collector via this method.
-    fn collect(&mut self, doc: DocId, score: Score) {
-        C::collect(self, doc, score)
+    fn collect(&mut self, doc: DocId, score: Score);
+
+    /// Extract the fruit of the collection from the `SegmentCollector`.
+    fn harvest(self) -> Self::Fruit;
+}
+
+// -----------------------------------------------
+// Tuple implementations.
+
+impl<Left, Right> Collector for (Left, Right)
+where
+    Left: Collector,
+    Right: Collector,
+{
+    type Fruit = (Left::Fruit, Right::Fruit);
+    type Child = (Left::Child, Right::Child);
+
+    fn for_segment(
+        &self,
+        segment_local_id: u32,
+        segment: &SegmentReader,
+    ) -> crate::Result<Self::Child> {
+        let left = self.0.for_segment(segment_local_id, segment)?;
+        let right = self.1.for_segment(segment_local_id, segment)?;
+        Ok((left, right))
    }

    fn requires_scoring(&self) -> bool {
-        C::requires_scoring(self)
+        self.0.requires_scoring() || self.1.requires_scoring()
+    }
+
+    fn merge_fruits(
+        &self,
+        children: Vec<(Left::Fruit, Right::Fruit)>,
+    ) -> crate::Result<(Left::Fruit, Right::Fruit)> {
+        let mut left_fruits = vec![];
+        let mut right_fruits = vec![];
+        for (left_fruit, right_fruit) in children {
+            left_fruits.push(left_fruit);
+            right_fruits.push(right_fruit);
+        }
+        Ok((
+            self.0.merge_fruits(left_fruits)?,
+            self.1.merge_fruits(right_fruits)?,
+        ))
    }
 }

+impl<Left, Right> SegmentCollector for (Left, Right)
+where
+    Left: SegmentCollector,
+    Right: SegmentCollector,
+{
+    type Fruit = (Left::Fruit, Right::Fruit);
+
+    fn collect(&mut self, doc: DocId, score: Score) {
+        self.0.collect(doc, score);
+        self.1.collect(doc, score);
+    }
+
+    fn harvest(self) -> <Self as SegmentCollector>::Fruit {
+        (self.0.harvest(), self.1.harvest())
+    }
+}
+
+// 3-Tuple
+
+impl<One, Two, Three> Collector for (One, Two, Three)
+where
+    One: Collector,
+    Two: Collector,
+    Three: Collector,
+{
+    type Fruit = (One::Fruit, Two::Fruit, Three::Fruit);
+    type Child = (One::Child, Two::Child, Three::Child);
+
+    fn for_segment(
+        &self,
+        segment_local_id: u32,
+        segment: &SegmentReader,
+    ) -> crate::Result<Self::Child> {
+        let one = self.0.for_segment(segment_local_id, segment)?;
+        let two = self.1.for_segment(segment_local_id, segment)?;
+        let three = self.2.for_segment(segment_local_id, segment)?;
+        Ok((one, two, three))
+    }
+
+    fn requires_scoring(&self) -> bool {
+        self.0.requires_scoring() || self.1.requires_scoring() || self.2.requires_scoring()
+    }
+
+    fn merge_fruits(&self, children: Vec<Self::Fruit>) -> crate::Result<Self::Fruit> {
+        let mut one_fruits = vec![];
+        let mut two_fruits = vec![];
+        let mut three_fruits = vec![];
+        for (one_fruit, two_fruit, three_fruit) in children {
+            one_fruits.push(one_fruit);
+            two_fruits.push(two_fruit);
+            three_fruits.push(three_fruit);
+        }
+        Ok((
+            self.0.merge_fruits(one_fruits)?,
+            self.1.merge_fruits(two_fruits)?,
+            self.2.merge_fruits(three_fruits)?,
+        ))
+    }
+}
+
+impl<One, Two, Three> SegmentCollector for (One, Two, Three)
+where
+    One: SegmentCollector,
+    Two: SegmentCollector,
+    Three: SegmentCollector,
+{
+    type Fruit = (One::Fruit, Two::Fruit, Three::Fruit);
+
+    fn collect(&mut self, doc: DocId, score: Score) {
+        self.0.collect(doc, score);
+        self.1.collect(doc, score);
+        self.2.collect(doc, score);
+    }
+
+    fn harvest(self) -> <Self as SegmentCollector>::Fruit {
+        (self.0.harvest(), self.1.harvest(), self.2.harvest())
+    }
+}
+
+// 4-Tuple
+
+impl<One, Two, Three, Four> Collector for (One, Two, Three, Four)
+where
+    One: Collector,
+    Two: Collector,
+    Three: Collector,
+    Four: Collector,
+{
+    type Fruit = (One::Fruit, Two::Fruit, Three::Fruit, Four::Fruit);
+    type Child = (One::Child, Two::Child, Three::Child, Four::Child);
+
+    fn for_segment(
+        &self,
+        segment_local_id: u32,
+        segment: &SegmentReader,
+    ) -> crate::Result<Self::Child> {
+        let one = self.0.for_segment(segment_local_id, segment)?;
+        let two = self.1.for_segment(segment_local_id, segment)?;
+        let three = self.2.for_segment(segment_local_id, segment)?;
+        let four = self.3.for_segment(segment_local_id, segment)?;
+        Ok((one, two, three, four))
+    }
+
+    fn requires_scoring(&self) -> bool {
+        self.0.requires_scoring()
+            || self.1.requires_scoring()
+            || self.2.requires_scoring()
+            || self.3.requires_scoring()
+    }
+
+    fn merge_fruits(&self, children: Vec<Self::Fruit>) -> crate::Result<Self::Fruit> {
+        let mut one_fruits = vec![];
+        let mut two_fruits = vec![];
+        let mut three_fruits = vec![];
+        let mut four_fruits = vec![];
+        for (one_fruit, two_fruit, three_fruit, four_fruit) in children {
+            one_fruits.push(one_fruit);
+            two_fruits.push(two_fruit);
+            three_fruits.push(three_fruit);
+            four_fruits.push(four_fruit);
+        }
+        Ok((
+            self.0.merge_fruits(one_fruits)?,
+            self.1.merge_fruits(two_fruits)?,
+            self.2.merge_fruits(three_fruits)?,
+            self.3.merge_fruits(four_fruits)?,
+        ))
+    }
+}
+
+impl<One, Two, Three, Four> SegmentCollector for (One, Two, Three, Four)
+where
+    One: SegmentCollector,
+    Two: SegmentCollector,
+    Three: SegmentCollector,
+    Four: SegmentCollector,
+{
+    type Fruit = (One::Fruit, Two::Fruit, Three::Fruit, Four::Fruit);
+
+    fn collect(&mut self, doc: DocId, score: Score) {
+        self.0.collect(doc, score);
+        self.1.collect(doc, score);
+        self.2.collect(doc, score);
+        self.3.collect(doc, score);
+    }
+
+    fn harvest(self) -> <Self as SegmentCollector>::Fruit {
+        (
+            self.0.harvest(),
+            self.1.harvest(),
+            self.2.harvest(),
+            self.3.harvest(),
+        )
+    }
+}
+
+impl_downcast!(Fruit);
+
 #[cfg(test)]
-pub mod tests {
-
-    use super::*;
-    use core::SegmentReader;
-    use fastfield::BytesFastFieldReader;
-    use fastfield::FastFieldReader;
-    use schema::Field;
-    use DocId;
-    use Score;
-    use SegmentLocalId;
-
-    /// Stores all of the doc ids.
-    /// This collector is only used for tests.
-    /// It is unusable in practise, as it does not store
-    /// the segment ordinals
-    pub struct TestCollector {
-        offset: DocId,
-        segment_max_doc: DocId,
-        docs: Vec<DocId>,
-        scores: Vec<Score>,
-    }
-
-    impl TestCollector {
-        /// Return the exhalist of documents.
-        pub fn docs(self) -> Vec<DocId> {
-            self.docs
-        }
-
-        pub fn scores(self) -> Vec<Score> {
-            self.scores
-        }
-    }
-
-    impl Default for TestCollector {
-        fn default() -> TestCollector {
-            TestCollector {
-                offset: 0,
-                segment_max_doc: 0,
-                docs: Vec::new(),
-                scores: Vec::new(),
-            }
-        }
-    }
-
-    impl Collector for TestCollector {
-        fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
-            self.offset += self.segment_max_doc;
-            self.segment_max_doc = reader.max_doc();
-            Ok(())
-        }
-
-        fn collect(&mut self, doc: DocId, score: Score) {
-            self.docs.push(doc + self.offset);
-            self.scores.push(score);
-        }
-
-        fn requires_scoring(&self) -> bool {
-            true
-        }
-    }
-
-    /// Collects in order all of the fast fields for all of the
-    /// doc in the `DocSet`
-    ///
-    /// This collector is mainly useful for tests.
-    pub struct FastFieldTestCollector {
-        vals: Vec<u64>,
-        field: Field,
-        ff_reader: Option<FastFieldReader<u64>>,
-    }
-
-    impl FastFieldTestCollector {
-        pub fn for_field(field: Field) -> FastFieldTestCollector {
-            FastFieldTestCollector {
-                vals: Vec::new(),
-                field,
-                ff_reader: None,
-            }
-        }
-
-        pub fn vals(self) -> Vec<u64> {
-            self.vals
-        }
-    }
-
-    impl Collector for FastFieldTestCollector {
-        fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
-            self.ff_reader = Some(reader.fast_field_reader(self.field)?);
-            Ok(())
-        }
-
-        fn collect(&mut self, doc: DocId, _score: Score) {
-            let val = self.ff_reader.as_ref().unwrap().get(doc);
-            self.vals.push(val);
-        }
-        fn requires_scoring(&self) -> bool {
-            false
-        }
-    }
-
-    /// Collects in order all of the fast field bytes for all of the
-    /// docs in the `DocSet`
-    ///
-    /// This collector is mainly useful for tests.
-    pub struct BytesFastFieldTestCollector {
-        vals: Vec<u8>,
-        field: Field,
-        ff_reader: Option<BytesFastFieldReader>,
-    }
-
-    impl BytesFastFieldTestCollector {
-        pub fn for_field(field: Field) -> BytesFastFieldTestCollector {
-            BytesFastFieldTestCollector {
-                vals: Vec::new(),
-                field,
-                ff_reader: None,
-            }
-        }
-
-        pub fn vals(self) -> Vec<u8> {
-            self.vals
-        }
-    }
-
-    impl Collector for BytesFastFieldTestCollector {
-        fn set_segment(&mut self, _segment_local_id: u32, segment: &SegmentReader) -> Result<()> {
-            self.ff_reader = Some(segment.bytes_fast_field_reader(self.field)?);
-            Ok(())
-        }
-
-        fn collect(&mut self, doc: u32, _score: f32) {
-            let val = self.ff_reader.as_ref().unwrap().get_val(doc);
-            self.vals.extend(val);
-        }
-
-        fn requires_scoring(&self) -> bool {
-            false
-        }
-    }
-}
-
-#[cfg(all(test, feature = "unstable"))]
-mod bench {
-    use collector::{Collector, CountCollector};
-    use test::Bencher;
-
-    #[bench]
-    fn build_collector(b: &mut Bencher) {
-        b.iter(|| {
-            let mut count_collector = CountCollector::default();
-            let docs: Vec<u32> = (0..1_000_000).collect();
-            for doc in docs {
-                count_collector.collect(doc, 1f32);
-            }
-            count_collector.count()
-        });
-    }
-}
+pub mod tests;
--- a/src/collector/multi_collector.rs
+++ b/src/collector/multi_collector.rs
@@ -1,98 +1,242 @@
 use super::Collector;
-use DocId;
-use Result;
-use Score;
-use SegmentLocalId;
-use SegmentReader;
+use super::SegmentCollector;
+use crate::collector::Fruit;
+use crate::DocId;
+use crate::Score;
+use crate::SegmentLocalId;
+use crate::SegmentReader;
+use crate::TantivyError;
+use std::marker::PhantomData;
+use std::ops::Deref;
+
+pub struct MultiFruit {
+    sub_fruits: Vec<Option<Box<dyn Fruit>>>,
+}
+
+pub struct CollectorWrapper<TCollector: Collector>(TCollector);
+
+impl<TCollector: Collector> Collector for CollectorWrapper<TCollector> {
+    type Fruit = Box<dyn Fruit>;
+    type Child = Box<dyn BoxableSegmentCollector>;
+
+    fn for_segment(
+        &self,
+        segment_local_id: u32,
+        reader: &SegmentReader,
+    ) -> crate::Result<Box<dyn BoxableSegmentCollector>> {
+        let child = self.0.for_segment(segment_local_id, reader)?;
+        Ok(Box::new(SegmentCollectorWrapper(child)))
+    }
+
+    fn requires_scoring(&self) -> bool {
+        self.0.requires_scoring()
+    }
+
+    fn merge_fruits(
+        &self,
+        children: Vec<<Self as Collector>::Fruit>,
+    ) -> crate::Result<Box<dyn Fruit>> {
+        let typed_fruit: Vec<TCollector::Fruit> = children
+            .into_iter()
+            .map(|untyped_fruit| {
+                untyped_fruit
+                    .downcast::<TCollector::Fruit>()
+                    .map(|boxed_but_typed| *boxed_but_typed)
+                    .map_err(|_| {
+                        TantivyError::InvalidArgument("Failed to cast child fruit.".to_string())
+                    })
+            })
+            .collect::<crate::Result<_>>()?;
+        let merged_fruit = self.0.merge_fruits(typed_fruit)?;
+        Ok(Box::new(merged_fruit))
+    }
+}
+
+impl SegmentCollector for Box<dyn BoxableSegmentCollector> {
+    type Fruit = Box<dyn Fruit>;
+
+    fn collect(&mut self, doc: u32, score: Score) {
+        self.as_mut().collect(doc, score);
+    }
+
+    fn harvest(self) -> Box<dyn Fruit> {
+        BoxableSegmentCollector::harvest_from_box(self)
+    }
+}
+
+pub trait BoxableSegmentCollector {
+    fn collect(&mut self, doc: u32, score: Score);
+    fn harvest_from_box(self: Box<Self>) -> Box<dyn Fruit>;
+}
+
+pub struct SegmentCollectorWrapper<TSegmentCollector: SegmentCollector>(TSegmentCollector);
+
+impl<TSegmentCollector: SegmentCollector> BoxableSegmentCollector
+    for SegmentCollectorWrapper<TSegmentCollector>
+{
+    fn collect(&mut self, doc: u32, score: Score) {
+        self.0.collect(doc, score);
+    }
+
+    fn harvest_from_box(self: Box<Self>) -> Box<dyn Fruit> {
+        Box::new(self.0.harvest())
+    }
+}
+
+pub struct FruitHandle<TFruit: Fruit> {
+    pos: usize,
+    _phantom: PhantomData<TFruit>,
+}
+
+impl<TFruit: Fruit> FruitHandle<TFruit> {
+    pub fn extract(self, fruits: &mut MultiFruit) -> TFruit {
+        let boxed_fruit = fruits.sub_fruits[self.pos].take().expect("");
+        *boxed_fruit
+            .downcast::<TFruit>()
+            .map_err(|_| ())
+            .expect("Failed to downcast collector fruit.")
+    }
+}

 /// Multicollector makes it possible to collect on more than one collector.
 /// It should only be used for use cases where the Collector types is unknown
 /// at compile time.
-/// If the type of the collectors is known, you should prefer to use `ChainedCollector`.
+///
+/// If the type of the collectors is known, you can just group your collectors
+/// in a tuple. See the
+/// [Combining several collectors section of the collector documentation](./index.html#combining-several-collectors).
 ///
 /// ```rust
-/// #[macro_use]
-/// extern crate tantivy;
-/// use tantivy::schema::{SchemaBuilder, TEXT};
-/// use tantivy::{Index, Result};
-/// use tantivy::collector::{CountCollector, TopCollector, MultiCollector};
+/// use tantivy::collector::{Count, TopDocs, MultiCollector};
 /// use tantivy::query::QueryParser;
+/// use tantivy::schema::{Schema, TEXT};
+/// use tantivy::{doc, Index};
 ///
-/// # fn main() { example().unwrap(); }
-/// fn example() -> Result<()> {
-///     let mut schema_builder = SchemaBuilder::new();
-///     let title = schema_builder.add_text_field("title", TEXT);
-///     let schema = schema_builder.build();
-///     let index = Index::create_in_ram(schema);
-///     {
-///         let mut index_writer = index.writer(3_000_000)?;
-///         index_writer.add_document(doc!(
-///             title => "The Name of the Wind",
-///         ));
-///         index_writer.add_document(doc!(
-///             title => "The Diary of Muadib",
-///         ));
-///         index_writer.add_document(doc!(
-///             title => "A Dairy Cow",
-///         ));
-///         index_writer.add_document(doc!(
-///             title => "The Diary of a Young Girl",
-///         ));
-///         index_writer.commit().unwrap();
-///     }
+/// let mut schema_builder = Schema::builder();
+/// let title = schema_builder.add_text_field("title", TEXT);
+/// let schema = schema_builder.build();
+/// let index = Index::create_in_ram(schema);
 ///
-///     index.load_searchers()?;
-///     let searcher = index.searcher();
+/// let mut index_writer = index.writer(3_000_000).unwrap();
+/// index_writer.add_document(doc!(title => "The Name of the Wind"));
+/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
+/// index_writer.add_document(doc!(title => "A Dairy Cow"));
+/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
+/// assert!(index_writer.commit().is_ok());
 ///
-///     {
-///         let mut top_collector = TopCollector::with_limit(2);
-///         let mut count_collector = CountCollector::default();
-///         {
-///             let mut collectors =
-///                 MultiCollector::from(vec![&mut top_collector, &mut count_collector]);
-///             let query_parser = QueryParser::for_index(&index, vec![title]);
-///             let query = query_parser.parse_query("diary")?;
-///             searcher.search(&*query, &mut collectors).unwrap();
-///         }
-///         assert_eq!(count_collector.count(), 2);
-///         assert!(top_collector.at_capacity());
-///     }
+/// let reader = index.reader().unwrap();
+/// let searcher = reader.searcher();
 ///
-///     Ok(())
-/// }
+/// let mut collectors = MultiCollector::new();
+/// let top_docs_handle = collectors.add_collector(TopDocs::with_limit(2));
+/// let count_handle = collectors.add_collector(Count);
+/// let query_parser = QueryParser::for_index(&index, vec![title]);
+/// let query = query_parser.parse_query("diary").unwrap();
+/// let mut multi_fruit = searcher.search(&query, &collectors).unwrap();
+///
+/// let count = count_handle.extract(&mut multi_fruit);
+/// let top_docs = top_docs_handle.extract(&mut multi_fruit);
+///
+/// assert_eq!(count, 2);
+/// assert_eq!(top_docs.len(), 2);
 /// ```
+#[allow(clippy::type_complexity)]
+#[derive(Default)]
 pub struct MultiCollector<'a> {
-    collectors: Vec<&'a mut Collector>,
+    collector_wrappers: Vec<
+        Box<dyn Collector<Child = Box<dyn BoxableSegmentCollector>, Fruit = Box<dyn Fruit>> + 'a>,
+    >,
 }

 impl<'a> MultiCollector<'a> {
-    /// Constructor
-    pub fn from(collectors: Vec<&'a mut Collector>) -> MultiCollector {
-        MultiCollector { collectors }
+    /// Create a new `MultiCollector`
+    pub fn new() -> Self {
+        Default::default()
+    }
+
+    /// Add a new collector to our `MultiCollector`.
+    pub fn add_collector<'b: 'a, TCollector: Collector + 'b>(
+        &mut self,
+        collector: TCollector,
+    ) -> FruitHandle<TCollector::Fruit> {
+        let pos = self.collector_wrappers.len();
+        self.collector_wrappers
+            .push(Box::new(CollectorWrapper(collector)));
+        FruitHandle {
+            pos,
+            _phantom: PhantomData,
+        }
    }
 }

 impl<'a> Collector for MultiCollector<'a> {
-    fn set_segment(
-        &mut self,
+    type Fruit = MultiFruit;
+    type Child = MultiCollectorChild;
+
+    fn for_segment(
+        &self,
        segment_local_id: SegmentLocalId,
        segment: &SegmentReader,
-    ) -> Result<()> {
-        for collector in &mut self.collectors {
-            collector.set_segment(segment_local_id, segment)?;
-        }
-        Ok(())
+    ) -> crate::Result<MultiCollectorChild> {
+        let children = self
+            .collector_wrappers
+            .iter()
+            .map(|collector_wrapper| collector_wrapper.for_segment(segment_local_id, segment))
+            .collect::<crate::Result<Vec<_>>>()?;
+        Ok(MultiCollectorChild { children })
    }

+    fn requires_scoring(&self) -> bool {
+        self.collector_wrappers
+            .iter()
+            .map(Deref::deref)
+            .any(Collector::requires_scoring)
+    }
+
+    fn merge_fruits(&self, segments_multifruits: Vec<MultiFruit>) -> crate::Result<MultiFruit> {
+        let mut segment_fruits_list: Vec<Vec<Box<dyn Fruit>>> = (0..self.collector_wrappers.len())
+            .map(|_| Vec::with_capacity(segments_multifruits.len()))
+            .collect::<Vec<_>>();
+        for segment_multifruit in segments_multifruits {
+            for (idx, segment_fruit_opt) in segment_multifruit.sub_fruits.into_iter().enumerate() {
+                if let Some(segment_fruit) = segment_fruit_opt {
+                    segment_fruits_list[idx].push(segment_fruit);
+                }
+            }
+        }
+        let sub_fruits = self
+            .collector_wrappers
+            .iter()
+            .zip(segment_fruits_list)
+            .map(|(child_collector, segment_fruits)| {
+                Ok(Some(child_collector.merge_fruits(segment_fruits)?))
+            })
+            .collect::<crate::Result<_>>()?;
+        Ok(MultiFruit { sub_fruits })
+    }
+}
+
+pub struct MultiCollectorChild {
+    children: Vec<Box<dyn BoxableSegmentCollector>>,
+}
+
+impl SegmentCollector for MultiCollectorChild {
+    type Fruit = MultiFruit;
+
    fn collect(&mut self, doc: DocId, score: Score) {
-        for collector in &mut self.collectors {
-            collector.collect(doc, score);
+        for child in &mut self.children {
+            child.collect(doc, score);
        }
    }
-    fn requires_scoring(&self) -> bool {
-        self.collectors
-            .iter()
-            .any(|collector| collector.requires_scoring())
+
+    fn harvest(self) -> MultiFruit {
+        MultiFruit {
+            sub_fruits: self
+                .children
+                .into_iter()
+                .map(|child| Some(child.harvest()))
+                .collect(),
+        }
    }
 }

@@ -100,20 +244,41 @@ impl<'a> Collector for MultiCollector<'a> {
 mod tests {

    use super::*;
-    use collector::{Collector, CountCollector, TopCollector};
+    use crate::collector::{Count, TopDocs};
+    use crate::query::TermQuery;
+    use crate::schema::IndexRecordOption;
+    use crate::schema::{Schema, TEXT};
+    use crate::Index;
+    use crate::Term;

    #[test]
    fn test_multi_collector() {
-        let mut top_collector = TopCollector::with_limit(2);
-        let mut count_collector = CountCollector::default();
+        let mut schema_builder = Schema::builder();
+        let text = schema_builder.add_text_field("text", TEXT);
+        let schema = schema_builder.build();
+
+        let index = Index::create_in_ram(schema);
        {
-            let mut collectors =
-                MultiCollector::from(vec![&mut top_collector, &mut count_collector]);
-            collectors.collect(1, 0.2);
-            collectors.collect(2, 0.1);
-            collectors.collect(3, 0.5);
+            let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
+            index_writer.add_document(doc!(text=>"abc"));
+            index_writer.add_document(doc!(text=>"abc abc abc"));
+            index_writer.add_document(doc!(text=>"abc abc"));
+            index_writer.commit().unwrap();
+            index_writer.add_document(doc!(text=>""));
+            index_writer.add_document(doc!(text=>"abc abc abc abc"));
+            index_writer.add_document(doc!(text=>"abc"));
+            index_writer.commit().unwrap();
        }
-        assert_eq!(count_collector.count(), 3);
-        assert!(top_collector.at_capacity());
+        let searcher = index.reader().unwrap().searcher();
+        let term = Term::from_field_text(text, "abc");
+        let query = TermQuery::new(term, IndexRecordOption::Basic);
+
+        let mut collectors = MultiCollector::new();
+        let topdocs_handler = collectors.add_collector(TopDocs::with_limit(2));
+        let count_handler = collectors.add_collector(Count);
+        let mut multifruits = searcher.search(&query, &mut collectors).unwrap();
+
+        assert_eq!(count_handler.extract(&mut multifruits), 5);
+        assert_eq!(topdocs_handler.extract(&mut multifruits).len(), 2);
    }
 }
--- a/src/collector/tests.rs
+++ b/src/collector/tests.rs
@@ -0,0 +1,217 @@
+use super::*;
+use crate::core::SegmentReader;
+use crate::fastfield::BytesFastFieldReader;
+use crate::fastfield::FastFieldReader;
+use crate::schema::Field;
+use crate::DocAddress;
+use crate::DocId;
+use crate::Score;
+use crate::SegmentLocalId;
+
+pub const TEST_COLLECTOR_WITH_SCORE: TestCollector = TestCollector {
+    compute_score: true,
+};
+
+pub const TEST_COLLECTOR_WITHOUT_SCORE: TestCollector = TestCollector {
+    compute_score: false, // "without score": requires_scoring() must report false
+};
+
+/// Stores all of the doc ids.
+///
+/// This collector is only used for tests.
+///
+/// It is unusable in practice, as it does not store
+/// the segment ordinals.
+pub struct TestCollector {
+    pub compute_score: bool, // value reported by `requires_scoring()`
+}
+
+pub struct TestSegmentCollector {
+    segment_id: SegmentLocalId,
+    fruit: TestFruit,
+}
+
+#[derive(Default)]
+pub struct TestFruit {
+    docs: Vec<DocAddress>,
+    scores: Vec<Score>,
+}
+
+impl TestFruit {
+    /// Return the list of matching documents exhaustively.
+    pub fn docs(&self) -> &[DocAddress] {
+        &self.docs[..]
+    }
+    pub fn scores(&self) -> &[Score] {
+        &self.scores[..]
+    }
+}
+
+impl Collector for TestCollector {
+    type Fruit = TestFruit;
+    type Child = TestSegmentCollector;
+
+    /// Starts an empty fruit tagged with the id of the segment.
+    fn for_segment(
+        &self,
+        segment_id: SegmentLocalId,
+        _reader: &SegmentReader,
+    ) -> crate::Result<TestSegmentCollector> {
+        let fruit = TestFruit::default();
+        Ok(TestSegmentCollector { segment_id, fruit })
+    }
+
+    /// Scores are requested only when `compute_score` is set.
+    fn requires_scoring(&self) -> bool {
+        self.compute_score
+    }
+
+    /// Concatenates the segment fruits, ordered by the segment ordinal of
+    /// their first document (a fruit without documents sorts as segment 0).
+    fn merge_fruits(&self, mut children: Vec<TestFruit>) -> crate::Result<TestFruit> {
+        // Stable sort: fruits with equal keys keep their incoming order.
+        children.sort_by_key(|fruit| {
+            let first_doc = fruit.docs().first();
+            first_doc.map_or(0, |doc_address| doc_address.segment_ord())
+        });
+        let mut merged = TestFruit::default();
+        for TestFruit { docs, scores } in children {
+            // Docs and scores are appended together so they stay aligned.
+            merged.docs.extend(docs);
+            merged.scores.extend(scores);
+        }
+        Ok(merged)
+    }
+}
+
+impl SegmentCollector for TestSegmentCollector {
+    type Fruit = TestFruit;
+
+    fn collect(&mut self, doc: DocId, score: Score) {
+        self.fruit.docs.push(DocAddress(self.segment_id, doc));
+        self.fruit.scores.push(score);
+    }
+
+    fn harvest(self) -> <Self as SegmentCollector>::Fruit {
+        self.fruit
+    }
+}
+
+/// Collects in order all of the fast fields for all of the
+/// docs in the `DocSet`
+///
+/// This collector is mainly useful for tests.
+pub struct FastFieldTestCollector {
+    field: Field, // the u64 fast field read for every collected doc
+}
+
+pub struct FastFieldSegmentCollector {
+    vals: Vec<u64>,
+    reader: FastFieldReader<u64>,
+}
+
+impl FastFieldTestCollector {
+    pub fn for_field(field: Field) -> FastFieldTestCollector {
+        FastFieldTestCollector { field }
+    }
+}
+
+impl Collector for FastFieldTestCollector {
+    type Fruit = Vec<u64>;
+    type Child = FastFieldSegmentCollector;
+
+    /// Opens the segment's `u64` fast field reader; panics if it is unavailable.
+    fn for_segment(
+        &self,
+        _: SegmentLocalId,
+        segment_reader: &SegmentReader,
+    ) -> crate::Result<FastFieldSegmentCollector> {
+        let fast_fields = segment_reader.fast_fields();
+        let reader = fast_fields
+            .u64(self.field)
+            .expect("Requested field is not a fast field.");
+        let vals = Vec::new();
+        Ok(FastFieldSegmentCollector { vals, reader })
+    }
+
+    fn requires_scoring(&self) -> bool {
+        false
+    }
+
+    /// Concatenates the per-segment values, in the order the fruits are given.
+    fn merge_fruits(&self, children: Vec<Vec<u64>>) -> crate::Result<Vec<u64>> {
+        Ok(children.into_iter().flatten().collect())
+    }
+}
+
+impl SegmentCollector for FastFieldSegmentCollector {
+    type Fruit = Vec<u64>;
+
+    /// Records the fast field value of `doc`; the score is ignored.
+    fn collect(&mut self, doc: DocId, _score: Score) {
+        self.vals.push(self.reader.get(doc));
+    }
+
+    fn harvest(self) -> Vec<u64> {
+        self.vals
+    }
+}
+
+/// Collects in order all of the fast field bytes for all of the
+/// docs in the `DocSet`
+///
+/// This collector is mainly useful for tests.
+pub struct BytesFastFieldTestCollector {
+    field: Field,
+}
+
+pub struct BytesFastFieldSegmentCollector {
+    vals: Vec<u8>,
+    reader: BytesFastFieldReader,
+}
+
+impl BytesFastFieldTestCollector {
+    pub fn for_field(field: Field) -> BytesFastFieldTestCollector {
+        BytesFastFieldTestCollector { field }
+    }
+}
+
+impl Collector for BytesFastFieldTestCollector {
+    type Fruit = Vec<u8>;
+    type Child = BytesFastFieldSegmentCollector;
+
+    /// Opens the segment's bytes fast field reader; panics if it is unavailable.
+    fn for_segment(
+        &self,
+        _segment_local_id: SegmentLocalId,
+        segment_reader: &SegmentReader,
+    ) -> crate::Result<BytesFastFieldSegmentCollector> {
+        let reader = segment_reader
+            .fast_fields()
+            .bytes(self.field)
+            .expect("Field is not a bytes fast field.");
+        let vals = Vec::new();
+        Ok(BytesFastFieldSegmentCollector { vals, reader })
+    }
+
+    fn requires_scoring(&self) -> bool {
+        false
+    }
+
+    fn merge_fruits(&self, children: Vec<Vec<u8>>) -> crate::Result<Vec<u8>> {
+        Ok(children.into_iter().flatten().collect())
+    }
+}
+
+impl SegmentCollector for BytesFastFieldSegmentCollector {
+    type Fruit = Vec<u8>;
+
+    fn collect(&mut self, doc: DocId, _score: Score) {
+        // Append this doc's bytes to the single flat output buffer.
+        self.vals.extend(self.reader.get_bytes(doc));
+    }
+
+    fn harvest(self) -> <Self as SegmentCollector>::Fruit {
+        self.vals
+    }
+}
--- a/src/collector/top_collector.rs
+++ b/src/collector/top_collector.rs
@@ -1,244 +1,383 @@
-use super::Collector;
+use crate::DocAddress;
+use crate::DocId;
+use crate::SegmentLocalId;
+use crate::SegmentReader;
 use std::cmp::Ordering;
 use std::collections::BinaryHeap;
+use std::marker::PhantomData;
-use DocAddress;
-use DocId;
-use Result;
-use Score;
-use SegmentLocalId;
-use SegmentReader;

-// Rust heap is a max-heap and we need a min heap.
-#[derive(Clone, Copy)]
-struct GlobalScoredDoc {
-    score: Score,
-    doc_address: DocAddress,
+/// Contains a feature (field, score, etc.) of a document along with the document address.
+///
+/// It has a custom implementation of `PartialOrd` that reverses the order. This is because the
+/// default Rust heap is a max heap, whereas a min heap is needed.
+///
+/// Additionally, it guarantees stable sorting: in case of a tie on the feature, the document
+/// address is used.
+///
+/// WARNING: equality is not what you would expect here.
+/// Two elements are equal if their feature is equal, and regardless of whether `doc`
+/// is equal. This should be perfectly fine for this usage, but let's make sure this
+/// struct is never public.
+pub(crate) struct ComparableDoc<T, D> {
+    pub feature: T,
+    pub doc: D,
 }

-impl PartialOrd for GlobalScoredDoc {
-    fn partial_cmp(&self, other: &GlobalScoredDoc) -> Option<Ordering> {
+impl<T: PartialOrd, D: PartialOrd> PartialOrd for ComparableDoc<T, D> {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
 }

-impl Ord for GlobalScoredDoc {
+impl<T: PartialOrd, D: PartialOrd> Ord for ComparableDoc<T, D> {
    #[inline]
-    fn cmp(&self, other: &GlobalScoredDoc) -> Ordering {
-        other
-            .score
-            .partial_cmp(&self.score)
-            .unwrap_or_else(|| other.doc_address.cmp(&self.doc_address))
+    fn cmp(&self, other: &Self) -> Ordering {
+        // Reversed to make BinaryHeap work as a min-heap
+        let by_feature = other
+            .feature
+            .partial_cmp(&self.feature)
+            .unwrap_or(Ordering::Equal);
+
+        let lazy_by_doc_address = || self.doc.partial_cmp(&other.doc).unwrap_or(Ordering::Equal);
+
+        // In case of a tie on the feature, we sort by ascending
+        // `DocAddress` in order to ensure a stable sorting of the
+        // documents.
+        by_feature.then_with(lazy_by_doc_address)
    }
 }

-impl PartialEq for GlobalScoredDoc {
-    fn eq(&self, other: &GlobalScoredDoc) -> bool {
+impl<T: PartialOrd, D: PartialOrd> PartialEq for ComparableDoc<T, D> {
+    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == Ordering::Equal
    }
 }

-impl Eq for GlobalScoredDoc {}
+impl<T: PartialOrd, D: PartialOrd> Eq for ComparableDoc<T, D> {}

-/// The Top Collector keeps track of the K documents
-/// with the best scores.
-///
-/// The implementation is based on a `BinaryHeap`.
-/// The theorical complexity for collecting the top `K` out of `n` documents
-/// is `O(n log K)`.
-///
-/// ```rust
-/// #[macro_use]
-/// extern crate tantivy;
-/// use tantivy::schema::{SchemaBuilder, TEXT};
-/// use tantivy::{Index, Result, DocId, Score};
-/// use tantivy::collector::TopCollector;
-/// use tantivy::query::QueryParser;
-///
-/// # fn main() { example().unwrap(); }
-/// fn example() -> Result<()> {
-///     let mut schema_builder = SchemaBuilder::new();
-///     let title = schema_builder.add_text_field("title", TEXT);
-///     let schema = schema_builder.build();
-///     let index = Index::create_in_ram(schema);
-///     {
-///         let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
-///         index_writer.add_document(doc!(
-///             title => "The Name of the Wind",
-///         ));
-///         index_writer.add_document(doc!(
-///             title => "The Diary of Muadib",
-///         ));
-///         index_writer.add_document(doc!(
-///             title => "A Dairy Cow",
-///         ));
-///         index_writer.add_document(doc!(
-///             title => "The Diary of a Young Girl",
-///         ));
-///         index_writer.commit().unwrap();
-///     }
-///
-///     index.load_searchers()?;
-///     let searcher = index.searcher();
-///
-///     {
-///	        let mut top_collector = TopCollector::with_limit(2);
-///         let query_parser = QueryParser::for_index(&index, vec![title]);
-///         let query = query_parser.parse_query("diary")?;
-///         searcher.search(&*query, &mut top_collector).unwrap();
-///
-///         let score_docs: Vec<(Score, DocId)> = top_collector
-///           .score_docs()
-///           .into_iter()
-///           .map(|(score, doc_address)| (score, doc_address.doc()))
-///           .collect();
-///
-///         assert_eq!(score_docs, vec![(0.7261542, 1), (0.6099695, 3)]);
-///     }
-///
-///     Ok(())
-/// }
-/// ```
-pub struct TopCollector {
-    limit: usize,
-    heap: BinaryHeap<GlobalScoredDoc>,
-    segment_id: u32,
+pub(crate) struct TopCollector<T> {
+    pub limit: usize,
+    pub offset: usize,
+    _marker: PhantomData<T>,
 }

-impl TopCollector {
+impl<T> TopCollector<T>
+where
+    T: PartialOrd + Clone,
+{
    /// Creates a top collector, with a number of documents equal to "limit".
    ///
    /// # Panics
    /// The method panics if limit is 0
-    pub fn with_limit(limit: usize) -> TopCollector {
+    pub fn with_limit(limit: usize) -> TopCollector<T> {
        if limit < 1 {
            panic!("Limit must be strictly greater than 0.");
        }
-        TopCollector {
+        Self {
            limit,
-            heap: BinaryHeap::with_capacity(limit),
-            segment_id: 0,
+            offset: 0,
+            _marker: PhantomData,
        }
    }

-    /// Returns K best documents sorted in decreasing order.
+    /// Skip the first "offset" documents when collecting.
    ///
-    /// Calling this method triggers the sort.
-    /// The result of the sort is not cached.
-    pub fn docs(&self) -> Vec<DocAddress> {
-        self.score_docs()
-            .into_iter()
-            .map(|score_doc| score_doc.1)
-            .collect()
+    /// This is equivalent to `OFFSET` in MySQL or PostgreSQL and `start` in
+    /// Lucene's TopDocsCollector.
+    pub fn and_offset(mut self, offset: usize) -> TopCollector<T> {
+        self.offset = offset;
+        self
    }

-    /// Returns K best ScoredDocument sorted in decreasing order.
-    ///
-    /// Calling this method triggers the sort.
-    /// The result of the sort is not cached.
-    pub fn score_docs(&self) -> Vec<(Score, DocAddress)> {
-        let mut scored_docs: Vec<GlobalScoredDoc> = self.heap.iter().cloned().collect();
-        scored_docs.sort();
-        scored_docs
+    pub fn merge_fruits(
+        &self,
+        children: Vec<Vec<(T, DocAddress)>>,
+    ) -> crate::Result<Vec<(T, DocAddress)>> {
+        if self.limit == 0 {
+            return Ok(Vec::new());
+        }
+        let mut top_collector = BinaryHeap::new();
+        for child_fruit in children {
+            for (feature, doc) in child_fruit {
+                if top_collector.len() < (self.limit + self.offset) {
+                    top_collector.push(ComparableDoc { feature, doc });
+                } else if let Some(mut head) = top_collector.peek_mut() {
+                    if head.feature < feature {
+                        *head = ComparableDoc { feature, doc };
+                    }
+                }
+            }
+        }
+        Ok(top_collector
+            .into_sorted_vec()
            .into_iter()
-            .map(|GlobalScoredDoc { score, doc_address }| (score, doc_address))
+            .skip(self.offset)
+            .map(|cdoc| (cdoc.feature, cdoc.doc))
+            .collect())
+    }
+
+    pub(crate) fn for_segment<F: PartialOrd>(
+        &self,
+        segment_id: SegmentLocalId,
+        _: &SegmentReader,
+    ) -> crate::Result<TopSegmentCollector<F>> {
+        Ok(TopSegmentCollector::new(
+            segment_id,
+            self.limit + self.offset,
+        ))
+    }
+
+    /// Create a new TopCollector with the same limit and offset.
+    ///
+    /// Ideally we would use Into but the blanket implementation seems to cause the Scorer traits
+    /// to fail.
+    #[doc(hidden)]
+    pub(crate) fn into_tscore<TScore: PartialOrd + Clone>(self) -> TopCollector<TScore> {
+        TopCollector {
+            limit: self.limit,
+            offset: self.offset,
+            _marker: PhantomData,
+        }
+    }
+}
+
+/// The Top Collector keeps track of the K documents
+/// sorted by type `T`.
+///
+/// The implementation is based on a `BinaryHeap`.
+/// The theoretical complexity for collecting the top `K` out of `n` documents
+/// is `O(n log K)`.
+pub(crate) struct TopSegmentCollector<T> {
+    limit: usize, // maximum number of documents kept in `heap`
+    heap: BinaryHeap<ComparableDoc<T, DocId>>,
+    segment_id: u32,
+}
+
+impl<T: PartialOrd> TopSegmentCollector<T> {
+    fn new(segment_id: SegmentLocalId, limit: usize) -> TopSegmentCollector<T> {
+        TopSegmentCollector {
+            limit,
+            heap: BinaryHeap::with_capacity(limit),
+            segment_id,
+        }
+    }
+}
+
+impl<T: PartialOrd + Clone> TopSegmentCollector<T> {
+    pub fn harvest(self) -> Vec<(T, DocAddress)> {
+        let segment_id = self.segment_id;
+        self.heap
+            .into_sorted_vec()
+            .into_iter()
+            .map(|comparable_doc| {
+                (
+                    comparable_doc.feature,
+                    DocAddress(segment_id, comparable_doc.doc),
+                )
+            })
            .collect()
    }

    /// Return true iff at least K documents have gone through
    /// the collector.
-    #[inline]
-    pub fn at_capacity(&self) -> bool {
+    #[inline(always)]
+    pub(crate) fn at_capacity(&self) -> bool {
        self.heap.len() >= self.limit
    }
-}

-impl Collector for TopCollector {
-    fn set_segment(&mut self, segment_id: SegmentLocalId, _: &SegmentReader) -> Result<()> {
-        self.segment_id = segment_id;
-        Ok(())
-    }
-
-    fn collect(&mut self, doc: DocId, score: Score) {
+    /// Collects a document scored by the given feature
+    ///
+    /// It collects documents until it has reached the max capacity. Once it reaches capacity, it
+    /// will compare the lowest scoring item with the given one and keep whichever is greater.
+    #[inline(always)]
+    pub fn collect(&mut self, doc: DocId, feature: T) {
        if self.at_capacity() {
            // It's ok to unwrap as long as a limit of 0 is forbidden.
-            let limit_doc: GlobalScoredDoc = *self.heap
-                .peek()
-                .expect("Top collector with size 0 is forbidden");
-            if limit_doc.score < score {
-                let mut mut_head = self.heap
-                    .peek_mut()
-                    .expect("Top collector with size 0 is forbidden");
-                mut_head.score = score;
-                mut_head.doc_address = DocAddress(self.segment_id, doc);
+            if let Some(limit_feature) = self.heap.peek().map(|head| head.feature.clone()) {
+                if limit_feature < feature {
+                    if let Some(mut head) = self.heap.peek_mut() {
+                        head.feature = feature;
+                        head.doc = doc;
+                    }
+                }
            }
        } else {
-            let wrapped_doc = GlobalScoredDoc {
-                score,
-                doc_address: DocAddress(self.segment_id, doc),
-            };
-            self.heap.push(wrapped_doc);
+            // we have not reached capacity yet, so we can just push the
+            // element.
+            self.heap.push(ComparableDoc { feature, doc });
        }
    }
-
-    fn requires_scoring(&self) -> bool {
-        true
-    }
 }

 #[cfg(test)]
 mod tests {
-
-    use super::*;
-    use collector::Collector;
-    use DocId;
-    use Score;
+    use super::{TopCollector, TopSegmentCollector};
+    use crate::DocAddress;

    #[test]
    fn test_top_collector_not_at_capacity() {
-        let mut top_collector = TopCollector::with_limit(4);
+        let mut top_collector = TopSegmentCollector::new(0, 4);
        top_collector.collect(1, 0.8);
        top_collector.collect(3, 0.2);
        top_collector.collect(5, 0.3);
-        assert!(!top_collector.at_capacity());
-        let score_docs: Vec<(Score, DocId)> = top_collector
-            .score_docs()
-            .into_iter()
-            .map(|(score, doc_address)| (score, doc_address.doc()))
-            .collect();
-        assert_eq!(score_docs, vec![(0.8, 1), (0.3, 5), (0.2, 3)]);
+        assert_eq!(
+            top_collector.harvest(),
+            vec![
+                (0.8, DocAddress(0, 1)),
+                (0.3, DocAddress(0, 5)),
+                (0.2, DocAddress(0, 3))
+            ]
+        );
    }

    #[test]
    fn test_top_collector_at_capacity() {
-        let mut top_collector = TopCollector::with_limit(4);
+        let mut top_collector = TopSegmentCollector::new(0, 4);
        top_collector.collect(1, 0.8);
        top_collector.collect(3, 0.2);
        top_collector.collect(5, 0.3);
        top_collector.collect(7, 0.9);
        top_collector.collect(9, -0.2);
-        assert!(top_collector.at_capacity());
-        {
-            let score_docs: Vec<(Score, DocId)> = top_collector
-                .score_docs()
-                .into_iter()
-                .map(|(score, doc_address)| (score, doc_address.doc()))
-                .collect();
-            assert_eq!(score_docs, vec![(0.9, 7), (0.8, 1), (0.3, 5), (0.2, 3)]);
-        }
-        {
-            let docs: Vec<DocId> = top_collector
-                .docs()
-                .into_iter()
-                .map(|doc_address| doc_address.doc())
-                .collect();
-            assert_eq!(docs, vec![7, 1, 5, 3]);
-        }
+        assert_eq!(
+            top_collector.harvest(),
+            vec![
+                (0.9, DocAddress(0, 7)),
+                (0.8, DocAddress(0, 1)),
+                (0.3, DocAddress(0, 5)),
+                (0.2, DocAddress(0, 3))
+            ]
+        );
    }

    #[test]
-    #[should_panic]
-    fn test_top_0() {
-        TopCollector::with_limit(0);
+    fn test_top_segment_collector_stable_ordering_for_equal_feature() {
+        // given that the documents are collected in ascending doc id order,
+        // when harvesting we have to guarantee stable sorting in case of a tie
+        // on the score
+        let doc_ids_collection = [4, 5, 6];
+        let score = 3.14;
+
+        let mut top_collector_limit_2 = TopSegmentCollector::new(0, 2);
+        for id in &doc_ids_collection {
+            top_collector_limit_2.collect(*id, score);
+        }
+
+        let mut top_collector_limit_3 = TopSegmentCollector::new(0, 3);
+        for id in &doc_ids_collection {
+            top_collector_limit_3.collect(*id, score);
+        }
+
+        assert_eq!(
+            top_collector_limit_2.harvest(),
+            top_collector_limit_3.harvest()[..2].to_vec(),
+        );
    }

+    #[test]
+    fn test_top_collector_with_limit_and_offset() {
+        let collector = TopCollector::with_limit(2).and_offset(1);
+
+        let results = collector
+            .merge_fruits(vec![vec![
+                (0.9, DocAddress(0, 1)),
+                (0.8, DocAddress(0, 2)),
+                (0.7, DocAddress(0, 3)),
+                (0.6, DocAddress(0, 4)),
+                (0.5, DocAddress(0, 5)),
+            ]])
+            .unwrap();
+
+        assert_eq!(
+            results,
+            vec![(0.8, DocAddress(0, 2)), (0.7, DocAddress(0, 3)),]
+        );
+    }
+
+    #[test]
+    fn test_top_collector_with_limit_larger_than_set_and_offset() {
+        let collector = TopCollector::with_limit(2).and_offset(1);
+
+        let results = collector
+            .merge_fruits(vec![vec![(0.9, DocAddress(0, 1)), (0.8, DocAddress(0, 2))]])
+            .unwrap();
+
+        assert_eq!(results, vec![(0.8, DocAddress(0, 2)),]);
+    }
+
+    #[test]
+    fn test_top_collector_with_limit_and_offset_larger_than_set() {
+        let collector = TopCollector::with_limit(2).and_offset(20);
+
+        let results = collector
+            .merge_fruits(vec![vec![(0.9, DocAddress(0, 1)), (0.8, DocAddress(0, 2))]])
+            .unwrap();
+
+        assert_eq!(results, vec![]);
+    }
+}
+
+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+    use super::TopSegmentCollector;
+    use test::Bencher;
+
+    #[bench]
+    fn bench_top_segment_collector_collect_not_at_capacity(b: &mut Bencher) {
+        let mut top_collector = TopSegmentCollector::new(0, 400);
+
+        b.iter(|| {
+            for i in 0..100 {
+                top_collector.collect(i, 0.8);
+            }
+        });
+    }
+
+    #[bench]
+    fn bench_top_segment_collector_collect_at_capacity(b: &mut Bencher) {
+        let mut top_collector = TopSegmentCollector::new(0, 100);
+
+        for i in 0..100 {
+            top_collector.collect(i, 0.8);
+        }
+
+        b.iter(|| {
+            for i in 0..100 {
+                top_collector.collect(i, 0.8);
+            }
+        });
+    }
+
+    #[bench]
+    fn bench_top_segment_collector_collect_and_harvest_many_ties(b: &mut Bencher) {
+        b.iter(|| {
+            let mut top_collector = TopSegmentCollector::new(0, 100);
+
+            for i in 0..100 {
+                top_collector.collect(i, 0.8);
+            }
+
+            // it would be nice to be able to do the setup N times but still
+            // measure only harvest(). We can't since harvest() consumes
+            // the top_collector.
+            top_collector.harvest()
+        });
+    }
+
+    #[bench]
+    fn bench_top_segment_collector_collect_and_harvest_no_tie(b: &mut Bencher) {
+        b.iter(|| {
+            let mut top_collector = TopSegmentCollector::new(0, 100);
+            let mut score = 1.0;
+
+            for i in 0..100 {
+                score += 1.0;
+                top_collector.collect(i, score);
+            }
+
+            // it would be nice to be able to do the setup N times but still
+            // measure only harvest(). We can't since harvest() consumes
+            // the top_collector.
+            top_collector.harvest()
+        });
+    }
 }
--- a/src/collector/top_score_collector.rs
+++ b/src/collector/top_score_collector.rs
@@ -0,0 +1,831 @@
+use super::Collector;
+use crate::collector::custom_score_top_collector::CustomScoreTopCollector;
+use crate::collector::top_collector::TopSegmentCollector;
+use crate::collector::top_collector::{ComparableDoc, TopCollector};
+use crate::collector::tweak_score_top_collector::TweakedScoreTopCollector;
+use crate::collector::{
+    CustomScorer, CustomSegmentScorer, ScoreSegmentTweaker, ScoreTweaker, SegmentCollector,
+};
+use crate::fastfield::FastFieldReader;
+use crate::query::Weight;
+use crate::schema::Field;
+use crate::DocAddress;
+use crate::DocId;
+use crate::Score;
+use crate::SegmentLocalId;
+use crate::SegmentReader;
+use std::collections::BinaryHeap;
+use std::fmt;
+
+/// The `TopDocs` collector keeps track of the top `K` documents
+/// sorted by their score.
+///
+/// The implementation is based on a `BinaryHeap`.
+/// The theoretical complexity for collecting the top `K` out of `n` documents
+/// is `O(n log K)`.
+///
+/// This collector guarantees a stable sorting in case of a tie on the
+/// document score. As such, it is suitable to implement pagination.
+///
+/// ```rust
+/// use tantivy::collector::TopDocs;
+/// use tantivy::query::QueryParser;
+/// use tantivy::schema::{Schema, TEXT};
+/// use tantivy::{doc, DocAddress, Index};
+///
+/// let mut schema_builder = Schema::builder();
+/// let title = schema_builder.add_text_field("title", TEXT);
+/// let schema = schema_builder.build();
+/// let index = Index::create_in_ram(schema);
+///
+/// let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
+/// index_writer.add_document(doc!(title => "The Name of the Wind"));
+/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
+/// index_writer.add_document(doc!(title => "A Dairy Cow"));
+/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
+/// assert!(index_writer.commit().is_ok());
+///
+/// let reader = index.reader().unwrap();
+/// let searcher = reader.searcher();
+///
+/// let query_parser = QueryParser::for_index(&index, vec![title]);
+/// let query = query_parser.parse_query("diary").unwrap();
+/// let top_docs = searcher.search(&query, &TopDocs::with_limit(2)).unwrap();
+///
+/// assert_eq!(top_docs[0].1, DocAddress(0, 1));
+/// assert_eq!(top_docs[1].1, DocAddress(0, 3));
+/// ```
+pub struct TopDocs(TopCollector<Score>);
+
+impl fmt::Debug for TopDocs {
+    /// Formats the collector as `TopDocs(limit=.., offset=..)`.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let TopDocs(top_collector) = self;
+        let limit = top_collector.limit;
+        let offset = top_collector.offset;
+        write!(f, "TopDocs(limit={}, offset={})", limit, offset)
+    }
+}
+
+struct ScorerByFastFieldReader {
+    ff_reader: FastFieldReader<u64>, // source of the per-document score values
+}
+// The score of a document is the `u64` that `ff_reader` returns for it.
+impl CustomSegmentScorer<u64> for ScorerByFastFieldReader {
+    fn score(&mut self, doc: DocId) -> u64 {
+        self.ff_reader.get_u64(u64::from(doc))
+    }
+}
+
+struct ScorerByField {
+    field: Field,
+}
+
+impl CustomScorer<u64> for ScorerByField {
+    type Child = ScorerByFastFieldReader;
+
+    /// Opens the `u64` fast field reader of `self.field` on `segment_reader`.
+    ///
+    /// Returns a `SchemaError` when the segment exposes no such reader.
+    fn segment_scorer(&self, segment_reader: &SegmentReader) -> crate::Result<Self::Child> {
+        match segment_reader.fast_fields().u64(self.field) {
+            Some(ff_reader) => Ok(ScorerByFastFieldReader { ff_reader }),
+            None => Err(crate::TantivyError::SchemaError(format!(
+                "Field requested ({:?}) is not a i64/u64 fast field.",
+                self.field
+            ))),
+        }
+    }
+}
+
+impl TopDocs {
+    /// Creates a top score collector that keeps at most `limit` documents.
+    ///
+    /// # Panics
+    /// The method panics if `limit` is 0.
+    pub fn with_limit(limit: usize) -> TopDocs {
+        TopDocs(TopCollector::with_limit(limit))
+    }
+
+    /// Skip the first "offset" documents when collecting.
+    ///
+    /// This is equivalent to `OFFSET` in MySQL or PostgreSQL and `start` in
+    /// Lucene's TopDocsCollector.
+    ///
+    /// ```rust
+    /// use tantivy::collector::TopDocs;
+    /// use tantivy::query::QueryParser;
+    /// use tantivy::schema::{Schema, TEXT};
+    /// use tantivy::{doc, DocAddress, Index};
+    ///
+    /// let mut schema_builder = Schema::builder();
+    /// let title = schema_builder.add_text_field("title", TEXT);
+    /// let schema = schema_builder.build();
+    /// let index = Index::create_in_ram(schema);
+    ///
+    /// let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
+    /// index_writer.add_document(doc!(title => "The Name of the Wind"));
+    /// index_writer.add_document(doc!(title => "The Diary of Muadib"));
+    /// index_writer.add_document(doc!(title => "A Dairy Cow"));
+    /// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
+    /// index_writer.add_document(doc!(title => "The Diary of Lena Mukhina"));
+    /// assert!(index_writer.commit().is_ok());
+    ///
+    /// let reader = index.reader().unwrap();
+    /// let searcher = reader.searcher();
+    ///
+    /// let query_parser = QueryParser::for_index(&index, vec![title]);
+    /// let query = query_parser.parse_query("diary").unwrap();
+    /// let top_docs = searcher.search(&query, &TopDocs::with_limit(2).and_offset(1)).unwrap();
+    ///
+    /// assert_eq!(top_docs.len(), 2);
+    /// assert_eq!(top_docs[0].1, DocAddress(0, 4));
+    /// assert_eq!(top_docs[1].1, DocAddress(0, 3));
+    /// ```
+    pub fn and_offset(self, offset: usize) -> TopDocs {
+        TopDocs(self.0.and_offset(offset))
+    }
+
+    /// Set top-K to rank documents by a given fast field.
+    ///
+    /// ```rust
+    /// # use tantivy::schema::{Schema, FAST, TEXT};
+    /// # use tantivy::{doc, Index, DocAddress};
+    /// # use tantivy::query::{Query, QueryParser};
+    /// use tantivy::Searcher;
+    /// use tantivy::collector::TopDocs;
+    /// use tantivy::schema::Field;
+    ///
+    /// # fn main() -> tantivy::Result<()> {
+    /// #   let mut schema_builder = Schema::builder();
+    /// #   let title = schema_builder.add_text_field("title", TEXT);
+    /// #   let rating = schema_builder.add_u64_field("rating", FAST);
+    /// #   let schema = schema_builder.build();
+    /// #  
+    /// #   let index = Index::create_in_ram(schema);
+    /// #   let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
+    /// #   index_writer.add_document(doc!(title => "The Name of the Wind", rating => 92u64));
+    /// #   index_writer.add_document(doc!(title => "The Diary of Muadib", rating => 97u64));
+    /// #   index_writer.add_document(doc!(title => "A Dairy Cow", rating => 63u64));
+    /// #   index_writer.add_document(doc!(title => "The Diary of a Young Girl", rating => 80u64));
+    /// #   assert!(index_writer.commit().is_ok());
+    /// #   let reader = index.reader().unwrap();
+    /// #   let query = QueryParser::for_index(&index, vec![title]).parse_query("diary")?;
+    /// #   let top_docs = docs_sorted_by_rating(&reader.searcher(), &query, rating)?;
+    /// #   assert_eq!(top_docs,
+    /// #            vec![(97u64, DocAddress(0u32, 1)),
+    /// #                 (80u64, DocAddress(0u32, 3))]);
+    /// #   Ok(())
+    /// # }
+    ///
+    ///
+    /// /// Searches the documents matching the given query, and
+    /// /// collects the top 10 documents, ordered by the u64-`field`
+    /// /// given in argument.
+    /// ///
+    /// /// `field` is required to be a FAST field.
+    /// fn docs_sorted_by_rating(searcher: &Searcher,
+    ///                          query: &dyn Query,
+    ///                          sort_by_field: Field)
+    ///     -> tantivy::Result<Vec<(u64, DocAddress)>> {
+    ///
+    ///     // This is where we build our topdocs collector
+    ///     //
+    ///     // Note the generics parameter that needs to match the
+    ///     // type `sort_by_field`.
+    ///     let top_docs_by_rating = TopDocs
+    ///                 ::with_limit(10)
+    ///                  .order_by_u64_field(sort_by_field);
+    ///     
+    ///     // ... and here are our documents. Note this is a simple vec.
+    ///     // The `u64` in the pair is the value of our fast field for
+    ///     // each document.
+    ///     //
+    ///     // The vec is sorted decreasingly by `sort_by_field`, and has a
+    ///     // length of 10, or less if not enough documents matched the
+    ///     // query.
+    ///     let resulting_docs: Vec<(u64, DocAddress)> =
+    ///          searcher.search(query, &top_docs_by_rating)?;
+    ///     
+    ///     Ok(resulting_docs)
+    /// }
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// May panic if the field requested is not a fast field.
+    ///
+    pub fn order_by_u64_field(
+        self,
+        field: Field,
+    ) -> impl Collector<Fruit = Vec<(u64, DocAddress)>> {
+        self.custom_score(ScorerByField { field })
+    }
+
+    /// Ranks the documents using a custom score.
+    ///
+    /// This method offers a convenient way to tweak or replace
+    /// the documents score. As suggested by the prototype you can
+    /// manually define your own [`ScoreTweaker`](./trait.ScoreTweaker.html)
+    /// and pass it as an argument, but there is a much simpler way to
+    /// tweak your score: you can use a closure as in the following
+    /// example.
+    ///
+    /// # Example
+    ///
+    /// Typically, you will want to rely on one or more fast fields,
+    /// to alter the original relevance `Score`.
+    ///
+    /// For instance, in the following, we assume that we are implementing
+    /// an e-commerce website that has a fast field called `popularity`
+    /// that rates whether a product is typically often bought by users.
+    ///
+    /// In the following example we will tweak our ranking a bit by
+    /// boosting popular products a notch.
+    ///
+    /// In a more serious application, this tweaking could involve running a
+    /// learning-to-rank model over various features.
+    ///
+    /// ```rust
+    /// # use tantivy::schema::{Schema, FAST, TEXT};
+    /// # use tantivy::{doc, Index, DocAddress, DocId, Score};
+    /// # use tantivy::query::QueryParser;
+    /// use tantivy::SegmentReader;
+    /// use tantivy::collector::TopDocs;
+    /// use tantivy::schema::Field;
+    ///
+    /// fn create_schema() -> Schema {
+    ///    let mut schema_builder = Schema::builder();
+    ///    schema_builder.add_text_field("product_name", TEXT);
+    ///    schema_builder.add_u64_field("popularity", FAST);
+    ///    schema_builder.build()
+    /// }
+    ///
+    /// fn create_index() -> tantivy::Result<Index> {
+    ///   let schema = create_schema();
+    ///   let index = Index::create_in_ram(schema);
+    ///   let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
+    ///   let product_name = index.schema().get_field("product_name").unwrap();
+    ///   let popularity: Field = index.schema().get_field("popularity").unwrap();
+    ///   index_writer.add_document(doc!(product_name => "The Diary of Muadib", popularity => 1u64));
+    ///   index_writer.add_document(doc!(product_name => "A Dairy Cow", popularity => 10u64));
+    ///   index_writer.add_document(doc!(product_name => "The Diary of a Young Girl", popularity => 15u64));
+    ///   index_writer.commit()?;
+    ///   Ok(index)
+    /// }
+    ///
+    /// let index = create_index().unwrap();
+    /// let product_name = index.schema().get_field("product_name").unwrap();
+    /// let popularity: Field = index.schema().get_field("popularity").unwrap();
+    ///
+    /// let user_query_str = "diary";
+    /// let query_parser = QueryParser::for_index(&index, vec![product_name]);
+    /// let query = query_parser.parse_query(user_query_str).unwrap();
+    ///
+    /// // This is where we build our collector with our custom score.
+    /// let top_docs_by_custom_score = TopDocs
+    ///         ::with_limit(10)
+    ///          .tweak_score(move |segment_reader: &SegmentReader| {
+    ///             // The argument is a function that returns our scoring
+    ///             // function.
+    ///             //
+    ///             // The point of this "mother" function is to gather all
+    ///             // of the segment level information we need for scoring.
+    ///             // Typically, fast_fields.
+    ///             //
+    ///             // In our case, we will get a reader for the popularity
+    ///             // fast field.
+    ///             let popularity_reader =
+    ///                 segment_reader.fast_fields().u64(popularity).unwrap();
+    ///
+    ///             // We can now define our actual scoring function
+    ///             move |doc: DocId, original_score: Score| {
+    ///                 let popularity: u64 = popularity_reader.get(doc);
+    ///                 // Well.. For the sake of the example we use a simple logarithm
+    ///                 // function.
+    ///                 let popularity_boost_score = ((2u64 + popularity) as Score).log2();
+    ///                 popularity_boost_score * original_score
+    ///             }
+    ///           });
+    /// let reader = index.reader().unwrap();
+    /// let searcher = reader.searcher();
+    /// // ... and here are our documents. Note this is a simple vec.
+    /// // The `Score` in the pair is our tweaked score.
+    /// let resulting_docs: Vec<(Score, DocAddress)> =
+    ///      searcher.search(&query, &top_docs_by_custom_score).unwrap();
+    /// ```
+    ///
+    /// # See also
+    /// [custom_score(...)](#method.custom_score).
+    pub fn tweak_score<TScore, TScoreSegmentTweaker, TScoreTweaker>(
+        self,
+        score_tweaker: TScoreTweaker,
+    ) -> impl Collector<Fruit = Vec<(TScore, DocAddress)>>
+    where
+        TScore: 'static + Send + Sync + Clone + PartialOrd,
+        TScoreSegmentTweaker: ScoreSegmentTweaker<TScore> + 'static,
+        TScoreTweaker: ScoreTweaker<TScore, Child = TScoreSegmentTweaker> + Send + Sync,
+    {
+        TweakedScoreTopCollector::new(score_tweaker, self.0.into_tscore())
+    }
+
+    /// Ranks the documents using a custom score.
+    ///
+    /// This method offers a convenient way to use a different score.
+    ///
+    /// As suggested by the prototype you can manually define your
+    /// own [`CustomScorer`](./trait.CustomScorer.html)
+    /// and pass it as an argument, but there is a much simpler way to
+    /// tweak your score: you can use a closure as in the following
+    /// example.
+    ///
+    /// # Limitation
+    ///
+    /// This method only makes it possible to compute the score from a given
+    /// `DocId`, fastfield values for the doc and any information you could
+    /// have precomputed beforehand. It does not make it possible for instance
+    /// to compute something like TfIdf as it does not have access to the list of query
+    /// terms present in the document, nor the term frequencies for the different terms.
+    ///
+    /// It can be used if your search engine relies on a learning-to-rank model for instance,
+    /// which does not rely on the term frequencies or positions as features.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # use tantivy::schema::{Schema, FAST, TEXT};
+    /// # use tantivy::{doc, Index, DocAddress, DocId};
+    /// # use tantivy::query::QueryParser;
+    /// use tantivy::SegmentReader;
+    /// use tantivy::collector::TopDocs;
+    /// use tantivy::schema::Field;
+    ///
+    /// # fn create_schema() -> Schema {
+    /// #    let mut schema_builder = Schema::builder();
+    /// #    schema_builder.add_text_field("product_name", TEXT);
+    /// #    schema_builder.add_u64_field("popularity", FAST);
+    /// #    schema_builder.add_u64_field("boosted", FAST);
+    /// #    schema_builder.build()
+    /// # }
+    /// #
+    /// # fn main() -> tantivy::Result<()> {
+    /// #   let schema = create_schema();
+    /// #   let index = Index::create_in_ram(schema);
+    /// #   let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
+    /// #   let product_name = index.schema().get_field("product_name").unwrap();
+    /// #   
+    /// let popularity: Field = index.schema().get_field("popularity").unwrap();
+    /// let boosted: Field = index.schema().get_field("boosted").unwrap();
+    /// #   index_writer.add_document(doc!(boosted=>1u64, product_name => "The Diary of Muadib", popularity => 1u64));
+    /// #   index_writer.add_document(doc!(boosted=>0u64, product_name => "A Dairy Cow", popularity => 10u64));
+    /// #   index_writer.add_document(doc!(boosted=>0u64, product_name => "The Diary of a Young Girl", popularity => 15u64));
+    /// #   index_writer.commit()?;
+    /// // ...
+    /// # let user_query = "diary";
+    /// # let query = QueryParser::for_index(&index, vec![product_name]).parse_query(user_query)?;
+    ///
+    /// // This is where we build our collector with our custom score.
+    /// let top_docs_by_custom_score = TopDocs
+    ///         ::with_limit(10)
+    ///          .custom_score(move |segment_reader: &SegmentReader| {
+    ///             // The argument is a function that returns our scoring
+    ///             // function.
+    ///             //
+    ///             // The point of this "mother" function is to gather all
+    ///             // of the segment level information we need for scoring.
+    ///             // Typically, fast_fields.
+    ///             //
+    ///             // In our case, we will get a reader for the popularity
+    ///             // fast field and a boosted field.
+    ///             //
+    ///             // We want to get boosted items score, and when we get
+    ///             // a tie, return the item with the highest popularity.
+    ///             //
+    ///             // Note that this is implemented by using a `(u64, u64)`
+    ///             // as a score.
+    ///             let popularity_reader =
+    ///                 segment_reader.fast_fields().u64(popularity).unwrap();
+    ///             let boosted_reader =
+    ///                 segment_reader.fast_fields().u64(boosted).unwrap();
+    ///    
+    ///             // We can now define our actual scoring function
+    ///             move |doc: DocId| {
+    ///                 let popularity: u64 = popularity_reader.get(doc);
+    ///                 let boosted: u64 = boosted_reader.get(doc);
+    ///                 // Scores do not have to be floats in tantivy.
+    ///                 // Here we return a couple to get lexicographical order
+    ///                 // for free.
+    ///                 (boosted, popularity)
+    ///             }
+    ///           });
+    /// # let reader = index.reader()?;
+    /// # let searcher = reader.searcher();
+    /// // ... and here are our documents. Note this is a simple vec.
+    /// // The `Score` in the pair is our tweaked score.
+    /// let resulting_docs: Vec<((u64, u64), DocAddress)> =
+    ///      searcher.search(&*query, &top_docs_by_custom_score)?;
+    ///
+    /// # Ok(())
+    /// # }
+    /// ```
+    ///
+    /// # See also
+    /// [tweak_score(...)](#method.tweak_score).
+    pub fn custom_score<TScore, TCustomSegmentScorer, TCustomScorer>(
+        self,
+        custom_score: TCustomScorer,
+    ) -> impl Collector<Fruit = Vec<(TScore, DocAddress)>>
+    where
+        TScore: 'static + Send + Sync + Clone + PartialOrd,
+        TCustomSegmentScorer: CustomSegmentScorer<TScore> + 'static,
+        TCustomScorer: CustomScorer<TScore, Child = TCustomSegmentScorer> + Send + Sync,
+    {
+        CustomScoreTopCollector::new(custom_score, self.0.into_tscore())
+    }
+}
+
+impl Collector for TopDocs {
+    type Fruit = Vec<(Score, DocAddress)>;
+
+    type Child = TopScoreSegmentCollector;
+
+    fn for_segment(
+        &self,
+        segment_local_id: SegmentLocalId,
+        reader: &SegmentReader,
+    ) -> crate::Result<Self::Child> {
+        let collector = self.0.for_segment(segment_local_id, reader)?;
+        Ok(TopScoreSegmentCollector(collector))
+    }
+
+    fn requires_scoring(&self) -> bool {
+        true
+    }
+
+    fn merge_fruits(
+        &self,
+        child_fruits: Vec<Vec<(Score, DocAddress)>>,
+    ) -> crate::Result<Self::Fruit> {
+        self.0.merge_fruits(child_fruits)
+    }
+
+    fn collect_segment(
+        &self,
+        weight: &dyn Weight,
+        segment_ord: u32,
+        reader: &SegmentReader,
+    ) -> crate::Result<<Self::Child as SegmentCollector>::Fruit> {
+        let heap_len = self.0.limit + self.0.offset; // docs to retain for this segment
+        let mut heap: BinaryHeap<ComparableDoc<Score, DocId>> = BinaryHeap::with_capacity(heap_len);
+        // Fill a bounded heap from `weight`; a delete bitset, when present, filters docs out.
+        if let Some(delete_bitset) = reader.delete_bitset() {
+            let mut threshold = Score::MIN; // returned by the callback (presumably a pruning bound)
+            weight.for_each_pruning(threshold, reader, &mut |doc, score| {
+                if delete_bitset.is_deleted(doc) {
+                    return threshold; // deleted doc: heap and threshold stay untouched
+                }
+                let heap_item = ComparableDoc {
+                    feature: score,
+                    doc,
+                };
+                if heap.len() < heap_len {
+                    heap.push(heap_item);
+                    if heap.len() == heap_len {
+                        threshold = heap.peek().map(|el| el.feature).unwrap_or(Score::MIN);
+                    }
+                    return threshold; // still `Score::MIN` until the heap is full
+                }
+                *heap.peek_mut().unwrap() = heap_item; // heap full: overwrite its top entry
+                threshold = heap.peek().map(|el| el.feature).unwrap_or(Score::MIN);
+                threshold
+            })?;
+        } else {
+            weight.for_each_pruning(Score::MIN, reader, &mut |doc, score| {
+                let heap_item = ComparableDoc {
+                    feature: score,
+                    doc,
+                };
+                if heap.len() < heap_len {
+                    heap.push(heap_item);
+                    // TODO the threshold is suboptimal for heap.len == heap_len
+                    if heap.len() == heap_len {
+                        return heap.peek().map(|el| el.feature).unwrap_or(Score::MIN);
+                    } else {
+                        return Score::MIN;
+                    }
+                }
+                *heap.peek_mut().unwrap() = heap_item; // heap full: overwrite its top entry
+                heap.peek().map(|el| el.feature).unwrap_or(Score::MIN)
+            })?;
+        }
+        // Emit the retained docs in the heap's sorted order, addressed by `segment_ord`.
+        let fruit = heap
+            .into_sorted_vec()
+            .into_iter()
+            .map(|cid| (cid.feature, DocAddress(segment_ord, cid.doc)))
+            .collect();
+        Ok(fruit)
+    }
+}
+
+/// Segment Collector associated to `TopDocs`.
+pub struct TopScoreSegmentCollector(TopSegmentCollector<Score>);
+
+impl SegmentCollector for TopScoreSegmentCollector {
+    type Fruit = Vec<(Score, DocAddress)>;
+    /// Forwards the scored document to the wrapped `TopSegmentCollector`.
+    fn collect(&mut self, doc: DocId, score: Score) {
+        self.0.collect(doc, score);
+    }
+    /// Consumes the collector and returns what the wrapped collector harvested.
+    fn harvest(self) -> Vec<(Score, DocAddress)> {
+        self.0.harvest()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::TopDocs;
+    use crate::collector::Collector;
+    use crate::query::{AllQuery, Query, QueryParser};
+    use crate::schema::{Field, Schema, FAST, STORED, TEXT};
+    use crate::Index;
+    use crate::IndexWriter;
+    use crate::Score;
+    use crate::{DocAddress, DocId, SegmentReader};
+
+    /// Builds an in-RAM index holding three short documents, written in one commit.
+    fn make_index() -> Index {
+        let mut builder = Schema::builder();
+        let text = builder.add_text_field("text", TEXT);
+        let index = Index::create_in_ram(builder.build());
+        // The writer only lives inside this block.
+        {
+            let mut writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
+            writer.add_document(doc!(text=>"Hello happy tax payer."));
+            writer.add_document(doc!(text=>"Droopy says hello happy tax payer"));
+            writer.add_document(doc!(text=>"I like Droopy"));
+            assert!(writer.commit().is_ok());
+        }
+        index
+    }
+
+    fn assert_results_equals(results: &[(Score, DocAddress)], expected: &[(Score, DocAddress)]) {
+        assert_eq!(results.len(), expected.len(), "result count mismatch"); // zip stops early
+        for (result, expected) in results.iter().zip(expected.iter()) {
+            assert_eq!(result.1, expected.1);
+            crate::assert_nearly_equals!(result.0, expected.0);
+        }
+    }
+    #[test]
+    fn test_top_collector_not_at_capacity() {
+        // "droopy tax" matches the three documents of `make_index()`, so the limit
+        // of 4 is never reached and every match is returned.
+        let index = make_index();
+        let text_field = index.schema().get_field("text").unwrap();
+        let query = QueryParser::for_index(&index, vec![text_field])
+            .parse_query("droopy tax")
+            .unwrap();
+        let searcher = index.reader().unwrap().searcher();
+        let collector = TopDocs::with_limit(4);
+        let hits: Vec<(Score, DocAddress)> = searcher.search(&query, &collector).unwrap();
+
+        // Expected hits, highest score first.
+        let expected: [(Score, DocAddress); 3] = [
+            (0.81221175, DocAddress(0u32, 1)),
+            (0.5376842, DocAddress(0u32, 2)),
+            (0.48527452, DocAddress(0, 0)),
+        ];
+        assert_results_equals(&hits, &expected);
+    }
+
+    #[test]
+    fn test_top_collector_not_at_capacity_with_offset() {
+        // Skipping two of the three matches leaves only the lowest-scoring one.
+        let index = make_index();
+        let text_field = index.schema().get_field("text").unwrap();
+        let query = QueryParser::for_index(&index, vec![text_field])
+            .parse_query("droopy tax")
+            .unwrap();
+        let searcher = index.reader().unwrap().searcher();
+        let collector = TopDocs::with_limit(4).and_offset(2);
+        let hits: Vec<(Score, DocAddress)> = searcher.search(&query, &collector).unwrap();
+        let expected: [(Score, DocAddress); 1] = [(0.48527452, DocAddress(0, 0))];
+        assert_results_equals(&hits, &expected);
+    }
+
+    #[test]
+    fn test_top_collector_at_capacity() {
+        // Three documents match "droopy tax" but the limit is 2, so only the two
+        // best-scoring ones are returned.
+        let index = make_index();
+        let text_field = index.schema().get_field("text").unwrap();
+        let query = QueryParser::for_index(&index, vec![text_field])
+            .parse_query("droopy tax")
+            .unwrap();
+        let searcher = index.reader().unwrap().searcher();
+        let collector = TopDocs::with_limit(2);
+        let hits: Vec<(Score, DocAddress)> = searcher.search(&query, &collector).unwrap();
+
+        // Expected hits, highest score first.
+        let expected: [(Score, DocAddress); 2] = [
+            (0.81221175, DocAddress(0u32, 1)),
+            (0.5376842, DocAddress(0u32, 2)),
+        ];
+        assert_results_equals(&hits, &expected);
+    }
+
+    #[test]
+    fn test_top_collector_at_capacity_with_offset() {
+        // With an offset of 1 the best match is skipped; the limit of 2 then
+        // returns the second and third best matches.
+        let index = make_index();
+        let text_field = index.schema().get_field("text").unwrap();
+        let query = QueryParser::for_index(&index, vec![text_field])
+            .parse_query("droopy tax")
+            .unwrap();
+        let searcher = index.reader().unwrap().searcher();
+        let collector = TopDocs::with_limit(2).and_offset(1);
+        let hits: Vec<(Score, DocAddress)> = searcher.search(&query, &collector).unwrap();
+
+        // Expected hits, highest score first.
+        let expected: [(Score, DocAddress); 2] = [
+            (0.5376842, DocAddress(0u32, 2)),
+            (0.48527452, DocAddress(0, 0)),
+        ];
+        assert_results_equals(&hits, &expected);
+    }
+
+    #[test]
+    fn test_top_collector_stable_sorting() {
+        let index = make_index();
+
+        // using AllQuery to get a constant score
+        let searcher = index.reader().unwrap().searcher();
+        // Same query with two limits: the smaller result must be a prefix of the larger.
+        let page_1 = searcher.search(&AllQuery, &TopDocs::with_limit(2)).unwrap();
+
+        let page_2 = searcher.search(&AllQuery, &TopDocs::with_limit(3)).unwrap();
+
+        // precondition for the test to be meaningful: we did get documents
+        // with the same score
+        assert!(page_1.iter().all(|result| result.0 == page_1[0].0));
+        assert!(page_2.iter().all(|result| result.0 == page_2[0].0));
+
+        // sanity check since we're relying on make_index()
+        assert_eq!(page_1.len(), 2);
+        assert_eq!(page_2.len(), 3);
+        // Stable tie-breaking: the top 2 are exactly the first two entries of the top 3.
+        assert_eq!(page_1, &page_2[..page_1.len()]);
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_top_0() {
+        TopDocs::with_limit(0); // a limit of 0 is expected to panic
+    }
+
+    const TITLE: &str = "title";
+    const SIZE: &str = "size";
+
+    #[test]
+    fn test_top_field_collector_not_at_capacity() {
+        let mut schema_builder = Schema::builder();
+        let title = schema_builder.add_text_field(TITLE, TEXT);
+        let size = schema_builder.add_u64_field(SIZE, FAST);
+        let schema = schema_builder.build();
+        let (index, query) = index("beer", title, schema, |index_writer| {
+            index_writer.add_document(doc!(
+                title => "bottle of beer",
+                size => 12u64,
+            ));
+            index_writer.add_document(doc!(
+                title => "growler of beer",
+                size => 64u64,
+            ));
+            index_writer.add_document(doc!(
+                title => "pint of beer",
+                size => 16u64,
+            ));
+        });
+        let searcher = index.reader().unwrap().searcher();
+        // Rank by the `size` fast field; the three matching docs stay below the limit of 4.
+        let top_collector = TopDocs::with_limit(4).order_by_u64_field(size);
+        let top_docs: Vec<(u64, DocAddress)> = searcher.search(&query, &top_collector).unwrap();
+        assert_eq!(
+            &top_docs[..],
+            &[
+                (64, DocAddress(0, 1)),
+                (16, DocAddress(0, 2)),
+                (12, DocAddress(0, 0))
+            ]
+        );
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_field_does_not_exist() {
+        // Only `title` and `size` are declared, so the field id 2 used below does
+        // not belong to the schema.
+        let mut schema_builder = Schema::builder();
+        let title = schema_builder.add_text_field(TITLE, TEXT);
+        let size = schema_builder.add_u64_field(SIZE, FAST);
+        let schema = schema_builder.build();
+        let (index, _) = index("beer", title, schema, |index_writer| {
+            index_writer.add_document(doc!(title => "bottle of beer", size => 12u64));
+        });
+        let searcher = index.reader().unwrap().searcher();
+        let missing_field = Field::from_field_id(2);
+        let top_collector = TopDocs::with_limit(4).order_by_u64_field(missing_field);
+        let segment_reader = searcher.segment_reader(0u32);
+        // NOTE: `expect` makes the test pass whether `for_segment` panics or returns `Err`.
+        let outcome = top_collector.for_segment(0, segment_reader);
+        outcome.expect("should panic");
+    }
+
+    #[test]
+    fn test_field_not_fast_field() {
+        // `size` is declared STORED but not FAST, so ordering by it must be rejected.
+        let mut schema_builder = Schema::builder();
+        let title = schema_builder.add_text_field(TITLE, TEXT);
+        let size = schema_builder.add_u64_field(SIZE, STORED);
+        let schema = schema_builder.build();
+        let (index, _) = index("beer", title, schema, |index_writer| {
+            index_writer.add_document(doc!(
+                title => "bottle of beer",
+                size => 12u64,
+            ));
+        });
+        let searcher = index.reader().unwrap().searcher();
+        let segment = searcher.segment_reader(0);
+        let top_collector = TopDocs::with_limit(4).order_by_u64_field(size);
+        // Anything other than a `SchemaError` (including success) fails the test.
+        match top_collector.for_segment(0, segment) {
+            Err(crate::TantivyError::SchemaError(msg)) => assert_eq!(
+                msg,
+                "Field requested (Field(1)) is not a i64/u64 fast field."
+            ),
+            _ => panic!("expected `for_segment` to fail with a `SchemaError`"),
+        }
+    }
+
+    #[test]
+    fn test_tweak_score_top_collector_with_offset() {
+        // The tweaked score is the doc id itself. All three docs match, so after
+        // skipping the highest id (offset 1) the two remaining docs are 1 and 0.
+        let index = make_index();
+        let text_field = index.schema().get_field("text").unwrap();
+        let query = QueryParser::for_index(&index, vec![text_field])
+            .parse_query("droopy tax")
+            .unwrap();
+        let collector = TopDocs::with_limit(2).and_offset(1).tweak_score(
+            move |_segment_reader: &SegmentReader| move |doc: DocId, _original_score: Score| doc,
+        );
+        let searcher = index.reader().unwrap().searcher();
+        let ranked: Vec<(u32, DocAddress)> = searcher.search(&query, &collector).unwrap();
+
+        // Pairs of (tweaked score, address), highest score first.
+        assert_eq!(
+            ranked,
+            vec![(1, DocAddress(0, 1)), (0, DocAddress(0, 0))]
+        );
+    }
+
+    #[test]
+    fn test_custom_score_top_collector_with_offset() {
+        // The custom score is the doc id itself. All three docs match, so after
+        // skipping the highest id (offset 1) the two remaining docs are 1 and 0.
+        let index = make_index();
+        let text_field = index.schema().get_field("text").unwrap();
+        let query = QueryParser::for_index(&index, vec![text_field])
+            .parse_query("droopy tax")
+            .unwrap();
+        let collector = TopDocs::with_limit(2)
+            .and_offset(1)
+            .custom_score(move |_segment_reader: &SegmentReader| move |doc: DocId| doc);
+        let searcher = index.reader().unwrap().searcher();
+        let ranked: Vec<(u32, DocAddress)> = searcher.search(&query, &collector).unwrap();
+
+        // Pairs of (custom score, address), highest score first.
+        assert_eq!(
+            ranked,
+            vec![(1, DocAddress(0, 1)), (0, DocAddress(0, 0))]
+        );
+    }
+
+    /// Builds an in-RAM index filled (and committed) via `doc_adder`, and parses `query`.
+    fn index(
+        query: &str,
+        query_field: Field,
+        schema: Schema,
+        mut doc_adder: impl FnMut(&mut IndexWriter),
+    ) -> (Index, Box<dyn Query>) {
+        let index = Index::create_in_ram(schema);
+        let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
+        doc_adder(&mut index_writer);
+        index_writer.commit().unwrap();
+        let query_parser = QueryParser::for_index(&index, vec![query_field]);
+        let query = query_parser.parse_query(query).unwrap();
+        (index, query)
+    }
+}
--- a/src/collector/tweak_score_top_collector.rs
+++ b/src/collector/tweak_score_top_collector.rs
@@ -0,0 +1,129 @@
+use crate::collector::top_collector::{TopCollector, TopSegmentCollector};
+use crate::collector::{Collector, SegmentCollector};
+use crate::DocAddress;
+use crate::{DocId, Result, Score, SegmentReader};
+
+pub(crate) struct TweakedScoreTopCollector<TScoreTweaker, TScore = Score> {
+    score_tweaker: TScoreTweaker,
+    collector: TopCollector<TScore>,
+}
+
+impl<TScoreTweaker, TScore> TweakedScoreTopCollector<TScoreTweaker, TScore>
+where
+    TScore: Clone + PartialOrd,
+{
+    pub fn new(
+        score_tweaker: TScoreTweaker,
+        collector: TopCollector<TScore>,
+    ) -> TweakedScoreTopCollector<TScoreTweaker, TScore> {
+        TweakedScoreTopCollector {
+            score_tweaker,
+            collector,
+        }
+    }
+}
+
+/// A `ScoreSegmentTweaker` makes it possible to modify the default score
+/// for a given document belonging to a specific segment.
+///
+/// It is the segment local version of the [`ScoreTweaker`](./trait.ScoreTweaker.html).
+pub trait ScoreSegmentTweaker<TScore>: 'static {
+    /// Tweak the given `score` for the document `doc`.
+    fn score(&mut self, doc: DocId, score: Score) -> TScore;
+}
+
+/// `ScoreTweaker` makes it possible to tweak the score
+/// emitted by the scorer into another one.
+///
+/// The `ScoreTweaker` does not do much of the computation itself.
+/// Instead, it helps construct `Self::Child` instances that will compute
+/// the score at the segment scale.
+pub trait ScoreTweaker<TScore>: Sync {
+    /// Type of the associated [`ScoreSegmentTweaker`](./trait.ScoreSegmentTweaker.html).
+    type Child: ScoreSegmentTweaker<TScore>;
+
+    /// Builds a child tweaker for a specific segment. The child tweaker is
+    /// associated with that segment only.
+    fn segment_tweaker(&self, segment_reader: &SegmentReader) -> Result<Self::Child>;
+}
+
+impl<TScoreTweaker, TScore> Collector for TweakedScoreTopCollector<TScoreTweaker, TScore>
+where
+    TScoreTweaker: ScoreTweaker<TScore> + Send + Sync,
+    TScore: 'static + PartialOrd + Clone + Send + Sync,
+{
+    type Fruit = Vec<(TScore, DocAddress)>;
+
+    type Child = TopTweakedScoreSegmentCollector<TScoreTweaker::Child, TScore>;
+
+    fn for_segment(
+        &self,
+        segment_local_id: u32,
+        segment_reader: &SegmentReader,
+    ) -> Result<Self::Child> {
+        let segment_scorer = self.score_tweaker.segment_tweaker(segment_reader)?;
+        let segment_collector = self
+            .collector
+            .for_segment(segment_local_id, segment_reader)?;
+        Ok(TopTweakedScoreSegmentCollector {
+            segment_collector,
+            segment_scorer,
+        })
+    }
+
+    fn requires_scoring(&self) -> bool {
+        true
+    }
+
+    fn merge_fruits(&self, segment_fruits: Vec<Self::Fruit>) -> Result<Self::Fruit> {
+        self.collector.merge_fruits(segment_fruits)
+    }
+}
+
+pub struct TopTweakedScoreSegmentCollector<TSegmentScoreTweaker, TScore>
+where
+    TScore: 'static + PartialOrd + Clone + Send + Sync + Sized,
+    TSegmentScoreTweaker: ScoreSegmentTweaker<TScore>,
+{
+    segment_collector: TopSegmentCollector<TScore>,
+    segment_scorer: TSegmentScoreTweaker,
+}
+
+impl<TSegmentScoreTweaker, TScore> SegmentCollector
+    for TopTweakedScoreSegmentCollector<TSegmentScoreTweaker, TScore>
+where
+    TScore: 'static + PartialOrd + Clone + Send + Sync,
+    TSegmentScoreTweaker: 'static + ScoreSegmentTweaker<TScore>,
+{
+    type Fruit = Vec<(TScore, DocAddress)>;
+
+    fn collect(&mut self, doc: DocId, score: Score) {
+        let score = self.segment_scorer.score(doc, score);
+        self.segment_collector.collect(doc, score);
+    }
+
+    fn harvest(self) -> Vec<(TScore, DocAddress)> {
+        self.segment_collector.harvest()
+    }
+}
+
+impl<F, TScore, TSegmentScoreTweaker> ScoreTweaker<TScore> for F
+where
+    F: 'static + Send + Sync + Fn(&SegmentReader) -> TSegmentScoreTweaker,
+    TSegmentScoreTweaker: ScoreSegmentTweaker<TScore>,
+{
+    type Child = TSegmentScoreTweaker;
+
+    fn segment_tweaker(&self, segment_reader: &SegmentReader) -> Result<Self::Child> {
+        Ok((self)(segment_reader))
+    }
+}
+
+impl<F, TScore> ScoreSegmentTweaker<TScore> for F
+where
+    F: 'static + FnMut(DocId, Score) -> TScore,
+{
+    fn score(&mut self, doc: DocId, score: Score) -> TScore {
+        (self)(doc, score)
+    }
+}
--- a/src/common/bitpacker.rs
+++ b/src/common/bitpacker.rs
@@ -1,9 +1,6 @@
-use common::serialize::BinarySerializable;
+use byteorder::{ByteOrder, LittleEndian, WriteBytesExt};
 use std::io;
-use std::io::Write;
-use std::mem;
 use std::ops::Deref;
-use std::ptr;

 pub(crate) struct BitPacker {
    mini_buffer: u64,
@@ -18,7 +15,7 @@ impl BitPacker {
        }
    }

-    pub fn write<TWrite: Write>(
+    pub fn write<TWrite: io::Write>(
        &mut self,
        val: u64,
        num_bits: u8,
@@ -28,14 +25,14 @@ impl BitPacker {
        let num_bits = num_bits as usize;
        if self.mini_buffer_written + num_bits > 64 {
            self.mini_buffer |= val_u64.wrapping_shl(self.mini_buffer_written as u32);
-            self.mini_buffer.serialize(output)?;
+            output.write_u64::<LittleEndian>(self.mini_buffer)?;
            self.mini_buffer = val_u64.wrapping_shr((64 - self.mini_buffer_written) as u32);
            self.mini_buffer_written = self.mini_buffer_written + num_bits - 64;
        } else {
            self.mini_buffer |= val_u64 << self.mini_buffer_written;
            self.mini_buffer_written += num_bits;
            if self.mini_buffer_written == 64 {
-                self.mini_buffer.serialize(output)?;
+                output.write_u64::<LittleEndian>(self.mini_buffer)?;
                self.mini_buffer_written = 0;
                self.mini_buffer = 0u64;
            }
@@ -43,17 +40,18 @@ impl BitPacker {
        Ok(())
    }

-    pub fn flush<TWrite: Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
+    pub fn flush<TWrite: io::Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
        if self.mini_buffer_written > 0 {
            let num_bytes = (self.mini_buffer_written + 7) / 8;
-            let arr: [u8; 8] = unsafe { mem::transmute::<u64, [u8; 8]>(self.mini_buffer.to_le()) };
+            let mut arr: [u8; 8] = [0u8; 8];
+            LittleEndian::write_u64(&mut arr, self.mini_buffer);
            output.write_all(&arr[..num_bytes])?;
            self.mini_buffer_written = 0;
        }
        Ok(())
    }

-    pub fn close<TWrite: Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
+    pub fn close<TWrite: io::Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
        self.flush(output)?;
        // Padding the write file to simplify reads.
        output.write_all(&[0u8; 7])?;
@@ -66,7 +64,7 @@ pub struct BitUnpacker<Data>
 where
    Data: Deref<Target = [u8]>,
 {
-    num_bits: usize,
+    num_bits: u64,
    mask: u64,
    data: Data,
 }
@@ -82,13 +80,13 @@ where
            (1u64 << num_bits) - 1u64
        };
        BitUnpacker {
-            num_bits: num_bits as usize,
+            num_bits: u64::from(num_bits),
            mask,
            data,
        }
    }

-    pub fn get(&self, idx: usize) -> u64 {
+    pub fn get(&self, idx: u64) -> u64 {
        if self.num_bits == 0 {
            return 0u64;
        }
@@ -99,40 +97,13 @@ where
        let addr = addr_in_bits >> 3;
        let bit_shift = addr_in_bits & 7;
        debug_assert!(
-            addr + 8 <= data.len(),
+            addr + 8 <= data.len() as u64,
            "The fast field field should have been padded with 7 bytes."
        );
-        let val_unshifted_unmasked: u64 =
-            u64::from_le(unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) });
+        let val_unshifted_unmasked: u64 = LittleEndian::read_u64(&data[(addr as usize)..]);
        let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
        val_shifted & mask
    }
-
-    /// Reads a range of values from the fast field.
-    ///
-    /// The range of values read is from
-    /// `[start..start + output.len()[`
-    pub fn get_range(&self, start: u32, output: &mut [u64]) {
-        if self.num_bits == 0 {
-            for val in output.iter_mut() {
-                *val = 0u64;
-            }
-        } else {
-            let data: &[u8] = &*self.data;
-            let num_bits = self.num_bits;
-            let mask = self.mask;
-            let mut addr_in_bits = (start as usize) * num_bits;
-            for output_val in output.iter_mut() {
-                let addr = addr_in_bits >> 3;
-                let bit_shift = addr_in_bits & 7;
-                let val_unshifted_unmasked: u64 =
-                    unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) };
-                let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
-                *output_val = val_shifted & mask;
-                addr_in_bits += num_bits;
-            }
-        }
-    }
 }

 #[cfg(test)]
@@ -158,7 +129,7 @@ mod test {
    fn test_bitpacker_util(len: usize, num_bits: u8) {
        let (bitunpacker, vals) = create_fastfield_bitpacker(len, num_bits);
        for (i, val) in vals.iter().enumerate() {
-            assert_eq!(bitunpacker.get(i), *val);
+            assert_eq!(bitunpacker.get(i as u64), *val);
        }
    }

@@ -170,17 +141,4 @@ mod test {
        test_bitpacker_util(6, 14);
        test_bitpacker_util(1000, 14);
    }
-
-    #[test]
-    fn test_bitpacker_range() {
-        let (bitunpacker, vals) = create_fastfield_bitpacker(100_000, 12);
-        let buffer_len = 100;
-        let mut buffer = vec![0u64; buffer_len];
-        for start in vec![0, 10, 20, 100, 1_000] {
-            bitunpacker.get_range(start as u32, &mut buffer[..]);
-            for i in 0..buffer_len {
-                assert_eq!(buffer[i], vals[start + i]);
-            }
-        }
-    }
 }
--- a/src/common/bitset.rs
+++ b/src/common/bitset.rs
@@ -5,7 +5,7 @@ use std::u64;
 pub(crate) struct TinySet(u64);

 impl fmt::Debug for TinySet {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.into_iter().collect::<Vec<u32>>().fmt(f)
    }
 }
@@ -33,18 +33,27 @@ impl TinySet {
        TinySet(0u64)
    }

+    pub fn clear(&mut self) {
+        self.0 = 0u64;
+    }
+
    /// Returns the complement of the set in `[0, 64[`.
-    fn complement(&self) -> TinySet {
+    fn complement(self) -> TinySet {
        TinySet(!self.0)
    }

    /// Returns true iff the `TinySet` contains the element `el`.
-    pub fn contains(&self, el: u32) -> bool {
+    pub fn contains(self, el: u32) -> bool {
        !self.intersect(TinySet::singleton(el)).is_empty()
    }

+    /// Returns the number of elements in the TinySet.
+    pub fn len(self) -> u32 {
+        self.0.count_ones()
+    }
+
    /// Returns the intersection of `self` and `other`
-    pub fn intersect(&self, other: TinySet) -> TinySet {
+    pub fn intersect(self, other: TinySet) -> TinySet {
        TinySet(self.0 & other.0)
    }

@@ -77,7 +86,7 @@ impl TinySet {

    /// Returns true iff the `TinySet` is empty.
    #[inline(always)]
-    pub fn is_empty(&self) -> bool {
+    pub fn is_empty(self) -> bool {
        self.0 == 0u64
    }

@@ -109,22 +118,12 @@ impl TinySet {
    pub fn range_greater_or_equal(from_included: u32) -> TinySet {
        TinySet::range_lower(from_included).complement()
    }
-
-    pub fn clear(&mut self) {
-        self.0 = 0u64;
-    }
-
-    pub fn len(&self) -> u32 {
-        self.0.count_ones()
-    }
 }

 #[derive(Clone)]
 pub struct BitSet {
    tinysets: Box<[TinySet]>,
-    len: usize, //< Technically it should be u32, but we
-    // count multiple inserts.
-    // `usize` guards us from overflow.
+    len: usize,
    max_value: u32,
 }

@@ -204,12 +203,12 @@ mod tests {

    use super::BitSet;
    use super::TinySet;
-    use docset::DocSet;
-    use query::BitSetDocSet;
+    use crate::docset::{DocSet, TERMINATED};
+    use crate::query::BitSetDocSet;
+    use crate::tests;
+    use crate::tests::generate_nonunique_unsorted;
    use std::collections::BTreeSet;
    use std::collections::HashSet;
-    use tests;
-    use tests::generate_nonunique_unsorted;

    #[test]
    fn test_tiny_set() {
@@ -266,23 +265,25 @@ mod tests {

    #[test]
    fn test_bitset_large() {
-        let arr = generate_nonunique_unsorted(1_000_000, 50_000);
+        let arr = generate_nonunique_unsorted(100_000, 5_000);
        let mut btreeset: BTreeSet<u32> = BTreeSet::new();
-        let mut bitset = BitSet::with_max_value(1_000_000);
+        let mut bitset = BitSet::with_max_value(100_000);
        for el in arr {
            btreeset.insert(el);
            bitset.insert(el);
        }
-        for i in 0..1_000_000 {
+        for i in 0..100_000 {
            assert_eq!(btreeset.contains(&i), bitset.contains(i));
        }
        assert_eq!(btreeset.len(), bitset.len());
        let mut bitset_docset = BitSetDocSet::from(bitset);
+        let mut remaining = true;
        for el in btreeset.into_iter() {
-            bitset_docset.advance();
+            assert!(remaining);
            assert_eq!(bitset_docset.doc(), el);
+            remaining = bitset_docset.advance() != TERMINATED;
        }
-        assert!(!bitset_docset.advance());
+        assert!(!remaining);
    }

    #[test]
@@ -342,7 +343,7 @@ mod tests {
    #[test]
    fn test_bitset_clear() {
        let mut bitset = BitSet::with_max_value(1_000);
-        let els = tests::sample(1_000, 0.01f32);
+        let els = tests::sample(1_000, 0.01f64);
        for &el in &els {
            bitset.insert(el);
        }
--- a/src/common/composite_file.rs
+++ b/src/common/composite_file.rs
@@ -1,9 +1,11 @@
-use common::BinarySerializable;
-use common::CountingWriter;
-use common::VInt;
-use directory::ReadOnlySource;
-use directory::WritePtr;
-use schema::Field;
+use crate::common::BinarySerializable;
+use crate::common::CountingWriter;
+use crate::common::VInt;
+use crate::directory::ReadOnlySource;
+use crate::directory::{TerminatingWrite, WritePtr};
+use crate::schema::Field;
+use crate::space_usage::FieldUsage;
+use crate::space_usage::PerFieldSpaceUsage;
 use std::collections::HashMap;
 use std::io::Write;
 use std::io::{self, Read};
@@ -37,10 +39,10 @@ impl BinarySerializable for FileAddr {
 /// A `CompositeWrite` is used to write a `CompositeFile`.
 pub struct CompositeWrite<W = WritePtr> {
    write: CountingWriter<W>,
-    offsets: HashMap<FileAddr, usize>,
+    offsets: HashMap<FileAddr, u64>,
 }

-impl<W: Write> CompositeWrite<W> {
+impl<W: TerminatingWrite + Write> CompositeWrite<W> {
    /// Crate a new API writer that writes a composite file
    /// in a given write.
    pub fn wrap(w: W) -> CompositeWrite<W> {
@@ -64,7 +66,7 @@ impl<W: Write> CompositeWrite<W> {
        &mut self.write
    }

-    /// Close the composite file.
+    /// Close the composite file
    ///
    /// An index of the different field offsets
    /// will be written as a footer.
@@ -72,7 +74,8 @@ impl<W: Write> CompositeWrite<W> {
        let footer_offset = self.write.written_bytes();
        VInt(self.offsets.len() as u64).serialize(&mut self.write)?;

-        let mut offset_fields: Vec<_> = self.offsets
+        let mut offset_fields: Vec<_> = self
+            .offsets
            .iter()
            .map(|(file_addr, offset)| (*offset, *file_addr))
            .collect();
@@ -88,8 +91,7 @@ impl<W: Write> CompositeWrite<W> {

        let footer_len = (self.write.written_bytes() - footer_offset) as u32;
        footer_len.serialize(&mut self.write)?;
-        self.write.flush()?;
-        Ok(())
+        self.write.terminate()
    }
 }

@@ -112,7 +114,6 @@ impl CompositeFile {
        let end = data.len();
        let footer_len_data = data.slice_from(end - 4);
        let footer_len = u32::deserialize(&mut footer_len_data.as_slice())? as usize;
-
        let footer_start = end - 4 - footer_len;
        let footer_data = data.slice(footer_start, footer_start + footer_len);
        let mut footer_buffer = footer_data.as_slice();
@@ -166,16 +167,27 @@ impl CompositeFile {
            .get(&FileAddr { field, idx })
            .map(|&(from, to)| self.data.slice(from, to))
    }
+
+    pub fn space_usage(&self) -> PerFieldSpaceUsage {
+        let mut fields = HashMap::new();
+        for (&field_addr, &(start, end)) in self.offsets_index.iter() {
+            fields
+                .entry(field_addr.field)
+                .or_insert_with(|| FieldUsage::empty(field_addr.field))
+                .add_field_idx(field_addr.idx, end - start);
+        }
+        PerFieldSpaceUsage::new(fields)
+    }
 }

 #[cfg(test)]
 mod test {

    use super::{CompositeFile, CompositeWrite};
-    use common::BinarySerializable;
-    use common::VInt;
-    use directory::{Directory, RAMDirectory};
-    use schema::Field;
+    use crate::common::BinarySerializable;
+    use crate::common::VInt;
+    use crate::directory::{Directory, RAMDirectory};
+    use crate::schema::Field;
    use std::io::Write;
    use std::path::Path;

@@ -187,13 +199,13 @@ mod test {
            let w = directory.open_write(path).unwrap();
            let mut composite_write = CompositeWrite::wrap(w);
            {
-                let mut write_0 = composite_write.for_field(Field(0u32));
+                let mut write_0 = composite_write.for_field(Field::from_field_id(0u32));
                VInt(32431123u64).serialize(&mut write_0).unwrap();
                write_0.flush().unwrap();
            }

            {
-                let mut write_4 = composite_write.for_field(Field(4u32));
+                let mut write_4 = composite_write.for_field(Field::from_field_id(4u32));
                VInt(2).serialize(&mut write_4).unwrap();
                write_4.flush().unwrap();
            }
@@ -203,14 +215,18 @@ mod test {
            let r = directory.open_read(path).unwrap();
            let composite_file = CompositeFile::open(&r).unwrap();
            {
-                let file0 = composite_file.open_read(Field(0u32)).unwrap();
+                let file0 = composite_file
+                    .open_read(Field::from_field_id(0u32))
+                    .unwrap();
                let mut file0_buf = file0.as_slice();
                let payload_0 = VInt::deserialize(&mut file0_buf).unwrap().0;
                assert_eq!(file0_buf.len(), 0);
                assert_eq!(payload_0, 32431123u64);
            }
            {
-                let file4 = composite_file.open_read(Field(4u32)).unwrap();
+                let file4 = composite_file
+                    .open_read(Field::from_field_id(4u32))
+                    .unwrap();
                let mut file4_buf = file4.as_slice();
                let payload_4 = VInt::deserialize(&mut file4_buf).unwrap().0;
                assert_eq!(file4_buf.len(), 0);
@@ -218,5 +234,4 @@ mod test {
            }
        }
    }
-
 }
--- a/src/common/counting_writer.rs
+++ b/src/common/counting_writer.rs
@@ -1,9 +1,11 @@
+use crate::directory::AntiCallToken;
+use crate::directory::TerminatingWrite;
 use std::io;
 use std::io::Write;

 pub struct CountingWriter<W> {
    underlying: W,
-    written_bytes: usize,
+    written_bytes: u64,
 }

 impl<W: Write> CountingWriter<W> {
@@ -14,11 +16,11 @@ impl<W: Write> CountingWriter<W> {
        }
    }

-    pub fn written_bytes(&self) -> usize {
+    pub fn written_bytes(&self) -> u64 {
        self.written_bytes
    }

-    pub fn finish(mut self) -> io::Result<(W, usize)> {
+    pub fn finish(mut self) -> io::Result<(W, u64)> {
        self.flush()?;
        Ok((self.underlying, self.written_bytes))
    }
@@ -27,15 +29,28 @@ impl<W: Write> CountingWriter<W> {
 impl<W: Write> Write for CountingWriter<W> {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let written_size = self.underlying.write(buf)?;
-        self.written_bytes += written_size;
+        self.written_bytes += written_size as u64;
        Ok(written_size)
    }

+    fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
+        self.underlying.write_all(buf)?;
+        self.written_bytes += buf.len() as u64;
+        Ok(())
+    }
+
    fn flush(&mut self) -> io::Result<()> {
        self.underlying.flush()
    }
 }

+impl<W: TerminatingWrite> TerminatingWrite for CountingWriter<W> {
+    fn terminate_ref(&mut self, token: AntiCallToken) -> io::Result<()> {
+        self.flush()?;
+        self.underlying.terminate_ref(token)
+    }
+}
+
 #[cfg(test)]
 mod test {

@@ -48,8 +63,8 @@ mod test {
        let mut counting_writer = CountingWriter::wrap(buffer);
        let bytes = (0u8..10u8).collect::<Vec<u8>>();
        counting_writer.write_all(&bytes).unwrap();
-        let (w, len): (Vec<u8>, usize) = counting_writer.finish().unwrap();
-        assert_eq!(len, 10);
+        let (w, len): (Vec<u8>, u64) = counting_writer.finish().unwrap();
+        assert_eq!(len, 10u64);
        assert_eq!(w.len(), 10);
    }
 }
--- a/src/common/mod.rs
+++ b/src/common/mod.rs
@@ -10,10 +10,28 @@ pub(crate) use self::bitset::TinySet;
 pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
 pub use self::counting_writer::CountingWriter;
 pub use self::serialize::{BinarySerializable, FixedSize};
-pub use self::vint::VInt;
+pub use self::vint::{
+    read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt,
+};
 pub use byteorder::LittleEndian as Endianness;

-use std::io;
+/// Segment's max doc must be `< MAX_DOC_LIMIT`.
+///
+/// We do not allow segments whose max doc is `2^31` or more, so that it always fits in a non-negative `i32`.
+pub const MAX_DOC_LIMIT: u32 = 1 << 31;
+
+pub fn minmax<I, T>(mut vals: I) -> Option<(T, T)>
+where
+    I: Iterator<Item = T>,
+    T: Copy + Ord,
+{
+    if let Some(first_el) = vals.next() {
+        return Some(vals.fold((first_el, first_el), |(min_val, max_val), el| {
+            (min_val.min(el), max_val.max(el))
+        }));
+    }
+    None
+}

 /// Computes the number of bits that will be used for bitpacking.
 ///
@@ -52,11 +70,6 @@ pub(crate) fn is_power_of_2(n: usize) -> bool {
    (n > 0) && (n & (n - 1) == 0)
 }

-/// Create a default io error given a string.
-pub(crate) fn make_io_err(msg: String) -> io::Error {
-    io::Error::new(io::ErrorKind::Other, msg)
-}
-
 /// Has length trait
 pub trait HasLen {
    /// Return length
@@ -101,16 +114,54 @@ pub fn u64_to_i64(val: u64) -> i64 {
    (val ^ HIGHEST_BIT) as i64
 }

+/// Maps a `f64` to `u64`
+///
+/// For simplicity, tantivy internally handles `f64` as `u64`.
+/// The mapping is defined by this function.
+///
+/// Maps `f64` to `u64` so that lexical order is preserved.
+///
+/// This is more suited than simply casting (`val as u64`)
+/// which would truncate the result
+///
+/// # See also
+/// The [reverse mapping is `u64_to_f64`](./fn.u64_to_f64.html).
+#[inline(always)]
+pub fn f64_to_u64(val: f64) -> u64 {
+    let bits = val.to_bits();
+    if val.is_sign_positive() {
+        bits ^ HIGHEST_BIT
+    } else {
+        !bits
+    }
+}
+
+/// Reverse the mapping given by [`f64_to_u64`](./fn.f64_to_u64.html).
+#[inline(always)]
+pub fn u64_to_f64(val: u64) -> f64 {
+    f64::from_bits(if val & HIGHEST_BIT != 0 {
+        val ^ HIGHEST_BIT
+    } else {
+        !val
+    })
+}
+
 #[cfg(test)]
 pub(crate) mod test {

+    pub use super::minmax;
    pub use super::serialize::test::fixed_size_test;
-    use super::{compute_num_bits, i64_to_u64, u64_to_i64};
+    use super::{compute_num_bits, f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
+    use std::f64;

    fn test_i64_converter_helper(val: i64) {
        assert_eq!(u64_to_i64(i64_to_u64(val)), val);
    }

+    fn test_f64_converter_helper(val: f64) {
+        assert_eq!(u64_to_f64(f64_to_u64(val)), val);
+    }
+
    #[test]
    fn test_i64_converter() {
        assert_eq!(i64_to_u64(i64::min_value()), u64::min_value());
@@ -123,6 +174,29 @@ pub(crate) mod test {
        }
    }

+    #[test]
+    fn test_f64_converter() {
+        test_f64_converter_helper(f64::INFINITY);
+        test_f64_converter_helper(f64::NEG_INFINITY);
+        test_f64_converter_helper(0.0);
+        test_f64_converter_helper(-0.0);
+        test_f64_converter_helper(1.0);
+        test_f64_converter_helper(-1.0);
+    }
+
+    #[test]
+    fn test_f64_order() {
+        assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
+            .contains(&f64_to_u64(f64::NAN))); //nan is not a number
+        assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); //same exponent, different mantissa
+        assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); //same mantissa, different exponent
+        assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); //different exponent and mantissa
+        assert!(f64_to_u64(1.0) > f64_to_u64(-1.0)); // pos > neg
+        assert!(f64_to_u64(-1.5) < f64_to_u64(-1.0));
+        assert!(f64_to_u64(-2.0) < f64_to_u64(1.0));
+        assert!(f64_to_u64(-2.0) < f64_to_u64(-1.5));
+    }
+
    #[test]
    fn test_compute_num_bits() {
        assert_eq!(compute_num_bits(1), 1u8);
@@ -134,4 +208,28 @@ pub(crate) mod test {
        assert_eq!(compute_num_bits(256), 9u8);
        assert_eq!(compute_num_bits(5_000_000_000), 33u8);
    }
+
+    #[test]
+    fn test_max_doc() {
+        // this is the first time I write a unit test for a constant.
+        assert!(((super::MAX_DOC_LIMIT - 1) as i32) >= 0);
+        assert!((super::MAX_DOC_LIMIT as i32) < 0);
+    }
+
+    #[test]
+    fn test_minmax_empty() {
+        let vals: Vec<u32> = vec![];
+        assert_eq!(minmax(vals.into_iter()), None);
+    }
+
+    #[test]
+    fn test_minmax_one() {
+        assert_eq!(minmax(vec![1].into_iter()), Some((1, 1)));
+    }
+
+    #[test]
+    fn test_minmax_two() {
+        assert_eq!(minmax(vec![1, 2].into_iter()), Some((1, 2)));
+        assert_eq!(minmax(vec![2, 1].into_iter()), Some((1, 2)));
+    }
 }
--- a/src/common/serialize.rs
+++ b/src/common/serialize.rs
@@ -1,6 +1,6 @@
+use crate::common::Endianness;
+use crate::common::VInt;
 use byteorder::{ReadBytesExt, WriteBytesExt};
-use common::Endianness;
-use common::VInt;
 use std::fmt;
 use std::io;
 use std::io::Read;
@@ -89,6 +89,19 @@ impl FixedSize for u64 {
    const SIZE_IN_BYTES: usize = 8;
 }

+impl BinarySerializable for f32 {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
+        writer.write_f32::<Endianness>(*self)
+    }
+    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
+        reader.read_f32::<Endianness>()
+    }
+}
+
+impl FixedSize for f32 {
+    const SIZE_IN_BYTES: usize = 4;
+}
+
 impl BinarySerializable for i64 {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_i64::<Endianness>(*self)
@@ -102,6 +115,19 @@ impl FixedSize for i64 {
    const SIZE_IN_BYTES: usize = 8;
 }

+impl BinarySerializable for f64 {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
+        writer.write_f64::<Endianness>(*self)
+    }
+    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
+        reader.read_f64::<Endianness>()
+    }
+}
+
+impl FixedSize for f64 {
+    const SIZE_IN_BYTES: usize = 8;
+}
+
 impl BinarySerializable for u8 {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u8(*self)
@@ -136,7 +162,7 @@ impl BinarySerializable for String {
 pub mod test {

    use super::*;
-    use common::VInt;
+    use crate::common::VInt;

    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
        let mut buffer = Vec::new();
@@ -172,6 +198,11 @@ pub mod test {
        fixed_size_test::<i64>();
    }

+    #[test]
+    fn test_serialize_f64() {
+        fixed_size_test::<f64>();
+    }
+
    #[test]
    fn test_serialize_u64() {
        fixed_size_test::<u64>();
@@ -181,10 +212,7 @@ pub mod test {
    fn test_serialize_string() {
        assert_eq!(serialize_test(String::from("")), 1);
        assert_eq!(serialize_test(String::from("ぽよぽよ")), 1 + 3 * 4);
-        assert_eq!(
-            serialize_test(String::from("富士さん見える。")),
-            1 + 3 * 8
-        );
+        assert_eq!(serialize_test(String::from("富士さん見える。")), 1 + 3 * 8);
    }

    #[test]
--- a/src/common/vint.rs
+++ b/src/common/vint.rs
@@ -1,12 +1,114 @@
 use super::BinarySerializable;
+use byteorder::{ByteOrder, LittleEndian};
 use std::io;
 use std::io::Read;
 use std::io::Write;

 ///   Wrapper over a `u64` that serializes as a variable int.
-#[derive(Debug, Eq, PartialEq)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub struct VInt(pub u64);

+const STOP_BIT: u8 = 128;
+
+pub fn serialize_vint_u32(val: u32, buf: &mut [u8; 8]) -> &[u8] {
+    const START_2: u64 = 1 << 7;
+    const START_3: u64 = 1 << 14;
+    const START_4: u64 = 1 << 21;
+    const START_5: u64 = 1 << 28;
+
+    const STOP_1: u64 = START_2 - 1;
+    const STOP_2: u64 = START_3 - 1;
+    const STOP_3: u64 = START_4 - 1;
+    const STOP_4: u64 = START_5 - 1;
+
+    const MASK_1: u64 = 127;
+    const MASK_2: u64 = MASK_1 << 7;
+    const MASK_3: u64 = MASK_2 << 7;
+    const MASK_4: u64 = MASK_3 << 7;
+    const MASK_5: u64 = MASK_4 << 7;
+
+    let val = u64::from(val);
+    const STOP_BIT: u64 = 128u64;
+    let (res, num_bytes) = match val {
+        0..=STOP_1 => (val | STOP_BIT, 1),
+        START_2..=STOP_2 => (
+            (val & MASK_1) | ((val & MASK_2) << 1) | (STOP_BIT << (8)),
+            2,
+        ),
+        START_3..=STOP_3 => (
+            (val & MASK_1) | ((val & MASK_2) << 1) | ((val & MASK_3) << 2) | (STOP_BIT << (8 * 2)),
+            3,
+        ),
+        START_4..=STOP_4 => (
+            (val & MASK_1)
+                | ((val & MASK_2) << 1)
+                | ((val & MASK_3) << 2)
+                | ((val & MASK_4) << 3)
+                | (STOP_BIT << (8 * 3)),
+            4,
+        ),
+        _ => (
+            (val & MASK_1)
+                | ((val & MASK_2) << 1)
+                | ((val & MASK_3) << 2)
+                | ((val & MASK_4) << 3)
+                | ((val & MASK_5) << 4)
+                | (STOP_BIT << (8 * 4)),
+            5,
+        ),
+    };
+    LittleEndian::write_u64(&mut buf[..], res);
+    &buf[0..num_bytes]
+}
+
+/// Returns the number of bytes covered by a
+/// serialized vint `u32`.
+///
+/// Expects a data buffer that starts
+/// with the serialized `vint`, and scans at most 5 bytes ahead until
+/// it finds the vint's final byte.
+///
+/// # Panics
+/// If the payload does not start with a valid `vint`.
+fn vint_len(data: &[u8]) -> usize {
+    for (i, &val) in data.iter().enumerate().take(5) {
+        if val >= STOP_BIT {
+            return i + 1;
+        }
+    }
+    panic!("Corrupted data. Invalid VInt 32");
+}
+
+/// Reads a vint `u32` from a buffer, and
+/// consumes its payload data.
+///
+/// # Panics
+///
+/// If the buffer does not start with a valid
+/// vint payload.
+pub fn read_u32_vint(data: &mut &[u8]) -> u32 {
+    let (result, vlen) = read_u32_vint_no_advance(*data);
+    *data = &data[vlen..];
+    result
+}
+
+pub fn read_u32_vint_no_advance(data: &[u8]) -> (u32, usize) {
+    let vlen = vint_len(data);
+    let mut result = 0u32;
+    let mut shift = 0u64;
+    for &b in &data[..vlen] {
+        result |= u32::from(b & 127u8) << shift;
+        shift += 7;
+    }
+    (result, vlen)
+}
+/// Write a `u32` as a vint payload.
+pub fn write_u32_vint<W: io::Write>(val: u32, writer: &mut W) -> io::Result<()> {
+    let mut buf = [0u8; 8];
+    let data = serialize_vint_u32(val, &mut buf);
+    writer.write_all(&data)
+}
+
 impl VInt {
    pub fn val(&self) -> u64 {
        self.0
@@ -15,24 +117,34 @@ impl VInt {
    pub fn deserialize_u64<R: Read>(reader: &mut R) -> io::Result<u64> {
        VInt::deserialize(reader).map(|vint| vint.0)
    }
+
+    pub fn serialize_into_vec(&self, output: &mut Vec<u8>) {
+        let mut buffer = [0u8; 10];
+        let num_bytes = self.serialize_into(&mut buffer);
+        output.extend(&buffer[0..num_bytes]);
+    }
+
+    pub fn serialize_into(&self, buffer: &mut [u8; 10]) -> usize {
+        let mut remaining = self.0;
+        for (i, b) in buffer.iter_mut().enumerate() {
+            let next_byte: u8 = (remaining % 128u64) as u8;
+            remaining /= 128u64;
+            if remaining == 0u64 {
+                *b = next_byte | STOP_BIT;
+                return i + 1;
+            } else {
+                *b = next_byte;
+            }
+        }
+        unreachable!();
+    }
 }

 impl BinarySerializable for VInt {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
-        let mut remaining = self.0;
        let mut buffer = [0u8; 10];
-        let mut i = 0;
-        loop {
-            let next_byte: u8 = (remaining % 128u64) as u8;
-            remaining /= 128u64;
-            if remaining == 0u64 {
-                buffer[i] = next_byte | 128u8;
-                return writer.write_all(&buffer[0..i + 1]);
-            } else {
-                buffer[i] = next_byte;
-            }
-            i += 1;
-        }
+        let num_bytes = self.serialize_into(&mut buffer);
+        writer.write_all(&buffer[0..num_bytes])
    }

    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
@@ -42,20 +154,81 @@ impl BinarySerializable for VInt {
        loop {
            match bytes.next() {
                Some(Ok(b)) => {
-                    result += u64::from(b % 128u8) << shift;
-                    if b & 128u8 != 0u8 {
-                        break;
+                    result |= u64::from(b % 128u8) << shift;
+                    if b >= STOP_BIT {
+                        return Ok(VInt(result));
                    }
                    shift += 7;
                }
                _ => {
                    return Err(io::Error::new(
                        io::ErrorKind::InvalidData,
-                        "Reach end of buffer",
-                    ))
+                        "Reach end of buffer while reading VInt",
+                    ));
                }
            }
        }
-        Ok(VInt(result))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    use super::serialize_vint_u32;
+    use super::VInt;
+    use crate::common::BinarySerializable;
+
+    fn aux_test_vint(val: u64) {
+        let mut v = [14u8; 10];
+        let num_bytes = VInt(val).serialize_into(&mut v);
+        for i in num_bytes..10 {
+            assert_eq!(v[i], 14u8);
+        }
+        assert!(num_bytes > 0);
+        if num_bytes < 10 {
+            assert!(1u64 << (7 * num_bytes) > val);
+        }
+        if num_bytes > 1 {
+            assert!(1u64 << (7 * (num_bytes - 1)) <= val);
+        }
+        let serdeser_val = VInt::deserialize(&mut &v[..]).unwrap();
+        assert_eq!(val, serdeser_val.0);
+    }
+
+    #[test]
+    fn test_vint() {
+        aux_test_vint(0);
+        aux_test_vint(1);
+        aux_test_vint(5);
+        aux_test_vint(u64::max_value());
+        for i in 1..9 {
+            let power_of_128 = 1u64 << (7 * i);
+            aux_test_vint(power_of_128 - 1u64);
+            aux_test_vint(power_of_128);
+            aux_test_vint(power_of_128 + 1u64);
+        }
+        aux_test_vint(10);
+    }
+
+    fn aux_test_serialize_vint_u32(val: u32) {
+        let mut buffer = [0u8; 10];
+        let mut buffer2 = [0u8; 8];
+        let len_vint = VInt(val as u64).serialize_into(&mut buffer);
+        let res2 = serialize_vint_u32(val, &mut buffer2);
+        assert_eq!(&buffer[..len_vint], res2, "array wrong for {}", val);
+    }
+
+    #[test]
+    fn test_vint_u32() {
+        aux_test_serialize_vint_u32(0);
+        aux_test_serialize_vint_u32(1);
+        aux_test_serialize_vint_u32(5);
+        for i in 1..3 {
+            let power_of_128 = 1u32 << (7 * i);
+            aux_test_serialize_vint_u32(power_of_128 - 1u32);
+            aux_test_serialize_vint_u32(power_of_128);
+            aux_test_serialize_vint_u32(power_of_128 + 1u32);
+        }
+        aux_test_serialize_vint_u32(u32::max_value());
    }
 }
--- a/src/compression/stream.rs
+++ b/src/compression/stream.rs
@@ -1,160 +0,0 @@
-use compression::compressed_block_size;
-use compression::BlockDecoder;
-use compression::COMPRESSION_BLOCK_SIZE;
-use directory::ReadOnlySource;
-use owned_read::OwnedRead;
-
-/// Reads a stream of compressed ints.
-///
-/// Tantivy uses `CompressedIntStream` to read
-/// the position file.
-/// The `.skip(...)` makes it possible to avoid
-/// decompressing blocks that are not required.
-pub struct CompressedIntStream {
-    buffer: OwnedRead,
-
-    block_decoder: BlockDecoder,
-    cached_addr: usize,      // address of the currently decoded block
-    cached_next_addr: usize, // address following the currently decoded block
-
-    addr: usize, // address of the block associated to the current position
-    inner_offset: usize,
-}
-
-impl CompressedIntStream {
-    /// Opens a compressed int stream.
-    pub(crate) fn wrap(source: ReadOnlySource) -> CompressedIntStream {
-        CompressedIntStream {
-            buffer: OwnedRead::new(source),
-            block_decoder: BlockDecoder::new(),
-            cached_addr: usize::max_value(),
-            cached_next_addr: usize::max_value(),
-
-            addr: 0,
-            inner_offset: 0,
-        }
-    }
-
-    /// Loads the block at the given address and return the address of the
-    /// following block
-    pub fn read_block(&mut self, addr: usize) -> usize {
-        if self.cached_addr == addr {
-            // we are already on this block.
-            // no need to read.
-            self.cached_next_addr
-        } else {
-            let next_addr = addr + self.block_decoder
-                .uncompress_block_unsorted(self.buffer.slice_from(addr));
-            self.cached_addr = addr;
-            self.cached_next_addr = next_addr;
-            next_addr
-        }
-    }
-
-    /// Fills a buffer with the next `output.len()` integers.
-    /// This does not consume / advance the stream.
-    pub fn read(&mut self, output: &mut [u32]) {
-        let mut cursor = self.addr;
-        let mut inner_offset = self.inner_offset;
-        let mut num_els: usize = output.len();
-        let mut start = 0;
-        loop {
-            cursor = self.read_block(cursor);
-            let block = &self.block_decoder.output_array()[inner_offset..];
-            let block_len = block.len();
-            if num_els >= block_len {
-                output[start..start + block_len].clone_from_slice(&block);
-                start += block_len;
-                num_els -= block_len;
-                inner_offset = 0;
-            } else {
-                output[start..].clone_from_slice(&block[..num_els]);
-                break;
-            }
-        }
-    }
-
-    /// Skip the next `skip_len` integer.
-    ///
-    /// If a full block is skipped, calling
-    /// `.skip(...)` will avoid decompressing it.
-    ///
-    /// May panic if the end of the stream is reached.
-    pub fn skip(&mut self, mut skip_len: usize) {
-        loop {
-            let available = COMPRESSION_BLOCK_SIZE - self.inner_offset;
-            if available >= skip_len {
-                self.inner_offset += skip_len;
-                break;
-            } else {
-                skip_len -= available;
-                // entirely skip decompressing some blocks.
-                let num_bits: u8 = self.buffer.get(self.addr);
-                let block_len = compressed_block_size(num_bits);
-                self.addr += block_len;
-                self.inner_offset = 0;
-            }
-        }
-    }
-}
-
-#[cfg(test)]
-pub mod tests {
-
-    use super::CompressedIntStream;
-    use compression::compressed_block_size;
-    use compression::BlockEncoder;
-    use compression::COMPRESSION_BLOCK_SIZE;
-    use directory::ReadOnlySource;
-
-    fn create_stream_buffer() -> ReadOnlySource {
-        let mut buffer: Vec<u8> = vec![];
-        let mut encoder = BlockEncoder::new();
-        let vals: Vec<u32> = (0u32..1152u32).collect();
-        for chunk in vals.chunks(COMPRESSION_BLOCK_SIZE) {
-            let compressed_block = encoder.compress_block_unsorted(chunk);
-            let num_bits = compressed_block[0];
-            assert_eq!(compressed_block_size(num_bits), compressed_block.len());
-            buffer.extend_from_slice(compressed_block);
-        }
-        if cfg!(simd) {
-            buffer.extend_from_slice(&[0u8; 7]);
-        }
-        ReadOnlySource::from(buffer)
-    }
-
-    #[test]
-    fn test_compressed_int_stream() {
-        let buffer = create_stream_buffer();
-        let mut stream = CompressedIntStream::wrap(buffer);
-        let mut block: [u32; COMPRESSION_BLOCK_SIZE] = [0u32; COMPRESSION_BLOCK_SIZE];
-
-        stream.read(&mut block[0..2]);
-        assert_eq!(block[0], 0);
-        assert_eq!(block[1], 1);
-
-        // reading does not consume the stream
-        stream.read(&mut block[0..2]);
-        assert_eq!(block[0], 0);
-        assert_eq!(block[1], 1);
-        stream.skip(2);
-
-        stream.skip(5);
-        stream.read(&mut block[0..3]);
-        stream.skip(3);
-
-        assert_eq!(block[0], 7);
-        assert_eq!(block[1], 8);
-        assert_eq!(block[2], 9);
-        stream.skip(500);
-        stream.read(&mut block[0..3]);
-        stream.skip(3);
-
-        assert_eq!(block[0], 510);
-        assert_eq!(block[1], 511);
-        assert_eq!(block[2], 512);
-        stream.skip(511);
-        stream.read(&mut block[..1]);
-        assert_eq!(block[0], 1024);
-    }
-}
--- a/src/core/executor.rs
+++ b/src/core/executor.rs
@@ -0,0 +1,141 @@
+use crossbeam::channel;
+use rayon::{ThreadPool, ThreadPoolBuilder};
+
+/// Search executor, determining whether search requests run single-threaded or multithreaded.
+///
+/// We don't expose Rayon thread pool directly here for several reasons.
+///
+/// First, dependency hell: it is not a good idea to expose the
+/// API of a dependency, knowing it might conflict with a different version
+/// used by the client. Second, we may stop using rayon in the future.
+pub enum Executor {
+    /// Single thread variant of an Executor
+    SingleThread,
+    /// Thread pool variant of an Executor
+    ThreadPool(ThreadPool),
+}
+
+impl Executor {
+    /// Creates an Executor that performs all tasks in the caller thread.
+    pub fn single_thread() -> Executor {
+        Executor::SingleThread
+    }
+
+    /// Creates an Executor that dispatches the tasks in a thread pool.
+    pub fn multi_thread(num_threads: usize, prefix: &'static str) -> crate::Result<Executor> {
+        let pool = ThreadPoolBuilder::new()
+            .num_threads(num_threads)
+            .thread_name(move |num| format!("{}{}", prefix, num))
+            .build()?;
+        Ok(Executor::ThreadPool(pool))
+    }
+
+    /// Performs a map over `args`, in the caller thread or in the thread pool.
+    ///
+    /// Regardless of the executor (`SingleThread` or `ThreadPool`), panics in the task
+    /// will propagate to the caller.
+    pub fn map<
+        A: Send,
+        R: Send,
+        AIterator: Iterator<Item = A>,
+        F: Sized + Sync + Fn(A) -> crate::Result<R>,
+    >(
+        &self,
+        f: F,
+        args: AIterator,
+    ) -> crate::Result<Vec<R>> {
+        match self {
+            Executor::SingleThread => args.map(f).collect::<crate::Result<_>>(),
+            Executor::ThreadPool(pool) => {
+                let args_with_indices: Vec<(usize, A)> = args.enumerate().collect();
+                let num_fruits = args_with_indices.len();
+                let fruit_receiver = {
+                    let (fruit_sender, fruit_receiver) = channel::unbounded();
+                    pool.scope(|scope| {
+                        for arg_with_idx in args_with_indices {
+                            scope.spawn(|_| {
+                                let (idx, arg) = arg_with_idx;
+                                let fruit = f(arg);
+                                if let Err(err) = fruit_sender.send((idx, fruit)) {
+                                    error!("Failed to send search task. It probably means all search threads have panicked. {:?}", err);
+                                }
+                            });
+                        }
+                    });
+                    fruit_receiver
+                    // This ends the scope of fruit_sender.
+                    // This is important as it makes it possible for the fruit_receiver iteration to
+                    // terminate.
+                };
+                // This is lame, but safe.
+                let mut results_with_position = Vec::with_capacity(num_fruits);
+                for (pos, fruit_res) in fruit_receiver {
+                    let fruit = fruit_res?;
+                    results_with_position.push((pos, fruit));
+                }
+                results_with_position.sort_by_key(|(pos, _)| *pos);
+                assert_eq!(results_with_position.len(), num_fruits);
+                Ok(results_with_position
+                    .into_iter()
+                    .map(|(_, fruit)| fruit)
+                    .collect::<Vec<_>>())
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    use super::Executor;
+
+    #[test]
+    #[should_panic(expected = "panic should propagate")]
+    fn test_panic_propagates_single_thread() {
+        let _result: Vec<usize> = Executor::single_thread()
+            .map(
+                |_| {
+                    panic!("panic should propagate");
+                },
+                vec![0].into_iter(),
+            )
+            .unwrap();
+    }
+
+    #[test]
+    #[should_panic] //< unfortunately the panic message is not propagated
+    fn test_panic_propagates_multi_thread() {
+        let _result: Vec<usize> = Executor::multi_thread(1, "search-test")
+            .unwrap()
+            .map(
+                |_| {
+                    panic!("panic should propagate");
+                },
+                vec![0].into_iter(),
+            )
+            .unwrap();
+    }
+
+    #[test]
+    fn test_map_singlethread() {
+        let result: Vec<usize> = Executor::single_thread()
+            .map(|i| Ok(i * 2), 0..1_000)
+            .unwrap();
+        assert_eq!(result.len(), 1_000);
+        for i in 0..1_000 {
+            assert_eq!(result[i], i * 2);
+        }
+    }
+
+    #[test]
+    fn test_map_multithread() {
+        let result: Vec<usize> = Executor::multi_thread(3, "search-test")
+            .unwrap()
+            .map(|i| Ok(i * 2), 0..10)
+            .unwrap();
+        assert_eq!(result.len(), 10);
+        for i in 0..10 {
+            assert_eq!(result[i], i * 2);
+        }
+    }
+}
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -1,51 +1,91 @@
-use core::SegmentId;
-use error::{ErrorKind, ResultExt};
-use schema::Schema;
-use serde_json;
-use std::borrow::BorrowMut;
-use std::fmt;
-use std::sync::Arc;
-use Result;
-
-use super::pool::LeasedItem;
-use super::pool::Pool;
-use super::segment::create_segment;
 use super::segment::Segment;
-use core::searcher::Searcher;
-use core::IndexMeta;
-use core::SegmentMeta;
-use core::SegmentReader;
-use core::META_FILEPATH;
-use directory::ManagedDirectory;
+use crate::core::Executor;
+use crate::core::IndexMeta;
+use crate::core::SegmentId;
+use crate::core::SegmentMeta;
+use crate::core::SegmentMetaInventory;
+use crate::core::META_FILEPATH;
+use crate::directory::ManagedDirectory;
+#[cfg(feature = "mmap")]
+use crate::directory::MmapDirectory;
+use crate::directory::INDEX_WRITER_LOCK;
+use crate::directory::{Directory, RAMDirectory};
+use crate::error::DataCorruption;
+use crate::error::TantivyError;
+use crate::indexer::index_writer::HEAP_SIZE_MIN;
+use crate::indexer::segment_updater::save_new_metas;
+use crate::reader::IndexReader;
+use crate::reader::IndexReaderBuilder;
+use crate::schema::Field;
+use crate::schema::FieldType;
+use crate::schema::Schema;
+use crate::tokenizer::{TextAnalyzer, TokenizerManager};
+use crate::IndexWriter;
+use std::borrow::BorrowMut;
+use std::collections::HashSet;
+use std::fmt;
+
 #[cfg(feature = "mmap")]
-use directory::MmapDirectory;
-use directory::{Directory, RAMDirectory};
-use indexer::index_writer::open_index_writer;
-use indexer::index_writer::HEAP_SIZE_MIN;
-use indexer::segment_updater::save_new_metas;
-use indexer::DirectoryLock;
-use num_cpus;
 use std::path::Path;
-use tokenizer::TokenizerManager;
-use IndexWriter;
+use std::path::PathBuf;
+use std::sync::Arc;

-const NUM_SEARCHERS: usize = 12;
-
-fn load_metas(directory: &Directory) -> Result<IndexMeta> {
+fn load_metas(
+    directory: &dyn Directory,
+    inventory: &SegmentMetaInventory,
+) -> crate::Result<IndexMeta> {
    let meta_data = directory.atomic_read(&META_FILEPATH)?;
    let meta_string = String::from_utf8_lossy(&meta_data);
-    serde_json::from_str(&meta_string).chain_err(|| ErrorKind::CorruptedFile(META_FILEPATH.clone()))
+    IndexMeta::deserialize(&meta_string, &inventory)
+        .map_err(|e| {
+            DataCorruption::new(
+                META_FILEPATH.to_path_buf(),
+                format!("Meta file cannot be deserialized. {:?}.", e),
+            )
+        })
+        .map_err(From::from)
 }

 /// Search Index
+#[derive(Clone)]
 pub struct Index {
    directory: ManagedDirectory,
    schema: Schema,
-    searcher_pool: Arc<Pool<Searcher>>,
+    executor: Arc<Executor>,
    tokenizers: TokenizerManager,
+    inventory: SegmentMetaInventory,
 }

 impl Index {
+    /// Examines the directory to see if it contains an index.
+    pub fn exists<Dir: Directory>(dir: &Dir) -> bool {
+        dir.exists(&META_FILEPATH)
+    }
+
+    /// Accessor to the search executor.
+    ///
+    /// This pool is used by default when calling `searcher.search(...)`
+    /// to perform search on the individual segments.
+    ///
+    /// By default the executor is single thread, and simply runs in the calling thread.
+    pub fn search_executor(&self) -> &Executor {
+        self.executor.as_ref()
+    }
+
+    /// Replace the default single thread search executor pool
+    /// by a thread pool with a given number of threads.
+    pub fn set_multithread_executor(&mut self, num_threads: usize) -> crate::Result<()> {
+        self.executor = Arc::new(Executor::multi_thread(num_threads, "thrd-tantivy-search-")?);
+        Ok(())
+    }
+
+    /// Replace the default single thread search executor pool
+    /// by a thread pool with `num_cpus::get()` threads.
+    pub fn set_default_multithread_executor(&mut self) -> crate::Result<()> {
+        let default_num_threads = num_cpus::get();
+        self.set_multithread_executor(default_num_threads)
+    }
+
    /// Creates a new index using the `RAMDirectory`.
    ///
    /// The index will be allocated in anonymous memory.
@@ -60,11 +100,32 @@ impl Index {
    ///
    /// If a previous index was in this directory, then its meta file will be destroyed.
    #[cfg(feature = "mmap")]
-    pub fn create_in_dir<P: AsRef<Path>>(directory_path: P, schema: Schema) -> Result<Index> {
+    pub fn create_in_dir<P: AsRef<Path>>(
+        directory_path: P,
+        schema: Schema,
+    ) -> crate::Result<Index> {
        let mmap_directory = MmapDirectory::open(directory_path)?;
+        if Index::exists(&mmap_directory) {
+            return Err(TantivyError::IndexAlreadyExists);
+        }
        Index::create(mmap_directory, schema)
    }

+    /// Opens or creates a new index in the provided directory
+    pub fn open_or_create<Dir: Directory>(dir: Dir, schema: Schema) -> crate::Result<Index> {
+        if !Index::exists(&dir) {
+            return Index::create(dir, schema);
+        }
+        let index = Index::open(dir)?;
+        if index.schema() == schema {
+            Ok(index)
+        } else {
+            Err(TantivyError::SchemaError(
+                "An index exists but the schema does not match.".to_string(),
+            ))
+        }
+    }
+
    /// Creates a new index in a temp directory.
    ///
    /// The index will use the `MMapDirectory` in a newly created directory.
@@ -74,35 +135,40 @@ impl Index {
    /// The temp directory is only used for testing the `MmapDirectory`.
    /// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`.
    #[cfg(feature = "mmap")]
-    pub fn create_from_tempdir(schema: Schema) -> Result<Index> {
+    pub fn create_from_tempdir(schema: Schema) -> crate::Result<Index> {
        let mmap_directory = MmapDirectory::create_from_tempdir()?;
        Index::create(mmap_directory, schema)
    }

    /// Creates a new index given an implementation of the trait `Directory`
-    pub fn create<Dir: Directory>(dir: Dir, schema: Schema) -> Result<Index> {
-        let directory = ManagedDirectory::new(dir)?;
+    pub fn create<Dir: Directory>(dir: Dir, schema: Schema) -> crate::Result<Index> {
+        let directory = ManagedDirectory::wrap(dir)?;
        Index::from_directory(directory, schema)
    }

    /// Create a new index from a directory.
-    fn from_directory(mut directory: ManagedDirectory, schema: Schema) -> Result<Index> {
-        save_new_metas(schema.clone(), 0, directory.borrow_mut())?;
+    ///
+    /// This will overwrite existing meta.json
+    fn from_directory(mut directory: ManagedDirectory, schema: Schema) -> crate::Result<Index> {
+        save_new_metas(schema.clone(), directory.borrow_mut())?;
        let metas = IndexMeta::with_schema(schema);
-        Index::create_from_metas(directory, &metas)
+        Index::create_from_metas(directory, &metas, SegmentMetaInventory::default())
    }

    /// Creates a new index given a directory and an `IndexMeta`.
-    fn create_from_metas(directory: ManagedDirectory, metas: &IndexMeta) -> Result<Index> {
+    fn create_from_metas(
+        directory: ManagedDirectory,
+        metas: &IndexMeta,
+        inventory: SegmentMetaInventory,
+    ) -> crate::Result<Index> {
        let schema = metas.schema.clone();
-        let index = Index {
+        Ok(Index {
            directory,
            schema,
-            searcher_pool: Arc::new(Pool::new()),
            tokenizers: TokenizerManager::default(),
-        };
-        index.load_searchers()?;
-        Ok(index)
+            executor: Arc::new(Executor::single_thread()),
+            inventory,
+        })
    }

    /// Accessor for the tokenizer manager.
@@ -110,23 +176,79 @@ impl Index {
        &self.tokenizers
    }

+    /// Helper to access the tokenizer associated to a specific field.
+    pub fn tokenizer_for_field(&self, field: Field) -> crate::Result<TextAnalyzer> {
+        let field_entry = self.schema.get_field_entry(field);
+        let field_type = field_entry.field_type();
+        let tokenizer_manager: &TokenizerManager = self.tokenizers();
+        let tokenizer_name_opt: Option<TextAnalyzer> = match field_type {
+            FieldType::Str(text_options) => text_options
+                .get_indexing_options()
+                .map(|text_indexing_options| text_indexing_options.tokenizer().to_string())
+                .and_then(|tokenizer_name| tokenizer_manager.get(&tokenizer_name)),
+            _ => None,
+        };
+        match tokenizer_name_opt {
+            Some(tokenizer) => Ok(tokenizer),
+            None => Err(TantivyError::SchemaError(format!(
+                "{:?} is not a text field.",
+                field_entry.name()
+            ))),
+        }
+    }
+
+    /// Create a default `IndexReader` for the given index.
+    ///
+    /// See [`Index.reader_builder()`](#method.reader_builder).
+    pub fn reader(&self) -> crate::Result<IndexReader> {
+        self.reader_builder().try_into()
+    }
+
+    /// Create a `IndexReader` for the given index.
+    ///
+    /// Most projects should create at most one reader for a given index.
+    /// This method is typically called only once per `Index` instance,
+    /// over the lifetime of most programs.
+    pub fn reader_builder(&self) -> IndexReaderBuilder {
+        IndexReaderBuilder::new(self.clone())
+    }
+
    /// Opens a new directory from an index path.
    #[cfg(feature = "mmap")]
-    pub fn open_in_dir<P: AsRef<Path>>(directory_path: P) -> Result<Index> {
+    pub fn open_in_dir<P: AsRef<Path>>(directory_path: P) -> crate::Result<Index> {
        let mmap_directory = MmapDirectory::open(directory_path)?;
        Index::open(mmap_directory)
    }

+    /// Returns the list of the segment metas tracked by the index.
+    ///
+    /// Such segments can of course be part of the index,
+    /// but also they could be segments being currently built or in the middle of a merge
+    /// operation.
+    pub fn list_all_segment_metas(&self) -> Vec<SegmentMeta> {
+        self.inventory.all()
+    }
+
+    /// Creates a new segment_meta (Advanced user only).
+    ///
+    /// As long as the `SegmentMeta` lives, the files associated with the
+    /// `SegmentMeta` are guaranteed to not be garbage collected, regardless of
+    /// whether the segment is recorded as part of the index or not.
+    pub fn new_segment_meta(&self, segment_id: SegmentId, max_doc: u32) -> SegmentMeta {
+        self.inventory.new_segment_meta(segment_id, max_doc)
+    }
+
    /// Open the index using the provided directory
-    pub fn open<D: Directory>(directory: D) -> Result<Index> {
-        let directory = ManagedDirectory::new(directory)?;
-        let metas = load_metas(&directory)?;
-        Index::create_from_metas(directory, &metas)
+    pub fn open<D: Directory>(directory: D) -> crate::Result<Index> {
+        let directory = ManagedDirectory::wrap(directory)?;
+        let inventory = SegmentMetaInventory::default();
+        let metas = load_metas(&directory, &inventory)?;
+        Index::create_from_metas(directory, &metas, inventory)
    }

    /// Reads the index meta file from the directory.
-    pub fn load_metas(&self) -> Result<IndexMeta> {
-        load_metas(self.directory())
+    pub fn load_metas(&self) -> crate::Result<IndexMeta> {
+        load_metas(self.directory(), &self.inventory)
    }

    /// Open a new index writer. Attempts to acquire a lockfile.
@@ -145,17 +267,32 @@ impl Index {
    /// Each thread will receive a budget of  `overall_heap_size_in_bytes / num_threads`.
    ///
    /// # Errors
-    /// If the lockfile already exists, returns `Error::FileAlreadyExists`.
+    /// If the index writer lock cannot be acquired, returns `TantivyError::LockFailure`.
+    ///
    /// # Panics
    /// If the heap size per thread is too small, panics.
    pub fn writer_with_num_threads(
        &self,
        num_threads: usize,
        overall_heap_size_in_bytes: usize,
-    ) -> Result<IndexWriter> {
-        let directory_lock = DirectoryLock::lock(self.directory().box_clone())?;
+    ) -> crate::Result<IndexWriter> {
+        let directory_lock = self
+            .directory
+            .acquire_lock(&INDEX_WRITER_LOCK)
+            .map_err(|err| {
+                TantivyError::LockFailure(
+                    err,
+                    Some(
+                        "Failed to acquire index lock. If you are using \
+                         a regular directory, this means there is already an \
+                         `IndexWriter` working on this `Directory`, in this process \
+                         or in a different process."
+                            .to_string(),
+                    ),
+                )
+            })?;
        let heap_size_in_bytes_per_thread = overall_heap_size_in_bytes / num_threads;
-        open_index_writer(
+        IndexWriter::new(
            self,
            num_threads,
            heap_size_in_bytes_per_thread,
@@ -173,7 +310,7 @@ impl Index {
    /// If the lockfile already exists, returns `Error::FileAlreadyExists`.
    /// # Panics
    /// If the heap size per thread is too small, panics.
-    pub fn writer(&self, overall_heap_size_in_bytes: usize) -> Result<IndexWriter> {
+    pub fn writer(&self, overall_heap_size_in_bytes: usize) -> crate::Result<IndexWriter> {
        let mut num_threads = num_cpus::get();
        let heap_size_in_bytes_per_thread = overall_heap_size_in_bytes / num_threads;
        if heap_size_in_bytes_per_thread < HEAP_SIZE_MIN {
@@ -190,8 +327,9 @@ impl Index {
    }

    /// Returns the list of segments that are searchable
-    pub fn searchable_segments(&self) -> Result<Vec<Segment>> {
-        Ok(self.searchable_segment_metas()?
+    pub fn searchable_segments(&self) -> crate::Result<Vec<Segment>> {
+        Ok(self
+            .searchable_segment_metas()?
            .into_iter()
            .map(|segment_meta| self.segment(segment_meta))
            .collect())
@@ -199,12 +337,14 @@ impl Index {

    #[doc(hidden)]
    pub fn segment(&self, segment_meta: SegmentMeta) -> Segment {
-        create_segment(self.clone(), segment_meta)
+        Segment::for_index(self.clone(), segment_meta)
    }

    /// Creates a new segment.
    pub fn new_segment(&self) -> Segment {
-        let segment_meta = SegmentMeta::new(SegmentId::generate_random(), 0);
+        let segment_meta = self
+            .inventory
+            .new_segment_meta(SegmentId::generate_random(), 0);
        self.segment(segment_meta)
    }

@@ -220,66 +360,243 @@ impl Index {

    /// Reads the meta.json and returns the list of
    /// `SegmentMeta` from the last commit.
-    pub fn searchable_segment_metas(&self) -> Result<Vec<SegmentMeta>> {
+    pub fn searchable_segment_metas(&self) -> crate::Result<Vec<SegmentMeta>> {
        Ok(self.load_metas()?.segments)
    }

    /// Returns the list of segment ids that are searchable.
-    pub fn searchable_segment_ids(&self) -> Result<Vec<SegmentId>> {
-        Ok(self.searchable_segment_metas()?
+    pub fn searchable_segment_ids(&self) -> crate::Result<Vec<SegmentId>> {
+        Ok(self
+            .searchable_segment_metas()?
            .iter()
-            .map(|segment_meta| segment_meta.id())
+            .map(SegmentMeta::id)
            .collect())
    }

-    /// Creates a new generation of searchers after
-
-    /// a change of the set of searchable indexes.
-    ///
-    /// This needs to be called when a new segment has been
-    /// published or after a merge.
-    pub fn load_searchers(&self) -> Result<()> {
-        let searchable_segments = self.searchable_segments()?;
-        let segment_readers: Vec<SegmentReader> = searchable_segments
-            .iter()
-            .map(SegmentReader::open)
-            .collect::<Result<_>>()?;
-        let schema = self.schema();
-        let searchers = (0..NUM_SEARCHERS)
-            .map(|_| Searcher::new(schema.clone(), segment_readers.clone()))
-            .collect();
-        self.searcher_pool.publish_new_generation(searchers);
-        Ok(())
-    }
-
-    /// Returns a searcher
-    ///
-    /// This method should be called every single time a search
-    /// query is performed.
-    /// The searchers are taken from a pool of `NUM_SEARCHERS` searchers.
-    /// If no searcher is available
-    /// this may block.
-    ///
-    /// The same searcher must be used for a given query, as it ensures
-    /// the use of a consistent segment set.
-    pub fn searcher(&self) -> LeasedItem<Searcher> {
-        self.searcher_pool.acquire()
+    /// Returns the set of corrupted files
+    pub fn validate_checksum(&self) -> crate::Result<HashSet<PathBuf>> {
+        self.directory.list_damaged().map_err(Into::into)
    }
 }

 impl fmt::Debug for Index {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "Index({:?})", self.directory)
    }
 }

-impl Clone for Index {
-    fn clone(&self) -> Index {
-        Index {
-            directory: self.directory.clone(),
-            schema: self.schema.clone(),
-            searcher_pool: Arc::clone(&self.searcher_pool),
-            tokenizers: self.tokenizers.clone(),
+#[cfg(test)]
+mod tests {
+    use crate::directory::RAMDirectory;
+    use crate::schema::Field;
+    use crate::schema::{Schema, INDEXED, TEXT};
+    use crate::IndexReader;
+    use crate::ReloadPolicy;
+    use crate::{Directory, Index};
+
+    #[test]
+    fn test_indexer_for_field() {
+        let mut schema_builder = Schema::builder();
+        let num_likes_field = schema_builder.add_u64_field("num_likes", INDEXED);
+        let body_field = schema_builder.add_text_field("body", TEXT);
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+        assert!(index.tokenizer_for_field(body_field).is_ok());
+        assert_eq!(
+            format!("{:?}", index.tokenizer_for_field(num_likes_field).err()),
+            "Some(SchemaError(\"\\\"num_likes\\\" is not a text field.\"))"
+        );
+    }
+
+    #[test]
+    fn test_index_exists() {
+        let directory = RAMDirectory::create();
+        assert!(!Index::exists(&directory));
+        assert!(Index::create(directory.clone(), throw_away_schema()).is_ok());
+        assert!(Index::exists(&directory));
+    }
+
+    #[test]
+    fn open_or_create_should_create() {
+        let directory = RAMDirectory::create();
+        assert!(!Index::exists(&directory));
+        assert!(Index::open_or_create(directory.clone(), throw_away_schema()).is_ok());
+        assert!(Index::exists(&directory));
+    }
+
+    #[test]
+    fn open_or_create_should_open() {
+        let directory = RAMDirectory::create();
+        assert!(Index::create(directory.clone(), throw_away_schema()).is_ok());
+        assert!(Index::exists(&directory));
+        assert!(Index::open_or_create(directory, throw_away_schema()).is_ok());
+    }
+
+    #[test]
+    fn create_should_wipeoff_existing() {
+        let directory = RAMDirectory::create();
+        assert!(Index::create(directory.clone(), throw_away_schema()).is_ok());
+        assert!(Index::exists(&directory));
+        assert!(Index::create(directory.clone(), Schema::builder().build()).is_ok());
+    }
+
+    #[test]
+    fn open_or_create_exists_but_schema_does_not_match() {
+        let directory = RAMDirectory::create();
+        assert!(Index::create(directory.clone(), throw_away_schema()).is_ok());
+        assert!(Index::exists(&directory));
+        assert!(Index::open_or_create(directory.clone(), throw_away_schema()).is_ok());
+        let err = Index::open_or_create(directory, Schema::builder().build());
+        assert_eq!(
+            format!("{:?}", err.unwrap_err()),
+            "SchemaError(\"An index exists but the schema does not match.\")"
+        );
+    }
+
+    fn throw_away_schema() -> Schema {
+        let mut schema_builder = Schema::builder();
+        let _ = schema_builder.add_u64_field("num_likes", INDEXED);
+        schema_builder.build()
+    }
+
+    #[test]
+    fn test_index_on_commit_reload_policy() {
+        let schema = throw_away_schema();
+        let field = schema.get_field("num_likes").unwrap();
+        let index = Index::create_in_ram(schema);
+        let reader = index
+            .reader_builder()
+            .reload_policy(ReloadPolicy::OnCommit)
+            .try_into()
+            .unwrap();
+        assert_eq!(reader.searcher().num_docs(), 0);
+        test_index_on_commit_reload_policy_aux(field, &index, &reader);
+    }
+
+    #[cfg(feature = "mmap")]
+    mod mmap_specific {
+
+        use super::*;
+        use crate::Directory;
+        use std::path::PathBuf;
+        use tempfile::TempDir;
+
+        #[test]
+        fn test_index_on_commit_reload_policy_mmap() {
+            let schema = throw_away_schema();
+            let field = schema.get_field("num_likes").unwrap();
+            let tempdir = TempDir::new().unwrap();
+            let tempdir_path = PathBuf::from(tempdir.path());
+            let index = Index::create_in_dir(&tempdir_path, schema).unwrap();
+            let reader = index
+                .reader_builder()
+                .reload_policy(ReloadPolicy::OnCommit)
+                .try_into()
+                .unwrap();
+            assert_eq!(reader.searcher().num_docs(), 0);
+            test_index_on_commit_reload_policy_aux(field, &index, &reader);
+        }
+
+        #[test]
+        fn test_index_manual_policy_mmap() {
+            let schema = throw_away_schema();
+            let field = schema.get_field("num_likes").unwrap();
+            let mut index = Index::create_from_tempdir(schema).unwrap();
+            let mut writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
+            writer.commit().unwrap();
+            let reader = index
+                .reader_builder()
+                .reload_policy(ReloadPolicy::Manual)
+                .try_into()
+                .unwrap();
+            assert_eq!(reader.searcher().num_docs(), 0);
+            writer.add_document(doc!(field=>1u64));
+            let (sender, receiver) = crossbeam::channel::unbounded();
+            let _handle = index.directory_mut().watch(Box::new(move || {
+                let _ = sender.send(());
+            }));
+            writer.commit().unwrap();
+            assert!(receiver.recv().is_ok());
+            assert_eq!(reader.searcher().num_docs(), 0);
+            reader.reload().unwrap();
+            assert_eq!(reader.searcher().num_docs(), 1);
+        }
+
+        #[test]
+        fn test_index_on_commit_reload_policy_different_directories() {
+            let schema = throw_away_schema();
+            let field = schema.get_field("num_likes").unwrap();
+            let tempdir = TempDir::new().unwrap();
+            let tempdir_path = PathBuf::from(tempdir.path());
+            let write_index = Index::create_in_dir(&tempdir_path, schema).unwrap();
+            let read_index = Index::open_in_dir(&tempdir_path).unwrap();
+            let reader = read_index
+                .reader_builder()
+                .reload_policy(ReloadPolicy::OnCommit)
+                .try_into()
+                .unwrap();
+            assert_eq!(reader.searcher().num_docs(), 0);
+            test_index_on_commit_reload_policy_aux(field, &write_index, &reader);
        }
    }
+
+    fn test_index_on_commit_reload_policy_aux(field: Field, index: &Index, reader: &IndexReader) {
+        let mut reader_index = reader.index();
+        let (sender, receiver) = crossbeam::channel::unbounded();
+        let _watch_handle = reader_index.directory_mut().watch(Box::new(move || {
+            let _ = sender.send(());
+        }));
+        let mut writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
+        assert_eq!(reader.searcher().num_docs(), 0);
+        writer.add_document(doc!(field=>1u64));
+        writer.commit().unwrap();
+        assert!(receiver.recv().is_ok());
+        assert_eq!(reader.searcher().num_docs(), 1);
+        writer.add_document(doc!(field=>2u64));
+        writer.commit().unwrap();
+        assert!(receiver.recv().is_ok());
+        assert_eq!(reader.searcher().num_docs(), 2);
+    }
+
+    // This test will not pass on Windows, because Windows
+    // prevents deleting files that are MMapped.
+    #[cfg(not(target_os = "windows"))]
+    #[test]
+    fn garbage_collect_works_as_intended() {
+        let directory = RAMDirectory::create();
+        let schema = throw_away_schema();
+        let field = schema.get_field("num_likes").unwrap();
+        let index = Index::create(directory.clone(), schema).unwrap();
+
+        let mut writer = index.writer_with_num_threads(8, 24_000_000).unwrap();
+        for i in 0u64..8_000u64 {
+            writer.add_document(doc!(field => i));
+        }
+        let (sender, receiver) = crossbeam::channel::unbounded();
+        let _handle = directory.watch(Box::new(move || {
+            let _ = sender.send(());
+        }));
+        writer.commit().unwrap();
+        let mem_right_after_commit = directory.total_mem_usage();
+        assert!(receiver.recv().is_ok());
+        let reader = index
+            .reader_builder()
+            .reload_policy(ReloadPolicy::Manual)
+            .try_into()
+            .unwrap();
+
+        assert_eq!(reader.searcher().num_docs(), 8_000);
+        writer.wait_merging_threads().unwrap();
+        let mem_right_after_merge_finished = directory.total_mem_usage();
+
+        reader.reload().unwrap();
+        let searcher = reader.searcher();
+        assert_eq!(searcher.num_docs(), 8_000);
+        assert!(
+            mem_right_after_merge_finished < mem_right_after_commit,
+            "(mem after merge){} is expected < (mem before merge){}",
+            mem_right_after_merge_finished,
+            mem_right_after_commit
+        );
+    }
 }
--- a/src/core/index_meta.rs
+++ b/src/core/index_meta.rs
@@ -1,7 +1,198 @@
-use core::SegmentMeta;
-use schema::Schema;
-use serde_json;
+use super::SegmentComponent;
+use crate::core::SegmentId;
+use crate::schema::Schema;
+use crate::Opstamp;
+use census::{Inventory, TrackedObject};
+use serde::{Deserialize, Serialize};
+use std::collections::HashSet;
 use std::fmt;
+use std::path::PathBuf;
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+struct DeleteMeta {
+    num_deleted_docs: u32,
+    opstamp: Opstamp,
+}
+
+#[derive(Clone, Default)]
+pub struct SegmentMetaInventory {
+    inventory: Inventory<InnerSegmentMeta>,
+}
+
+impl SegmentMetaInventory {
+    /// Lists all living `SegmentMeta` objects at the time of the call.
+    pub fn all(&self) -> Vec<SegmentMeta> {
+        self.inventory
+            .list()
+            .into_iter()
+            .map(SegmentMeta::from)
+            .collect::<Vec<_>>()
+    }
+
+    pub fn new_segment_meta(&self, segment_id: SegmentId, max_doc: u32) -> SegmentMeta {
+        let inner = InnerSegmentMeta {
+            segment_id,
+            max_doc,
+            deletes: None,
+        };
+        SegmentMeta::from(self.inventory.track(inner))
+    }
+}
+
+/// `SegmentMeta` contains simple meta information about a segment.
+///
+/// For instance the number of docs it contains,
+/// how many are deleted, etc.
+#[derive(Clone)]
+pub struct SegmentMeta {
+    tracked: TrackedObject<InnerSegmentMeta>,
+}
+
+impl fmt::Debug for SegmentMeta {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+        self.tracked.fmt(f)
+    }
+}
+
+impl serde::Serialize for SegmentMeta {
+    fn serialize<S>(
+        &self,
+        serializer: S,
+    ) -> Result<<S as serde::Serializer>::Ok, <S as serde::Serializer>::Error>
+    where
+        S: serde::Serializer,
+    {
+        self.tracked.serialize(serializer)
+    }
+}
+
+impl From<TrackedObject<InnerSegmentMeta>> for SegmentMeta {
+    fn from(tracked: TrackedObject<InnerSegmentMeta>) -> SegmentMeta {
+        SegmentMeta { tracked }
+    }
+}
+
+impl SegmentMeta {
+    // Creates a new `SegmentMeta` object.
+
+    /// Returns the segment id.
+    pub fn id(&self) -> SegmentId {
+        self.tracked.segment_id
+    }
+
+    /// Returns the number of deleted documents.
+    pub fn num_deleted_docs(&self) -> u32 {
+        self.tracked
+            .deletes
+            .as_ref()
+            .map(|delete_meta| delete_meta.num_deleted_docs)
+            .unwrap_or(0u32)
+    }
+
+    /// Returns the list of files that
+    /// are required for the segment meta.
+    ///
+    /// This is useful as the way tantivy removes files
+    /// is by removing all files that have been created by tantivy
+    /// and are not used by any segment anymore.
+    pub fn list_files(&self) -> HashSet<PathBuf> {
+        SegmentComponent::iterator()
+            .map(|component| self.relative_path(*component))
+            .collect::<HashSet<PathBuf>>()
+    }
+
+    /// Returns the relative path of a component of our segment.
+    ///
+    /// It just joins the segment id with the extension
+    /// associated to a segment component.
+    pub fn relative_path(&self, component: SegmentComponent) -> PathBuf {
+        let mut path = self.id().uuid_string();
+        path.push_str(&*match component {
+            SegmentComponent::POSTINGS => ".idx".to_string(),
+            SegmentComponent::POSITIONS => ".pos".to_string(),
+            SegmentComponent::POSITIONSSKIP => ".posidx".to_string(),
+            SegmentComponent::TERMS => ".term".to_string(),
+            SegmentComponent::STORE => ".store".to_string(),
+            SegmentComponent::FASTFIELDS => ".fast".to_string(),
+            SegmentComponent::FIELDNORMS => ".fieldnorm".to_string(),
+            SegmentComponent::DELETE => format!(".{}.del", self.delete_opstamp().unwrap_or(0)),
+        });
+        PathBuf::from(path)
+    }
+
+    /// Return the highest doc id + 1
+    ///
+    /// If there are no deletes, then num_docs = max_doc
+    /// and all the doc ids contained in this segment
+    /// are exactly (0..max_doc).
+    pub fn max_doc(&self) -> u32 {
+        self.tracked.max_doc
+    }
+
+    /// Return the number of documents in the segment.
+    pub fn num_docs(&self) -> u32 {
+        self.max_doc() - self.num_deleted_docs()
+    }
+
+    /// Returns the `Opstamp` of the last delete operation
+    /// taken into account in this segment.
+    pub fn delete_opstamp(&self) -> Option<Opstamp> {
+        self.tracked
+            .deletes
+            .as_ref()
+            .map(|delete_meta| delete_meta.opstamp)
+    }
+
+    /// Returns true iff the segment meta reports
+    /// at least one deleted document.
+    pub fn has_deletes(&self) -> bool {
+        self.num_deleted_docs() > 0
+    }
+
+    /// Updates the max_doc value from the `SegmentMeta`.
+    ///
+    /// This method is only used when updating `max_doc` from 0
+    /// as we finalize a fresh new segment.
+    pub(crate) fn with_max_doc(self, max_doc: u32) -> SegmentMeta {
+        assert_eq!(self.tracked.max_doc, 0);
+        assert!(self.tracked.deletes.is_none());
+        let tracked = self.tracked.map(move |inner_meta| InnerSegmentMeta {
+            segment_id: inner_meta.segment_id,
+            max_doc,
+            deletes: None,
+        });
+        SegmentMeta { tracked }
+    }
+
+    #[doc(hidden)]
+    pub fn with_delete_meta(self, num_deleted_docs: u32, opstamp: Opstamp) -> SegmentMeta {
+        let delete_meta = DeleteMeta {
+            num_deleted_docs,
+            opstamp,
+        };
+        let tracked = self.tracked.map(move |inner_meta| InnerSegmentMeta {
+            segment_id: inner_meta.segment_id,
+            max_doc: inner_meta.max_doc,
+            deletes: Some(delete_meta),
+        });
+        SegmentMeta { tracked }
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct InnerSegmentMeta {
+    segment_id: SegmentId,
+    max_doc: u32,
+    deletes: Option<DeleteMeta>,
+}
+
+impl InnerSegmentMeta {
+    pub fn track(self, inventory: &SegmentMetaInventory) -> SegmentMeta {
+        SegmentMeta {
+            tracked: inventory.inventory.track(self),
+        }
+    }
+}

 /// Meta information about the `Index`.
 ///
@@ -11,16 +202,53 @@ use std::fmt;
 /// * the index `docstamp`
 /// * the schema
 ///
-#[derive(Clone, Serialize, Deserialize)]
+#[derive(Clone, Serialize)]
 pub struct IndexMeta {
+    /// List of `SegmentMeta` information associated to each finalized segment of the index.
    pub segments: Vec<SegmentMeta>,
+    /// Index `Schema`
    pub schema: Schema,
-    pub opstamp: u64,
+    /// Opstamp associated to the last `commit` operation.
+    pub opstamp: Opstamp,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    /// Payload associated to the last commit.
+    ///
+    /// Upon commit, clients can optionally add a small `String` payload to their commit
+    /// to help identify this commit.
+    /// This payload is entirely unused by tantivy.
+    pub payload: Option<String>,
+}
+
+#[derive(Deserialize)]
+struct UntrackedIndexMeta {
+    pub segments: Vec<InnerSegmentMeta>,
+    pub schema: Schema,
+    pub opstamp: Opstamp,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub payload: Option<String>,
 }

+impl UntrackedIndexMeta {
+    pub fn track(self, inventory: &SegmentMetaInventory) -> IndexMeta {
+        IndexMeta {
+            segments: self
+                .segments
+                .into_iter()
+                .map(|inner_seg_meta| inner_seg_meta.track(inventory))
+                .collect::<Vec<SegmentMeta>>(),
+            schema: self.schema,
+            opstamp: self.opstamp,
+            payload: self.payload,
+        }
+    }
+}
+
 impl IndexMeta {
+    /// Creates an `IndexMeta` object representing a brand new `Index`
+    /// with the given schema.
+    ///
+    /// This new index does not contain any segments.
+    /// Opstamp will have the value `0u64`.
    pub fn with_schema(schema: Schema) -> IndexMeta {
        IndexMeta {
            segments: vec![],
@@ -29,10 +257,18 @@ impl IndexMeta {
            payload: None,
        }
    }
+
+    pub(crate) fn deserialize(
+        meta_json: &str,
+        inventory: &SegmentMetaInventory,
+    ) -> serde_json::Result<IndexMeta> {
+        let untracked_meta_json: UntrackedIndexMeta = serde_json::from_str(meta_json)?;
+        Ok(untracked_meta_json.track(inventory))
+    }
 }

 impl fmt::Debug for IndexMeta {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "{}",
@@ -46,23 +282,26 @@ impl fmt::Debug for IndexMeta {
 mod tests {

    use super::IndexMeta;
-    use schema::{SchemaBuilder, TEXT};
+    use crate::schema::{Schema, TEXT};
    use serde_json;

    #[test]
    fn test_serialize_metas() {
        let schema = {
-            let mut schema_builder = SchemaBuilder::new();
+            let mut schema_builder = Schema::builder();
            schema_builder.add_text_field("text", TEXT);
            schema_builder.build()
        };
        let index_metas = IndexMeta {
            segments: Vec::new(),
-            schema: schema,
+            schema,
            opstamp: 0u64,
            payload: None,
        };
        let json = serde_json::ser::to_string(&index_metas).expect("serialization failed");
-        assert_eq!(json, r#"{"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","tokenizer":"default"},"stored":false}}],"opstamp":0}"#);
+        assert_eq!(
+            json,
+            r#"{"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","tokenizer":"default"},"stored":false}}],"opstamp":0}"#
+        );
    }
 }
--- a/src/core/inverted_index_reader.rs
+++ b/src/core/inverted_index_reader.rs
@@ -1,14 +1,12 @@
-use common::BinarySerializable;
-use compression::CompressedIntStream;
-use directory::ReadOnlySource;
-use postings::FreqReadingOption;
-use postings::TermInfo;
-use postings::{BlockSegmentPostings, SegmentPostings};
-use schema::FieldType;
-use schema::IndexRecordOption;
-use schema::Term;
-use termdict::TermDictionary;
-use owned_read::OwnedRead;
+use crate::common::BinarySerializable;
+use crate::directory::ReadOnlySource;
+use crate::positions::PositionReader;
+use crate::postings::TermInfo;
+use crate::postings::{BlockSegmentPostings, SegmentPostings};
+use crate::schema::FieldType;
+use crate::schema::IndexRecordOption;
+use crate::schema::Term;
+use crate::termdict::TermDictionary;

 /// The inverted index reader is in charge of accessing
 /// the inverted index associated to a specific field.
@@ -27,15 +25,18 @@ pub struct InvertedIndexReader {
    termdict: TermDictionary,
    postings_source: ReadOnlySource,
    positions_source: ReadOnlySource,
+    positions_idx_source: ReadOnlySource,
    record_option: IndexRecordOption,
    total_num_tokens: u64,
 }

 impl InvertedIndexReader {
+    #[cfg_attr(feature = "cargo-clippy", allow(clippy::needless_pass_by_value))] // for symmetry
    pub(crate) fn new(
        termdict: TermDictionary,
        postings_source: ReadOnlySource,
        positions_source: ReadOnlySource,
+        positions_idx_source: ReadOnlySource,
        record_option: IndexRecordOption,
    ) -> InvertedIndexReader {
        let total_num_tokens_data = postings_source.slice(0, 8);
@@ -45,6 +46,7 @@ impl InvertedIndexReader {
            termdict,
            postings_source: postings_source.slice_from(8),
            positions_source,
+            positions_idx_source,
            record_option,
            total_num_tokens,
        }
@@ -52,14 +54,15 @@ impl InvertedIndexReader {

    /// Creates an empty `InvertedIndexReader` object, which
    /// contains no terms at all.
-    pub fn empty(field_type: FieldType) -> InvertedIndexReader {
+    pub fn empty(field_type: &FieldType) -> InvertedIndexReader {
        let record_option = field_type
            .get_index_record_option()
            .unwrap_or(IndexRecordOption::Basic);
        InvertedIndexReader {
-            termdict: TermDictionary::empty(field_type),
+            termdict: TermDictionary::empty(),
            postings_source: ReadOnlySource::empty(),
            positions_source: ReadOnlySource::empty(),
+            positions_idx_source: ReadOnlySource::empty(),
            record_option,
            total_num_tokens: 0u64,
        }
@@ -93,8 +96,20 @@ impl InvertedIndexReader {
        let offset = term_info.postings_offset as usize;
        let end_source = self.postings_source.len();
        let postings_slice = self.postings_source.slice(offset, end_source);
-        let postings_reader = OwnedRead::new(postings_slice);
-        block_postings.reset(term_info.doc_freq as usize, postings_reader);
+        block_postings.reset(term_info.doc_freq, postings_slice);
+    }
+
+    /// Returns a block postings given a `Term`.
+    /// This method is for advanced usage only.
+    ///
+    /// Most users should prefer using `read_postings` instead.
+    pub fn read_block_postings(
+        &self,
+        term: &Term,
+        option: IndexRecordOption,
+    ) -> Option<BlockSegmentPostings> {
+        self.get_term_info(term)
+            .map(move |term_info| self.read_block_postings_from_terminfo(&term_info, option))
    }

    /// Returns a block postings given a `term_info`.
@@ -108,15 +123,11 @@ impl InvertedIndexReader {
    ) -> BlockSegmentPostings {
        let offset = term_info.postings_offset as usize;
        let postings_data = self.postings_source.slice_from(offset);
-        let freq_reading_option = match (self.record_option, requested_option) {
-            (IndexRecordOption::Basic, _) => FreqReadingOption::NoFreq,
-            (_, IndexRecordOption::Basic) => FreqReadingOption::SkipFreq,
-            (_, _) => FreqReadingOption::ReadFreq,
-        };
        BlockSegmentPostings::from_data(
-            term_info.doc_freq as usize,
-            OwnedRead::new(postings_data),
-            freq_reading_option,
+            term_info.doc_freq,
+            postings_data,
+            self.record_option,
+            requested_option,
        )
    }

@@ -132,11 +143,11 @@ impl InvertedIndexReader {
        let block_postings = self.read_block_postings_from_terminfo(term_info, option);
        let position_stream = {
            if option.has_positions() {
-                let position_offset = term_info.positions_offset;
-                let positions_source = self.positions_source.slice_from(position_offset as usize);
-                let mut stream = CompressedIntStream::wrap(positions_source);
-                stream.skip(term_info.positions_inner_offset as usize);
-                Some(stream)
+                let position_reader = self.positions_source.clone();
+                let skip_reader = self.positions_idx_source.clone();
+                let position_reader =
+                    PositionReader::new(position_reader, skip_reader, term_info.positions_idx);
+                Some(position_reader)
            } else {
                None
            }
@@ -161,8 +172,8 @@ impl InvertedIndexReader {
    /// `TextIndexingOptions` that does not index position will return a `SegmentPostings`
    /// with `DocId`s and frequencies.
    pub fn read_postings(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings> {
-        let term_info = get!(self.get_term_info(term));
-        Some(self.read_postings_from_terminfo(&term_info, option))
+        self.get_term_info(term)
+            .map(move |term_info| self.read_postings_from_terminfo(&term_info, option))
    }

    pub(crate) fn read_postings_no_deletes(
@@ -170,8 +181,8 @@ impl InvertedIndexReader {
        term: &Term,
        option: IndexRecordOption,
    ) -> Option<SegmentPostings> {
-        let term_info = get!(self.get_term_info(term));
-        Some(self.read_postings_from_terminfo(&term_info, option))
+        self.get_term_info(term)
+            .map(|term_info| self.read_postings_from_terminfo(&term_info, option))
    }

    /// Returns the number of documents containing the term.
--- a/src/core/mod.rs
+++ b/src/core/mod.rs
@@ -1,42 +1,34 @@
+mod executor;
 pub mod index;
 mod index_meta;
 mod inverted_index_reader;
-mod pool;
 pub mod searcher;
 mod segment;
 mod segment_component;
 mod segment_id;
-mod segment_meta;
 mod segment_reader;

+pub use self::executor::Executor;
 pub use self::index::Index;
-pub use self::index_meta::IndexMeta;
+pub use self::index_meta::{IndexMeta, SegmentMeta, SegmentMetaInventory};
 pub use self::inverted_index_reader::InvertedIndexReader;
 pub use self::searcher::Searcher;
 pub use self::segment::Segment;
 pub use self::segment::SerializableSegment;
 pub use self::segment_component::SegmentComponent;
 pub use self::segment_id::SegmentId;
-pub use self::segment_meta::SegmentMeta;
 pub use self::segment_reader::SegmentReader;

-use std::path::PathBuf;
+use once_cell::sync::Lazy;
+use std::path::Path;

-lazy_static! {
-    /// The meta file contains all the information about the list of segments and the schema
-    /// of the index.
-    pub static ref META_FILEPATH: PathBuf = PathBuf::from("meta.json");
+/// The meta file contains all the information about the list of segments and the schema
+/// of the index.
+pub static META_FILEPATH: Lazy<&'static Path> = Lazy::new(|| Path::new("meta.json"));

-    /// The managed file contains a list of files that were created by the tantivy
-    /// and will therefore be garbage collected when they are deemed useless by tantivy.
-    ///
-    /// Removing this file is safe, but will prevent the garbage collection of all of the file that
-    /// are currently in the directory
-    pub static ref MANAGED_FILEPATH: PathBuf = PathBuf::from(".managed.json");
-
-    /// Only one process should be able to write tantivy's index at a time.
-    /// This file, when present, is in charge of preventing other processes to open an IndexWriter.
-    ///
-    /// If the process is killed and this file remains, it is safe to remove it manually.
-    pub static ref LOCKFILE_FILEPATH: PathBuf = PathBuf::from(".tantivy-indexer.lock");
-}
+/// The managed file contains a list of files that were created by tantivy
+/// and will therefore be garbage collected when they are deemed useless by tantivy.
+///
+/// Removing this file is safe, but will prevent the garbage collection of all of the files that
+/// are currently in the directory.
+pub static MANAGED_FILEPATH: Lazy<&'static Path> = Lazy::new(|| Path::new(".managed.json"));
--- a/src/core/pool.rs
+++ b/src/core/pool.rs
@@ -1,134 +0,0 @@
-use crossbeam::sync::MsQueue;
-use std::mem;
-use std::ops::{Deref, DerefMut};
-use std::sync::atomic::AtomicUsize;
-use std::sync::atomic::Ordering;
-use std::sync::Arc;
-
-pub struct GenerationItem<T> {
-    generation: usize,
-    item: T,
-}
-
-pub struct Pool<T> {
-    queue: Arc<MsQueue<GenerationItem<T>>>,
-    freshest_generation: AtomicUsize,
-    next_generation: AtomicUsize,
-}
-
-impl<T> Pool<T> {
-    pub fn new() -> Pool<T> {
-        let queue = Arc::new(MsQueue::new());
-        Pool {
-            queue,
-            freshest_generation: AtomicUsize::default(),
-            next_generation: AtomicUsize::default(),
-        }
-    }
-
-    pub fn publish_new_generation(&self, items: Vec<T>) {
-        let next_generation = self.next_generation.fetch_add(1, Ordering::SeqCst) + 1;
-        for item in items {
-            let gen_item = GenerationItem {
-                item,
-                generation: next_generation,
-            };
-            self.queue.push(gen_item);
-        }
-        self.advertise_generation(next_generation);
-    }
-
-    /// At the exit of this method,
-    /// - freshest_generation has a value greater or equal than generation
-    /// - freshest_generation has a value that has been advertised
-    /// - freshest_generation has)
-    fn advertise_generation(&self, generation: usize) {
-        // not optimal at all but the easiest to read proof.
-        loop {
-            let former_generation = self.freshest_generation.load(Ordering::Acquire);
-            if former_generation >= generation {
-                break;
-            }
-            self.freshest_generation.compare_and_swap(
-                former_generation,
-                generation,
-                Ordering::SeqCst,
-            );
-        }
-    }
-
-    fn generation(&self) -> usize {
-        self.freshest_generation.load(Ordering::Acquire)
-    }
-
-    pub fn acquire(&self) -> LeasedItem<T> {
-        let generation = self.generation();
-        loop {
-            let gen_item = self.queue.pop();
-            if gen_item.generation >= generation {
-                return LeasedItem {
-                    gen_item: Some(gen_item),
-                    recycle_queue: Arc::clone(&self.queue),
-                };
-            } else {
-                // this searcher is obsolete,
-                // removing it from the pool.
-            }
-        }
-    }
-}
-
-pub struct LeasedItem<T> {
-    gen_item: Option<GenerationItem<T>>,
-    recycle_queue: Arc<MsQueue<GenerationItem<T>>>,
-}
-
-impl<T> Deref for LeasedItem<T> {
-    type Target = T;
-
-    fn deref(&self) -> &T {
-        &self.gen_item
-            .as_ref()
-            .expect("Unwrapping a leased item should never fail")
-            .item // unwrap is safe here
-    }
-}
-
-impl<T> DerefMut for LeasedItem<T> {
-    fn deref_mut(&mut self) -> &mut T {
-        &mut self.gen_item
-            .as_mut()
-            .expect("Unwrapping a mut leased item should never fail")
-            .item // unwrap is safe here
-    }
-}
-
-impl<T> Drop for LeasedItem<T> {
-    fn drop(&mut self) {
-        let gen_item: GenerationItem<T> = mem::replace(&mut self.gen_item, None)
-            .expect("Unwrapping a leased item should never fail");
-        self.recycle_queue.push(gen_item);
-    }
-}
-
-#[cfg(test)]
-mod tests {
-
-    use super::Pool;
-    use std::iter;
-
-    #[test]
-    fn test_pool() {
-        let items10: Vec<usize> = iter::repeat(10).take(10).collect();
-        let pool = Pool::new();
-        pool.publish_new_generation(items10);
-        for _ in 0..20 {
-            assert_eq!(*pool.acquire(), 10);
-        }
-        let items11: Vec<usize> = iter::repeat(11).take(10).collect();
-        pool.publish_new_generation(items11);
-        for _ in 0..20 {
-            assert_eq!(*pool.acquire(), 11);
-        }
-    }
-}
--- a/src/core/searcher.rs
+++ b/src/core/searcher.rs
@@ -1,15 +1,18 @@
-use collector::Collector;
-use core::InvertedIndexReader;
-use core::SegmentReader;
-use query::Query;
-use schema::Document;
-use schema::Schema;
-use schema::{Field, Term};
+use crate::collector::Collector;
+use crate::core::Executor;
+use crate::core::InvertedIndexReader;
+use crate::core::SegmentReader;
+use crate::query::Query;
+use crate::schema::Document;
+use crate::schema::Schema;
+use crate::schema::{Field, Term};
+use crate::space_usage::SearcherSpaceUsage;
+use crate::store::StoreReader;
+use crate::termdict::TermMerger;
+use crate::DocAddress;
+use crate::Index;
 use std::fmt;
 use std::sync::Arc;
-use termdict::TermMerger;
-use DocAddress;
-use Result;

 /// Holds a list of `SegmentReader`s ready for search.
 ///
@@ -18,25 +21,43 @@ use Result;
 ///
 pub struct Searcher {
    schema: Schema,
+    index: Index,
    segment_readers: Vec<SegmentReader>,
+    store_readers: Vec<StoreReader>,
 }

 impl Searcher {
    /// Creates a new `Searcher`
-    pub(crate) fn new(schema: Schema, segment_readers: Vec<SegmentReader>) -> Searcher {
+    pub(crate) fn new(
+        schema: Schema,
+        index: Index,
+        segment_readers: Vec<SegmentReader>,
+    ) -> Searcher {
+        let store_readers = segment_readers
+            .iter()
+            .map(SegmentReader::get_store_reader)
+            .collect();
        Searcher {
            schema,
+            index,
            segment_readers,
+            store_readers,
        }
    }
+
+    /// Returns the `Index` associated with the `Searcher`
+    pub fn index(&self) -> &Index {
+        &self.index
+    }
+
    /// Fetches a document from tantivy's store given a `DocAddress`.
    ///
    /// The searcher uses the segment ordinal to route the
    /// the request to the right `Segment`.
-    pub fn doc(&self, doc_address: &DocAddress) -> Result<Document> {
-        let DocAddress(segment_local_id, doc_id) = *doc_address;
-        let segment_reader = &self.segment_readers[segment_local_id as usize];
-        segment_reader.doc(doc_id)
+    pub fn doc(&self, doc_address: DocAddress) -> crate::Result<Document> {
+        let DocAddress(segment_local_id, doc_id) = doc_address;
+        let store_reader = &self.store_readers[segment_local_id as usize];
+        store_reader.get(doc_id)
    }

    /// Access the schema associated to the index of this searcher.
@@ -48,7 +69,7 @@ impl Searcher {
    pub fn num_docs(&self) -> u64 {
        self.segment_readers
            .iter()
-            .map(|segment_reader| segment_reader.num_docs() as u64)
+            .map(|segment_reader| u64::from(segment_reader.num_docs()))
            .sum::<u64>()
    }

@@ -57,7 +78,9 @@ impl Searcher {
    pub fn doc_freq(&self, term: &Term) -> u64 {
        self.segment_readers
            .iter()
-            .map(|segment_reader| segment_reader.inverted_index(term.field()).doc_freq(term) as u64)
+            .map(|segment_reader| {
+                u64::from(segment_reader.inverted_index(term.field()).doc_freq(term))
+            })
            .sum::<u64>()
    }

@@ -71,19 +94,77 @@ impl Searcher {
        &self.segment_readers[segment_ord as usize]
    }

-    /// Runs a query on the segment readers wrapped by the searcher
-    pub fn search<C: Collector>(&self, query: &Query, collector: &mut C) -> Result<()> {
-        query.search(self, collector)
+    /// Runs a query on the segment readers wrapped by the searcher.
+    ///
+    /// Search works as follows :
+    ///
+    ///  First the weight object associated to the query is created.
+    ///
+    ///  Then, the query loops over the segments and for each segment :
+    ///  - sets up the collector and informs it that the segment being processed has changed.
+    ///  - creates a SegmentCollector for collecting documents associated with the segment
+    ///  - creates a `Scorer` object associated with this segment
+    ///  - iterates through the matched documents and pushes them to the segment collector.
+    ///
+    ///  Finally, the Collector merges each of the child collectors into itself for result usability
+    ///  by the caller.
+    pub fn search<C: Collector>(
+        &self,
+        query: &dyn Query,
+        collector: &C,
+    ) -> crate::Result<C::Fruit> {
+        let executor = self.index.search_executor();
+        self.search_with_executor(query, collector, executor)
+    }
+
+    /// Same as [`search(...)`](#method.search) but multithreaded.
+    ///
+    /// The current implementation is rather naive :
+    /// multithreading is done by splitting the search into as many tasks
+    /// as there are segments.
+    ///
+    /// It is powerless at making search faster if your index consists of
+    /// one large segment.
+    ///
+    /// Also, keep in mind that multithreading a single query on several
+    /// threads will not improve your throughput. It can actually
+    /// hurt it. It will, however, decrease the average response time.
+    pub fn search_with_executor<C: Collector>(
+        &self,
+        query: &dyn Query,
+        collector: &C,
+        executor: &Executor,
+    ) -> crate::Result<C::Fruit> {
+        let scoring_enabled = collector.requires_scoring();
+        let weight = query.weight(self, scoring_enabled)?;
+        let segment_readers = self.segment_readers();
+        let fruits = executor.map(
+            |(segment_ord, segment_reader)| {
+                collector.collect_segment(weight.as_ref(), segment_ord as u32, segment_reader)
+            },
+            segment_readers.iter().enumerate(),
+        )?;
+        collector.merge_fruits(fruits)
    }

    /// Return the field searcher associated to a `Field`.
    pub fn field(&self, field: Field) -> FieldSearcher {
-        let inv_index_readers = self.segment_readers
+        let inv_index_readers = self
+            .segment_readers
            .iter()
            .map(|segment_reader| segment_reader.inverted_index(field))
            .collect::<Vec<_>>();
        FieldSearcher::new(inv_index_readers)
    }
+
+    /// Summarize total space usage of this searcher.
+    pub fn space_usage(&self) -> SearcherSpaceUsage {
+        let mut space_usage = SearcherSpaceUsage::new();
+        for segment_reader in self.segment_readers.iter() {
+            space_usage.add_segment(segment_reader.space_usage());
+        }
+        space_usage
+    }
 }

 pub struct FieldSearcher {
@@ -97,8 +178,9 @@ impl FieldSearcher {

    /// Returns a Stream over all of the sorted unique terms of
    /// for the given field.
-    pub fn terms(&self) -> TermMerger {
-        let term_streamers: Vec<_> = self.inv_index_readers
+    pub fn terms(&self) -> TermMerger<'_> {
+        let term_streamers: Vec<_> = self
+            .inv_index_readers
            .iter()
            .map(|inverted_index| inverted_index.terms().stream())
            .collect();
@@ -107,10 +189,11 @@ impl FieldSearcher {
 }

 impl fmt::Debug for Searcher {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let segment_ids = self.segment_readers
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let segment_ids = self
+            .segment_readers
            .iter()
-            .map(|segment_reader| segment_reader.segment_id())
+            .map(SegmentReader::segment_id)
            .collect::<Vec<_>>();
        write!(f, "Searcher({:?})", segment_ids)
    }
--- a/src/core/segment.rs
+++ b/src/core/segment.rs
@@ -1,16 +1,15 @@
 use super::SegmentComponent;
-use core::Index;
-use core::SegmentId;
-use core::SegmentMeta;
-use directory::error::{OpenReadError, OpenWriteError};
-use directory::Directory;
-use directory::{ReadOnlySource, WritePtr};
-use indexer::segment_serializer::SegmentSerializer;
-use schema::Schema;
+use crate::core::Index;
+use crate::core::SegmentId;
+use crate::core::SegmentMeta;
+use crate::directory::error::{OpenReadError, OpenWriteError};
+use crate::directory::Directory;
+use crate::directory::{ReadOnlySource, WritePtr};
+use crate::indexer::segment_serializer::SegmentSerializer;
+use crate::schema::Schema;
+use crate::Opstamp;
 use std::fmt;
 use std::path::PathBuf;
-use std::result;
-use Result;

 /// A segment is a piece of the index.
 #[derive(Clone)]
@@ -20,20 +19,17 @@ pub struct Segment {
 }

 impl fmt::Debug for Segment {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "Segment({:?})", self.id().uuid_string())
    }
 }

-/// Creates a new segment given an `Index` and a `SegmentId`
-///
-/// The function is here to make it private outside `tantivy`.
-/// #[doc(hidden)]
-pub fn create_segment(index: Index, meta: SegmentMeta) -> Segment {
-    Segment { index, meta }
-}
-
 impl Segment {
+    /// Creates a new segment given an `Index` and a `SegmentMeta`
+    pub(crate) fn for_index(index: Index, meta: SegmentMeta) -> Segment {
+        Segment { index, meta }
+    }
+
    /// Returns the index the segment belongs to.
    pub fn index(&self) -> &Index {
        &self.index
@@ -49,8 +45,19 @@ impl Segment {
        &self.meta
    }

+    /// Updates the max_doc value from the `SegmentMeta`.
+    ///
+    /// This method is only used when updating `max_doc` from 0
+    /// as we finalize a fresh new segment.
+    pub(crate) fn with_max_doc(self, max_doc: u32) -> Segment {
+        Segment {
+            index: self.index,
+            meta: self.meta.with_max_doc(max_doc),
+        }
+    }
+
    #[doc(hidden)]
-    pub fn with_delete_meta(self, num_deleted_docs: u32, opstamp: u64) -> Segment {
+    pub fn with_delete_meta(self, num_deleted_docs: u32, opstamp: Opstamp) -> Segment {
        Segment {
            index: self.index,
            meta: self.meta.with_delete_meta(num_deleted_docs, opstamp),
@@ -71,20 +78,14 @@ impl Segment {
    }

    /// Open one of the component file for a *regular* read.
-    pub fn open_read(
-        &self,
-        component: SegmentComponent,
-    ) -> result::Result<ReadOnlySource, OpenReadError> {
+    pub fn open_read(&self, component: SegmentComponent) -> Result<ReadOnlySource, OpenReadError> {
        let path = self.relative_path(component);
        let source = self.index.directory().open_read(&path)?;
        Ok(source)
    }

    /// Open one of the component file for *regular* write.
-    pub fn open_write(
-        &mut self,
-        component: SegmentComponent,
-    ) -> result::Result<WritePtr, OpenWriteError> {
+    pub fn open_write(&mut self, component: SegmentComponent) -> Result<WritePtr, OpenWriteError> {
        let path = self.relative_path(component);
        let write = self.index.directory_mut().open_write(&path)?;
        Ok(write)
@@ -97,5 +98,5 @@ pub trait SerializableSegment {
    ///
    /// # Returns
    /// The number of documents in the segment.
-    fn write(&self, serializer: SegmentSerializer) -> Result<u32>;
+    fn write(&self, serializer: SegmentSerializer) -> crate::Result<u32>;
 }
--- a/src/core/segment_component.rs
+++ b/src/core/segment_component.rs
@@ -10,6 +10,8 @@ pub enum SegmentComponent {
    POSTINGS,
    /// Positions of terms in each document.
    POSITIONS,
+    /// Index to seek within the position file
+    POSITIONSSKIP,
    /// Column-oriented random-access storage of fields.
    FASTFIELDS,
    /// Stores the sum  of the length (in terms) of each field for each document.
@@ -29,15 +31,16 @@ pub enum SegmentComponent {
 impl SegmentComponent {
    /// Iterates through the components.
    pub fn iterator() -> slice::Iter<'static, SegmentComponent> {
-        static SEGMENT_COMPONENTS: [SegmentComponent; 7] = [
+        static SEGMENT_COMPONENTS: [SegmentComponent; 8] = [
            SegmentComponent::POSTINGS,
            SegmentComponent::POSITIONS,
+            SegmentComponent::POSITIONSSKIP,
            SegmentComponent::FASTFIELDS,
            SegmentComponent::FIELDNORMS,
            SegmentComponent::TERMS,
            SegmentComponent::STORE,
            SegmentComponent::DELETE,
        ];
-        SEGMENT_COMPONENTS.into_iter()
+        SEGMENT_COMPONENTS.iter()
    }
 }
--- a/src/core/segment_id.rs
+++ b/src/core/segment_id.rs
@@ -2,6 +2,11 @@ use std::cmp::{Ord, Ordering};
 use std::fmt;
 use uuid::Uuid;

+#[cfg(test)]
+use once_cell::sync::Lazy;
+use serde::{Deserialize, Serialize};
+use std::error::Error;
+use std::str::FromStr;
 #[cfg(test)]
 use std::sync::atomic;

@@ -17,10 +22,10 @@ use std::sync::atomic;
 pub struct SegmentId(Uuid);

 #[cfg(test)]
-lazy_static! {
-    static ref AUTO_INC_COUNTER: atomic::AtomicUsize = atomic::AtomicUsize::default();
-    static ref EMPTY_ARR: [u8; 8] = [0u8; 8];
-}
+static AUTO_INC_COUNTER: Lazy<atomic::AtomicUsize> = Lazy::new(|| atomic::AtomicUsize::default());
+
+#[cfg(test)]
+const ZERO_ARRAY: [u8; 8] = [0u8; 8];

 // During tests, we generate the segment id in a autoincrement manner
 // for consistency of segment id between run.
@@ -30,7 +35,7 @@ lazy_static! {
 #[cfg(test)]
 fn create_uuid() -> Uuid {
    let new_auto_inc_id = (*AUTO_INC_COUNTER).fetch_add(1, atomic::Ordering::SeqCst);
-    Uuid::from_fields(new_auto_inc_id as u32, 0, 0, &*EMPTY_ARR).unwrap()
+    Uuid::from_fields(new_auto_inc_id as u32, 0, 0, &ZERO_ARRAY).unwrap()
 }

 #[cfg(not(test))]
@@ -50,19 +55,55 @@ impl SegmentId {
    /// and the rest is random.
    ///
    /// Picking the first 8 chars is ok to identify
-    /// segments in a display message.
+    /// segments in a display message (e.g. a5c4dfcb).
    pub fn short_uuid_string(&self) -> String {
-        (&self.0.simple().to_string()[..8]).to_string()
+        (&self.0.to_simple_ref().to_string()[..8]).to_string()
    }

    /// Returns a segment uuid string.
+    ///
+    /// It consists of 32 lowercase hexadecimal chars
+    /// (e.g. a5c4dfcbdfe645089129e308e26d5523)
    pub fn uuid_string(&self) -> String {
-        self.0.simple().to_string()
+        self.0.to_simple_ref().to_string()
+    }
+
+    /// Builds a `SegmentId` from the full uuid string.
+    ///
+    /// E.g. "a5c4dfcbdfe645089129e308e26d5523"
+    pub fn from_uuid_string(uuid_string: &str) -> Result<SegmentId, SegmentIdParseError> {
+        FromStr::from_str(uuid_string)
+    }
+}
+
+/// Error type used when parsing a `SegmentId` from a string fails.
+pub struct SegmentIdParseError(uuid::Error);
+
+impl Error for SegmentIdParseError {}
+
+impl fmt::Debug for SegmentIdParseError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+impl fmt::Display for SegmentIdParseError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+impl FromStr for SegmentId {
+    type Err = SegmentIdParseError;
+
+    fn from_str(uuid_string: &str) -> Result<Self, SegmentIdParseError> {
+        let uuid = Uuid::parse_str(uuid_string).map_err(SegmentIdParseError)?;
+        Ok(SegmentId(uuid))
    }
 }

 impl fmt::Debug for SegmentId {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "Seg({:?})", self.short_uuid_string())
    }
 }
@@ -78,3 +119,18 @@ impl Ord for SegmentId {
        self.0.as_bytes().cmp(other.0.as_bytes())
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::SegmentId;
+
+    #[test]
+    fn test_to_uuid_string() {
+        let full_uuid = "a5c4dfcbdfe645089129e308e26d5523";
+        let segment_id = SegmentId::from_uuid_string(full_uuid).unwrap();
+        assert_eq!(segment_id.uuid_string(), full_uuid);
+        assert_eq!(segment_id.short_uuid_string(), "a5c4dfcb");
+        // one extra char
+        assert!(SegmentId::from_uuid_string("a5c4dfcbdfe645089129e308e26d5523b").is_err());
+    }
+}
--- a/src/core/segment_meta.rs
+++ b/src/core/segment_meta.rs
@@ -1,173 +0,0 @@
-use super::SegmentComponent;
-use census::{Inventory, TrackedObject};
-use core::SegmentId;
-use serde;
-use std::collections::HashSet;
-use std::fmt;
-use std::path::PathBuf;
-
-lazy_static! {
-    static ref INVENTORY: Inventory<InnerSegmentMeta> = { Inventory::new() };
-}
-
-#[derive(Clone, Debug, Serialize, Deserialize)]
-struct DeleteMeta {
-    num_deleted_docs: u32,
-    opstamp: u64,
-}
-
-/// `SegmentMeta` contains simple meta information about a segment.
-///
-/// For instance the number of docs it contains,
-/// how many are deleted, etc.
-#[derive(Clone)]
-pub struct SegmentMeta {
-    tracked: TrackedObject<InnerSegmentMeta>,
-}
-
-impl fmt::Debug for SegmentMeta {
-    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
-        self.tracked.fmt(f)
-    }
-}
-
-impl serde::Serialize for SegmentMeta {
-    fn serialize<S>(
-        &self,
-        serializer: S,
-    ) -> Result<<S as serde::Serializer>::Ok, <S as serde::Serializer>::Error>
-    where
-        S: serde::Serializer,
-    {
-        self.tracked.serialize(serializer)
-    }
-}
-
-impl<'a> serde::Deserialize<'a> for SegmentMeta {
-    fn deserialize<D>(deserializer: D) -> Result<Self, <D as serde::Deserializer<'a>>::Error>
-    where
-        D: serde::Deserializer<'a>,
-    {
-        let inner = InnerSegmentMeta::deserialize(deserializer)?;
-        let tracked = INVENTORY.track(inner);
-        Ok(SegmentMeta { tracked: tracked })
-    }
-}
-
-impl SegmentMeta {
-    /// Lists all living `SegmentMeta` object at the time of the call.
-    pub fn all() -> Vec<SegmentMeta> {
-        INVENTORY
-            .list()
-            .into_iter()
-            .map(|inner| SegmentMeta { tracked: inner })
-            .collect::<Vec<_>>()
-    }
-
-    /// Creates a new `SegmentMeta` object.
-    #[doc(hidden)]
-    pub fn new(segment_id: SegmentId, max_doc: u32) -> SegmentMeta {
-        let inner = InnerSegmentMeta {
-            segment_id,
-            max_doc,
-            deletes: None,
-        };
-        SegmentMeta {
-            tracked: INVENTORY.track(inner),
-        }
-    }
-
-    /// Returns the segment id.
-    pub fn id(&self) -> SegmentId {
-        self.tracked.segment_id
-    }
-
-    /// Returns the number of deleted documents.
-    pub fn num_deleted_docs(&self) -> u32 {
-        self.tracked
-            .deletes
-            .as_ref()
-            .map(|delete_meta| delete_meta.num_deleted_docs)
-            .unwrap_or(0u32)
-    }
-
-    /// Returns the list of files that
-    /// are required for the segment meta.
-    ///
-    /// This is useful as the way tantivy removes files
-    /// is by removing all files that have been created by tantivy
-    /// and are not used by any segment anymore.
-    pub fn list_files(&self) -> HashSet<PathBuf> {
-        SegmentComponent::iterator()
-            .map(|component| self.relative_path(*component))
-            .collect::<HashSet<PathBuf>>()
-    }
-
-    /// Returns the relative path of a component of our segment.
-    ///
-    /// It just joins the segment id with the extension
-    /// associated to a segment component.
-    pub fn relative_path(&self, component: SegmentComponent) -> PathBuf {
-        let mut path = self.id().uuid_string();
-        path.push_str(&*match component {
-            SegmentComponent::POSITIONS => ".pos".to_string(),
-            SegmentComponent::POSTINGS => ".idx".to_string(),
-            SegmentComponent::TERMS => ".term".to_string(),
-            SegmentComponent::STORE => ".store".to_string(),
-            SegmentComponent::FASTFIELDS => ".fast".to_string(),
-            SegmentComponent::FIELDNORMS => ".fieldnorm".to_string(),
-            SegmentComponent::DELETE => format!(".{}.del", self.delete_opstamp().unwrap_or(0)),
-        });
-        PathBuf::from(path)
-    }
-
-    /// Return the highest doc id + 1
-    ///
-    /// If there are no deletes, then num_docs = max_docs
-    /// and all the doc ids contains in this segment
-    /// are exactly (0..max_doc).
-    pub fn max_doc(&self) -> u32 {
-        self.tracked.max_doc
-    }
-
-    /// Return the number of documents in the segment.
-    pub fn num_docs(&self) -> u32 {
-        self.max_doc() - self.num_deleted_docs()
-    }
-
-    /// Returns the opstamp of the last delete operation
-    /// taken in account in this segment.
-    pub fn delete_opstamp(&self) -> Option<u64> {
-        self.tracked
-            .deletes
-            .as_ref()
-            .map(|delete_meta| delete_meta.opstamp)
-    }
-
-    /// Returns true iff the segment meta contains
-    /// delete information.
-    pub fn has_deletes(&self) -> bool {
-        self.num_deleted_docs() > 0
-    }
-
-    #[doc(hidden)]
-    pub fn with_delete_meta(self, num_deleted_docs: u32, opstamp: u64) -> SegmentMeta {
-        let delete_meta = DeleteMeta {
-            num_deleted_docs,
-            opstamp,
-        };
-        let tracked = self.tracked.map(move |inner_meta| InnerSegmentMeta {
-            segment_id: inner_meta.segment_id,
-            max_doc: inner_meta.max_doc,
-            deletes: Some(delete_meta),
-        });
-        SegmentMeta { tracked }
-    }
-}
-
-#[derive(Clone, Debug, Serialize, Deserialize)]
-struct InnerSegmentMeta {
-    segment_id: SegmentId,
-    max_doc: u32,
-    deletes: Option<DeleteMeta>,
-}
--- a/src/core/segment_reader.rs
+++ b/src/core/segment_reader.rs
@@ -1,30 +1,26 @@
-use common::CompositeFile;
-use common::HasLen;
-use core::InvertedIndexReader;
-use core::Segment;
-use core::SegmentComponent;
-use core::SegmentId;
-use core::SegmentMeta;
-use error::ErrorKind;
-use fastfield::DeleteBitSet;
-use fastfield::FacetReader;
-use fastfield::FastFieldReader;
-use fastfield::{self, FastFieldNotAvailableError};
-use fastfield::{BytesFastFieldReader, FastValue, MultiValueIntFastFieldReader};
-use fieldnorm::FieldNormReader;
-use schema::Cardinality;
-use schema::Document;
-use schema::Field;
-use schema::FieldType;
-use schema::Schema;
+use crate::common::CompositeFile;
+use crate::common::HasLen;
+use crate::core::InvertedIndexReader;
+use crate::core::Segment;
+use crate::core::SegmentComponent;
+use crate::core::SegmentId;
+use crate::directory::ReadOnlySource;
+use crate::fastfield::DeleteBitSet;
+use crate::fastfield::FacetReader;
+use crate::fastfield::FastFieldReaders;
+use crate::fieldnorm::{FieldNormReader, FieldNormReaders};
+use crate::schema::Field;
+use crate::schema::FieldType;
+use crate::schema::Schema;
+use crate::space_usage::SegmentSpaceUsage;
+use crate::store::StoreReader;
+use crate::termdict::TermDictionary;
+use crate::DocId;
+use fail::fail_point;
 use std::collections::HashMap;
 use std::fmt;
 use std::sync::Arc;
 use std::sync::RwLock;
-use store::StoreReader;
-use termdict::TermDictionary;
-use DocId;
-use Result;

 /// Entry point to access all of the datastructures of the `Segment`
 ///
@@ -44,15 +40,17 @@ pub struct SegmentReader {
    inv_idx_reader_cache: Arc<RwLock<HashMap<Field, Arc<InvertedIndexReader>>>>,

    segment_id: SegmentId,
-    segment_meta: SegmentMeta,
+    max_doc: DocId,
+    num_docs: DocId,

    termdict_composite: CompositeFile,
    postings_composite: CompositeFile,
    positions_composite: CompositeFile,
-    fast_fields_composite: CompositeFile,
-    fieldnorms_composite: CompositeFile,
+    positions_idx_composite: CompositeFile,
+    fast_fields_readers: Arc<FastFieldReaders>,
+    fieldnorm_readers: FieldNormReaders,

-    store_reader: StoreReader,
+    store_source: ReadOnlySource,
    delete_bitset_opt: Option<DeleteBitSet>,
    schema: Schema,
 }
@@ -63,7 +61,7 @@ impl SegmentReader {
    /// Today, `tantivy` does not handle deletes, so it happens
    /// to also be the number of documents in the index.
    pub fn max_doc(&self) -> DocId {
-        self.segment_meta.max_doc()
+        self.max_doc
    }

    /// Returns the number of documents.
@@ -72,7 +70,7 @@ impl SegmentReader {
    /// Today, `tantivy` does not handle deletes so max doc and
    /// num_docs are the same.
    pub fn num_docs(&self) -> DocId {
-        self.segment_meta.num_docs()
+        self.num_docs
    }

    /// Returns the schema of the index this segment belongs to.
@@ -103,104 +101,35 @@ impl SegmentReader {
    ///
    /// # Panics
    /// May panic if the index is corrupted.
-    pub fn fast_field_reader<Item: FastValue>(
-        &self,
-        field: Field,
-    ) -> fastfield::Result<FastFieldReader<Item>> {
-        let field_entry = self.schema.get_field_entry(field);
-        if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::SingleValue)
-        {
-            self.fast_fields_composite
-                .open_read(field)
-                .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
-                .map(FastFieldReader::open)
-        } else {
-            Err(FastFieldNotAvailableError::new(field_entry))
-        }
-    }
-
-    pub(crate) fn fast_field_reader_with_idx<Item: FastValue>(
-        &self,
-        field: Field,
-        idx: usize,
-    ) -> fastfield::Result<FastFieldReader<Item>> {
-        if let Some(ff_source) = self.fast_fields_composite.open_read_with_idx(field, idx) {
-            Ok(FastFieldReader::open(ff_source))
-        } else {
-            let field_entry = self.schema.get_field_entry(field);
-            Err(FastFieldNotAvailableError::new(field_entry))
-        }
-    }
-
-    /// Accessor to the `MultiValueIntFastFieldReader` associated to a given `Field`.
-    /// May panick if the field is not a multivalued fastfield of the type `Item`.
-    pub fn multi_fast_field_reader<Item: FastValue>(
-        &self,
-        field: Field,
-    ) -> fastfield::Result<MultiValueIntFastFieldReader<Item>> {
-        let field_entry = self.schema.get_field_entry(field);
-        if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::MultiValues)
-        {
-            let idx_reader = self.fast_field_reader_with_idx(field, 0)?;
-            let vals_reader = self.fast_field_reader_with_idx(field, 1)?;
-            Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader))
-        } else {
-            Err(FastFieldNotAvailableError::new(field_entry))
-        }
-    }
-
-    /// Accessor to the `BytesFastFieldReader` associated to a given `Field`.
-    pub fn bytes_fast_field_reader(&self, field: Field) -> fastfield::Result<BytesFastFieldReader> {
-        let field_entry = self.schema.get_field_entry(field);
-        match field_entry.field_type() {
-            &FieldType::Bytes => {}
-            _ => return Err(FastFieldNotAvailableError::new(field_entry)),
-        }
-        let idx_reader = self.fast_fields_composite
-            .open_read_with_idx(field, 0)
-            .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
-            .map(FastFieldReader::open)?;
-        let values = self.fast_fields_composite
-            .open_read_with_idx(field, 1)
-            .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))?;
-        Ok(BytesFastFieldReader::open(idx_reader, values))
+    pub fn fast_fields(&self) -> &FastFieldReaders {
+        &self.fast_fields_readers
    }

    /// Accessor to the `FacetReader` associated to a given `Field`.
-    pub fn facet_reader(&self, field: Field) -> Result<FacetReader> {
+    pub fn facet_reader(&self, field: Field) -> Option<FacetReader> {
        let field_entry = self.schema.get_field_entry(field);
        if field_entry.field_type() != &FieldType::HierarchicalFacet {
-            return Err(ErrorKind::InvalidArgument(format!(
-                "The field {:?} is not a \
-                 hierarchical facet.",
-                field_entry
-            )).into());
+            return None;
        }
-        let term_ords_reader = self.multi_fast_field_reader(field)?;
-        let termdict_source = self.termdict_composite.open_read(field).ok_or_else(|| {
-            ErrorKind::InvalidArgument(format!(
-                "The field \"{}\" is a hierarchical \
-                 but this segment does not seem to have the field term \
-                 dictionary.",
-                field_entry.name()
-            ))
-        })?;
-        let termdict = TermDictionary::from_source(termdict_source);
+        let term_ords_reader = self.fast_fields().u64s(field)?;
+        let termdict = self.termdict_composite
+            .open_read(field)
+            .map(|source| TermDictionary::from_source(&source))
+            .unwrap_or_else(TermDictionary::empty);
        let facet_reader = FacetReader::new(term_ords_reader, termdict);
-        Ok(facet_reader)
+        Some(facet_reader)
    }

    /// Accessor to the segment's `Field norms`'s reader.
    ///
    /// Field norms are the length (in tokens) of the fields.
-    /// It is used in the computation of the [TfIdf]
-    /// (https://fulmicoton.gitbooks.io/tantivy-doc/content/tfidf.html).
+    /// It is used in the computation of the [TfIdf](https://fulmicoton.gitbooks.io/tantivy-doc/content/tfidf.html).
    ///
    /// They are simply stored as a fast field, serialized in
    /// the `.fieldnorm` file of the segment.
    pub fn get_fieldnorms_reader(&self, field: Field) -> FieldNormReader {
-        if let Some(fieldnorm_source) = self.fieldnorms_composite.open_read(field) {
-            FieldNormReader::open(fieldnorm_source)
+        if let Some(fieldnorm_reader) = self.fieldnorm_readers.get_field(field) {
+            fieldnorm_reader
        } else {
            let field_name = self.schema.get_field_name(field);
            let err_msg = format!(
@@ -212,17 +141,18 @@ impl SegmentReader {
    }

    /// Accessor to the segment's `StoreReader`.
-    pub fn get_store_reader(&self) -> &StoreReader {
-        &self.store_reader
+    pub fn get_store_reader(&self) -> StoreReader {
+        StoreReader::from_source(self.store_source.clone())
    }

    /// Open a new segment for reading.
-    pub fn open(segment: &Segment) -> Result<SegmentReader> {
+    pub fn open(segment: &Segment) -> crate::Result<SegmentReader> {
        let termdict_source = segment.open_read(SegmentComponent::TERMS)?;
        let termdict_composite = CompositeFile::open(&termdict_source)?;

        let store_source = segment.open_read(SegmentComponent::STORE)?;
-        let store_reader = StoreReader::from_source(store_source);
+
+        fail_point!("SegmentReader::open#middle");

        let postings_source = segment.open_read(SegmentComponent::POSTINGS)?;
        let postings_composite = CompositeFile::open(&postings_source)?;
@@ -235,11 +165,23 @@ impl SegmentReader {
            }
        };

+        let positions_idx_composite = {
+            if let Ok(source) = segment.open_read(SegmentComponent::POSITIONSSKIP) {
+                CompositeFile::open(&source)?
+            } else {
+                CompositeFile::empty()
+            }
+        };
+
+        let schema = segment.schema();
+
        let fast_fields_data = segment.open_read(SegmentComponent::FASTFIELDS)?;
        let fast_fields_composite = CompositeFile::open(&fast_fields_data)?;
+        let fast_field_readers =
+            Arc::new(FastFieldReaders::load_all(&schema, &fast_fields_composite)?);

-        let fieldnorms_data = segment.open_read(SegmentComponent::FIELDNORMS)?;
-        let fieldnorms_composite = CompositeFile::open(&fieldnorms_data)?;
+        let fieldnorm_data = segment.open_read(SegmentComponent::FIELDNORMS)?;
+        let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;

        let delete_bitset_opt = if segment.meta().has_deletes() {
            let delete_data = segment.open_read(SegmentComponent::DELETE)?;
@@ -248,18 +190,19 @@ impl SegmentReader {
            None
        };

-        let schema = segment.schema();
        Ok(SegmentReader {
            inv_idx_reader_cache: Arc::new(RwLock::new(HashMap::new())),
-            segment_meta: segment.meta().clone(),
+            max_doc: segment.meta().max_doc(),
+            num_docs: segment.meta().num_docs(),
            termdict_composite,
            postings_composite,
-            fast_fields_composite,
-            fieldnorms_composite,
+            fast_fields_readers: fast_field_readers,
+            fieldnorm_readers,
            segment_id: segment.id(),
-            store_reader,
+            store_source,
            delete_bitset_opt,
            positions_composite,
+            positions_idx_composite,
            schema,
        })
    }
@@ -272,7 +215,8 @@ impl SegmentReader {
    /// term dictionary associated to a specific field,
    /// and opening the posting list associated to any term.
    pub fn inverted_index(&self, field: Field) -> Arc<InvertedIndexReader> {
-        if let Some(inv_idx_reader) = self.inv_idx_reader_cache
+        if let Some(inv_idx_reader) = self
+            .inv_idx_reader_cache
            .read()
            .expect("Lock poisoned. This should never happen")
            .get(&field)
@@ -296,23 +240,30 @@ impl SegmentReader {
            // As a result, no data is associated to the inverted index.
            //
            // Returns an empty inverted index.
-            return Arc::new(InvertedIndexReader::empty(field_type.clone()));
+            return Arc::new(InvertedIndexReader::empty(field_type));
        }

        let postings_source = postings_source_opt.unwrap();

-        let termdict_source = self.termdict_composite
-            .open_read(field)
-            .expect("Failed to open field term dictionary in composite file. Is the field indexed");
+        let termdict_source = self.termdict_composite.open_read(field).expect(
+            "Failed to open field term dictionary in composite file. Is the field indexed?",
+        );

-        let positions_source = self.positions_composite
+        let positions_source = self
+            .positions_composite
+            .open_read(field)
+            .expect("Index corrupted. Failed to open field positions in composite file.");
+
+        let positions_idx_source = self
+            .positions_idx_composite
            .open_read(field)
            .expect("Index corrupted. Failed to open field positions in composite file.");

        let inv_idx_reader = Arc::new(InvertedIndexReader::new(
-            TermDictionary::from_source(termdict_source),
+            TermDictionary::from_source(&termdict_source),
            postings_source,
            positions_source,
+            positions_idx_source,
            record_option,
        ));

@@ -326,14 +277,6 @@ impl SegmentReader {
        inv_idx_reader
    }

-    /// Returns the document (or to be accurate, its stored field)
-    /// bearing the given doc id.
-    /// This method is slow and should seldom be called from
-    /// within a collector.
-    pub fn doc(&self, doc_id: DocId) -> Result<Document> {
-        self.store_reader.get(doc_id)
-    }
-
    /// Returns the segment id
    pub fn segment_id(&self) -> SegmentId {
        self.segment_id
@@ -354,72 +297,44 @@ impl SegmentReader {
    }

    /// Returns an iterator that will iterate over the alive document ids
-    pub fn doc_ids_alive(&self) -> SegmentReaderAliveDocsIterator {
-        SegmentReaderAliveDocsIterator::new(&self)
+    pub fn doc_ids_alive<'a>(&'a self) -> impl Iterator<Item = DocId> + 'a {
+        (0u32..self.max_doc).filter(move |doc| !self.is_deleted(*doc))
+    }
+
+    /// Summarize total space usage of this segment.
+    pub fn space_usage(&self) -> SegmentSpaceUsage {
+        SegmentSpaceUsage::new(
+            self.num_docs(),
+            self.termdict_composite.space_usage(),
+            self.postings_composite.space_usage(),
+            self.positions_composite.space_usage(),
+            self.positions_idx_composite.space_usage(),
+            self.fast_fields_readers.space_usage(),
+            self.fieldnorm_readers.space_usage(),
+            self.get_store_reader().space_usage(),
+            self.delete_bitset_opt
+                .as_ref()
+                .map(DeleteBitSet::space_usage)
+                .unwrap_or(0),
+        )
    }
 }

 impl fmt::Debug for SegmentReader {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "SegmentReader({:?})", self.segment_id)
    }
 }

-/// Implements the iterator trait to allow easy iteration
-/// over non-deleted ("alive") DocIds in a SegmentReader
-pub struct SegmentReaderAliveDocsIterator<'a> {
-    reader: &'a SegmentReader,
-    max_doc: DocId,
-    current: DocId,
-}
-
-impl<'a> SegmentReaderAliveDocsIterator<'a> {
-    pub fn new(reader: &'a SegmentReader) -> SegmentReaderAliveDocsIterator<'a> {
-        SegmentReaderAliveDocsIterator {
-            reader: reader,
-            max_doc: reader.max_doc(),
-            current: 0,
-        }
-    }
-}
-
-impl<'a> Iterator for SegmentReaderAliveDocsIterator<'a> {
-    type Item = DocId;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        // TODO: Use TinySet (like in BitSetDocSet) to speed this process up
-        if self.current >= self.max_doc {
-            return None;
-        }
-
-        // find the next alive doc id
-        while self.reader.is_deleted(self.current) {
-            self.current += 1;
-
-            if self.current >= self.max_doc {
-                return None;
-            }
-        }
-
-        // capture the current alive DocId
-        let result = Some(self.current);
-
-        // move down the chain
-        self.current += 1;
-
-        result
-    }
-}
-
 #[cfg(test)]
 mod test {
-    use core::Index;
-    use schema::{SchemaBuilder, Term, STORED, TEXT};
-    use DocId;
+    use crate::core::Index;
+    use crate::schema::{Schema, Term, STORED, TEXT};
+    use crate::DocId;

    #[test]
    fn test_alive_docs_iterator() {
-        let mut schema_builder = SchemaBuilder::new();
+        let mut schema_builder = Schema::builder();
        schema_builder.add_text_field("name", TEXT | STORED);
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema.clone());
@@ -444,9 +359,7 @@ mod test {
            // ok, now we should have a deleted doc
            index_writer2.commit().unwrap();
        }
-
-        index.load_searchers().unwrap();
-        let searcher = index.searcher();
+        let searcher = index.reader().unwrap().searcher();
        let docs: Vec<DocId> = searcher.segment_reader(0).doc_ids_alive().collect();
        assert_eq!(vec![0u32, 2u32], docs);
    }
--- a/src/directory/directory.rs
+++ b/src/directory/directory.rs
@@ -1,11 +1,104 @@
-use directory::error::{DeleteError, OpenReadError, OpenWriteError};
-use directory::{ReadOnlySource, WritePtr};
+use crate::directory::directory_lock::Lock;
+use crate::directory::error::LockError;
+use crate::directory::error::{DeleteError, OpenReadError, OpenWriteError};
+use crate::directory::WatchCallback;
+use crate::directory::WatchHandle;
+use crate::directory::{ReadOnlySource, WritePtr};
 use std::fmt;
 use std::io;
+use std::io::Write;
 use std::marker::Send;
 use std::marker::Sync;
 use std::path::Path;
+use std::path::PathBuf;
 use std::result;
+use std::thread;
+use std::time::Duration;
+
+/// The retry logic for acquiring locks is pretty simple.
+/// We just retry `n` times, waiting a given `duration` before each retry,
+/// both depending on the type of lock.
+struct RetryPolicy {
+    num_retries: usize,
+    wait_in_ms: u64,
+}
+
+impl RetryPolicy {
+    fn no_retry() -> RetryPolicy {
+        RetryPolicy {
+            num_retries: 0,
+            wait_in_ms: 0,
+        }
+    }
+
+    fn wait_and_retry(&mut self) -> bool {
+        if self.num_retries == 0 {
+            false
+        } else {
+            self.num_retries -= 1;
+            let wait_duration = Duration::from_millis(self.wait_in_ms);
+            thread::sleep(wait_duration);
+            true
+        }
+    }
+}
+
+/// The `DirectoryLock` is an object that represents a file lock.
+/// See  [`LockType`](struct.LockType.html)
+///
+/// It is transparently associated with a lock file that gets deleted
+/// on `Drop`. The lock is released automatically on `Drop`.
+pub struct DirectoryLock(Box<dyn Send + Sync + 'static>);
+
+struct DirectoryLockGuard {
+    directory: Box<dyn Directory>,
+    path: PathBuf,
+}
+
+impl<T: Send + Sync + 'static> From<Box<T>> for DirectoryLock {
+    fn from(underlying: Box<T>) -> Self {
+        DirectoryLock(underlying)
+    }
+}
+
+impl Drop for DirectoryLockGuard {
+    fn drop(&mut self) {
+        if let Err(e) = self.directory.delete(&*self.path) {
+            error!("Failed to remove the lock file. {:?}", e);
+        }
+    }
+}
+
+enum TryAcquireLockError {
+    FileExists,
+    IOError(io::Error),
+}
+
+fn try_acquire_lock(
+    filepath: &Path,
+    directory: &mut dyn Directory,
+) -> Result<DirectoryLock, TryAcquireLockError> {
+    let mut write = directory.open_write(filepath).map_err(|e| match e {
+        OpenWriteError::FileAlreadyExists(_) => TryAcquireLockError::FileExists,
+        OpenWriteError::IOError(io_error) => TryAcquireLockError::IOError(io_error.into()),
+    })?;
+    write.flush().map_err(TryAcquireLockError::IOError)?;
+    Ok(DirectoryLock::from(Box::new(DirectoryLockGuard {
+        directory: directory.box_clone(),
+        path: filepath.to_owned(),
+    })))
+}
+
+fn retry_policy(is_blocking: bool) -> RetryPolicy {
+    if is_blocking {
+        RetryPolicy {
+            num_retries: 100,
+            wait_in_ms: 100,
+        }
+    } else {
+        RetryPolicy::no_retry()
+    }
+}

 /// Write-once read many (WORM) abstraction for where
 /// tantivy's data should be stored.
@@ -17,7 +110,7 @@ use std::result;
 /// - The [`RAMDirectory`](struct.RAMDirectory.html), which
 /// should be used mostly for tests.
 ///
-pub trait Directory: fmt::Debug + Send + Sync + 'static {
+pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
    /// Opens a virtual file for read.
    ///
    /// Once a virtual file is open, its data may not
@@ -25,6 +118,8 @@ pub trait Directory: fmt::Debug + Send + Sync + 'static {
    ///
    /// Specifically, subsequent writes or flushes should
    /// have no effect on the returned `ReadOnlySource` object.
+    ///
+    /// You should only use this to read files created with [Directory::open_write].
    fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenReadError>;

    /// Removes a file
@@ -64,6 +159,8 @@ pub trait Directory: fmt::Debug + Send + Sync + 'static {
    /// atomic_write.
    ///
    /// This should only be used for small files.
+    ///
+    /// You should only use this to read files created with [Directory::atomic_write].
    fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError>;

    /// Atomically replace the content of a file with data.
@@ -74,6 +171,57 @@ pub trait Directory: fmt::Debug + Send + Sync + 'static {
    /// The file may or may not previously exist.
    fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()>;

-    /// Clones the directory and boxes the clone
-    fn box_clone(&self) -> Box<Directory>;
+    /// Acquire a lock in the given directory.
+    ///
+    /// The method is blocking or not depending on the `Lock` object.
+    fn acquire_lock(&self, lock: &Lock) -> Result<DirectoryLock, LockError> {
+        let mut box_directory = self.box_clone();
+        let mut retry_policy = retry_policy(lock.is_blocking);
+        loop {
+            match try_acquire_lock(&lock.filepath, &mut *box_directory) {
+                Ok(result) => {
+                    return Ok(result);
+                }
+                Err(TryAcquireLockError::FileExists) => {
+                    if !retry_policy.wait_and_retry() {
+                        return Err(LockError::LockBusy);
+                    }
+                }
+                Err(TryAcquireLockError::IOError(io_error)) => {
+                    return Err(LockError::IOError(io_error));
+                }
+            }
+        }
+    }
+
+    /// Registers a callback that will be called whenever a change on the `meta.json`
+    /// using the `atomic_write` API is detected.
+    ///
+    /// The behavior when using `.watch()` on a file using [Directory::open_write] is, on the other
+    /// hand, undefined.
+    ///
+    /// The file will be watched for the lifetime of the returned `WatchHandle`. The caller is
+    /// required to keep it.
+    /// It does not override previous callbacks. When the file is modified, all callbacks that are
+    /// registered (and whose `WatchHandle` is still alive) are triggered.
+    ///
+    /// Internally, tantivy only uses this API to detect new commits to implement the
+    /// `OnCommit` `ReloadPolicy`. Not implementing watch in a `Directory` only prevents the
+    /// `OnCommit` `ReloadPolicy` from working properly.
+    fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle>;
+}
+
+/// DirectoryClone
+pub trait DirectoryClone {
+    /// Clones the directory and boxes the clone
+    fn box_clone(&self) -> Box<dyn Directory>;
+}
+
+impl<T> DirectoryClone for T
+where
+    T: 'static + Directory + Clone,
+{
+    fn box_clone(&self) -> Box<dyn Directory> {
+        Box::new(self.clone())
+    }
 }
--- a/src/directory/directory_lock.rs
+++ b/src/directory/directory_lock.rs
@@ -0,0 +1,55 @@
+use once_cell::sync::Lazy;
+use std::path::PathBuf;
+
+/// A directory lock.
+///
+/// A lock is associated to a specific path and some
+/// [`LockParams`](./enum.LockParams.html).
+/// Tantivy itself uses only two locks, but client applications
+/// can use the directory facility to define their own locks.
+/// - [INDEX_WRITER_LOCK](./struct.INDEX_WRITER_LOCK.html)
+/// - [META_LOCK](./struct.META_LOCK.html)
+///
+/// Check out these locks documentation for more information.
+///
+#[derive(Debug)]
+pub struct Lock {
+    /// The lock needs to be associated with its own file `path`.
+    /// Depending on the platform, the lock might rely on the creation
+    /// and deletion of this filepath.
+    pub filepath: PathBuf,
+    /// `is_blocking` describes whether acquiring the lock is meant
+    /// to be a blocking operation or a non-blocking one.
+    ///
+    /// Acquiring a blocking lock blocks until the lock is
+    /// available.
+    /// Acquiring a non-blocking lock returns rapidly, either successfully
+    /// or with an error signifying that someone is already holding
+    /// the lock.
+    pub is_blocking: bool,
+}
+
+/// Only one process should be able to write tantivy's index at a time.
+/// This lock file, when present, is in charge of preventing other processes from opening an IndexWriter.
+///
+/// If the process is killed and this file remains, it is safe to remove it manually.
+///
+/// Failing to acquire this lock usually means a misuse of tantivy's API,
+/// (creating more than one instance of the `IndexWriter`), or a spurious
+/// lock file remaining after a crash. In the latter case, removing the file after
+/// checking that no process using tantivy is still running is safe.
+pub static INDEX_WRITER_LOCK: Lazy<Lock> = Lazy::new(|| Lock {
+    filepath: PathBuf::from(".tantivy-writer.lock"),
+    is_blocking: false,
+});
+/// The meta lock file is here to protect the segment files being opened by
+/// `IndexReader::reload()` from being garbage collected.
+/// It makes it possible for another process to safely consume
+/// our index in-writing. Ideally, we may have preferred `RWLock` semantics
+/// here, but it is difficult to achieve on Windows.
+///
+/// Opening segment readers is a very fast process.
+pub static META_LOCK: Lazy<Lock> = Lazy::new(|| Lock {
+    filepath: PathBuf::from(".tantivy-meta.lock"),
+    is_blocking: true,
+});
--- a/src/directory/error.rs
+++ b/src/directory/error.rs
@@ -1,8 +1,25 @@
+use crate::Version;
 use std::error::Error as StdError;
 use std::fmt;
 use std::io;
 use std::path::PathBuf;

+/// Error while trying to acquire a directory lock.
+#[derive(Debug, Fail)]
+pub enum LockError {
+    /// Failed to acquire a lock as it is already held by another
+    /// client.
+    /// - In the context of a blocking lock, this means the lock was not released within some `timeout` period.
+    /// - In the context of a non-blocking lock, this means the lock was busy at the moment of the call.
+    #[fail(
+        display = "Could not acquire lock as it is already held, possibly by a different process."
+    )]
+    LockBusy,
+    /// Trying to acquire a lock failed with an `IOError`
+    #[fail(display = "Failed to acquire the lock due to an io:Error.")]
+    IOError(io::Error),
+}
+
 /// General IO error with an optional path to the offending file.
 #[derive(Debug)]
 pub struct IOError {
@@ -10,8 +27,14 @@ pub struct IOError {
    err: io::Error,
 }

+impl Into<io::Error> for IOError {
+    fn into(self) -> io::Error {
+        self.err
+    }
+}
+
 impl fmt::Display for IOError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.path {
            Some(ref path) => write!(f, "io error occurred on path '{:?}': '{}'", path, self.err),
            None => write!(f, "io error occurred: '{}'", self.err),
@@ -24,7 +47,7 @@ impl StdError for IOError {
        "io error occurred"
    }

-    fn cause(&self) -> Option<&StdError> {
+    fn cause(&self) -> Option<&dyn StdError> {
        Some(&self.err)
    }
 }
@@ -51,10 +74,18 @@ pub enum OpenDirectoryError {
    DoesNotExist(PathBuf),
    /// The path exists but is not a directory.
    NotADirectory(PathBuf),
+    /// IoError
+    IoError(io::Error),
+}
+
+impl From<io::Error> for OpenDirectoryError {
+    fn from(io_err: io::Error) -> Self {
+        OpenDirectoryError::IoError(io_err)
+    }
 }

 impl fmt::Display for OpenDirectoryError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            OpenDirectoryError::DoesNotExist(ref path) => {
                write!(f, "the underlying directory '{:?}' does not exist", path)
@@ -62,6 +93,11 @@ impl fmt::Display for OpenDirectoryError {
            OpenDirectoryError::NotADirectory(ref path) => {
                write!(f, "the path '{:?}' exists but is not a directory", path)
            }
+            OpenDirectoryError::IoError(ref err) => write!(
+                f,
+                "IOError while trying to open/create the directory. {:?}",
+                err
+            ),
        }
    }
 }
@@ -71,7 +107,7 @@ impl StdError for OpenDirectoryError {
        "error occurred while opening a directory"
    }

-    fn cause(&self) -> Option<&StdError> {
+    fn cause(&self) -> Option<&dyn StdError> {
        None
    }
 }
@@ -94,7 +130,7 @@ impl From<IOError> for OpenWriteError {
 }

 impl fmt::Display for OpenWriteError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            OpenWriteError::FileAlreadyExists(ref path) => {
                write!(f, "the file '{:?}' already exists", path)
@@ -113,7 +149,7 @@ impl StdError for OpenWriteError {
        "error occurred while opening a file for writing"
    }

-    fn cause(&self) -> Option<&StdError> {
+    fn cause(&self) -> Option<&dyn StdError> {
        match *self {
            OpenWriteError::FileAlreadyExists(_) => None,
            OpenWriteError::IOError(ref err) => Some(err),
@@ -121,6 +157,65 @@ impl StdError for OpenWriteError {
    }
 }

+/// Type of index incompatibility between the library and the index found on disk
+/// Used to catch and provide a hint to solve this incompatibility issue
+pub enum Incompatibility {
+    /// This library cannot decompress the index found on disk
+    CompressionMismatch {
+        /// Compression algorithm used by the current version of tantivy
+        library_compression_format: String,
+        /// Compression algorithm that was used to serialise the index
+        index_compression_format: String,
+    },
+    /// The index format found on disk isn't supported by this version of the library
+    IndexMismatch {
+        /// Version used by the library
+        library_version: Version,
+        /// Version the index was built with
+        index_version: Version,
+    },
+}
+
+impl fmt::Debug for Incompatibility {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+        match self {
+            Incompatibility::CompressionMismatch {
+                library_compression_format,
+                index_compression_format,
+            } => {
+                let err = format!(
+                    "Library was compiled with {:?} compression, index was compressed with {:?}",
+                    library_compression_format, index_compression_format
+                );
+                let advice = format!(
+                    "Change the feature flag to {:?} and rebuild the library",
+                    index_compression_format
+                );
+                write!(f, "{}. {}", err, advice)?;
+            }
+            Incompatibility::IndexMismatch {
+                library_version,
+                index_version,
+            } => {
+                let err = format!(
+                    "Library version: {}, index version: {}",
+                    library_version.index_format_version, index_version.index_format_version
+                );
+                // TODO make a more useful error message
+                // include the version range that supports this index_format_version
+                let advice = format!(
+                    "Change tantivy to a version compatible with index format {} (e.g. {}.{}.x) \
+                     and rebuild your project.",
+                    index_version.index_format_version, index_version.major, index_version.minor
+                );
+                write!(f, "{}. {}", err, advice)?;
+            }
+        }
+
+        Ok(())
+    }
+}
+
 /// Error that may occur when accessing a file read
 #[derive(Debug)]
 pub enum OpenReadError {
@@ -129,6 +224,8 @@ pub enum OpenReadError {
    /// Any kind of IO error that happens when
    /// interacting with the underlying IO device.
    IOError(IOError),
+    /// This library doesn't support the index version found on disk
+    IncompatibleIndex(Incompatibility),
 }

 impl From<IOError> for OpenReadError {
@@ -138,7 +235,7 @@ impl From<IOError> for OpenReadError {
 }

 impl fmt::Display for OpenReadError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            OpenReadError::FileDoesNotExist(ref path) => {
                write!(f, "the file '{:?}' does not exist", path)
@@ -148,19 +245,9 @@ impl fmt::Display for OpenReadError {
                "an io error occurred while opening a file for reading: '{}'",
                err
            ),
-        }
-    }
-}
-
-impl StdError for OpenReadError {
-    fn description(&self) -> &str {
-        "error occurred while opening a file for reading"
-    }
-
-    fn cause(&self) -> Option<&StdError> {
-        match *self {
-            OpenReadError::FileDoesNotExist(_) => None,
-            OpenReadError::IOError(ref err) => Some(err),
+            OpenReadError::IncompatibleIndex(ref footer) => {
+                write!(f, "Incompatible index format: {:?}", footer)
+            }
        }
    }
 }
@@ -181,8 +268,14 @@ impl From<IOError> for DeleteError {
    }
 }

+impl From<Incompatibility> for OpenReadError {
+    fn from(incompatibility: Incompatibility) -> Self {
+        OpenReadError::IncompatibleIndex(incompatibility)
+    }
+}
+
 impl fmt::Display for DeleteError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            DeleteError::FileDoesNotExist(ref path) => {
                write!(f, "the file '{:?}' does not exist", path)
@@ -199,7 +292,7 @@ impl StdError for DeleteError {
        "error occurred while deleting a file"
    }

-    fn cause(&self) -> Option<&StdError> {
+    fn cause(&self) -> Option<&dyn StdError> {
        match *self {
            DeleteError::FileDoesNotExist(_) => None,
            DeleteError::IOError(ref err) => Some(err),
--- a/src/directory/footer.rs
+++ b/src/directory/footer.rs
@@ -0,0 +1,390 @@
+use crate::common::{BinarySerializable, CountingWriter, FixedSize, VInt};
+use crate::directory::error::Incompatibility;
+use crate::directory::read_only_source::ReadOnlySource;
+use crate::directory::{AntiCallToken, TerminatingWrite};
+use crate::Version;
+use byteorder::{ByteOrder, LittleEndian, WriteBytesExt};
+use crc32fast::Hasher;
+use std::io;
+use std::io::Write;
+
+const FOOTER_MAX_LEN: usize = 10_000;
+
+type CrcHashU32 = u32;
+
+#[derive(Debug, Clone, PartialEq)]
+pub struct Footer {
+    pub version: Version,
+    pub meta: String,
+    pub versioned_footer: VersionedFooter,
+}
+
+/// Serialises the footer to a byte-array
+/// - versioned_footer_len: variable bytes (VInt)
+/// - versioned_footer: variable bytes
+/// - meta_len: variable bytes (VInt)
+/// - meta: variable bytes
+/// - version_len: variable bytes (VInt)
+/// - version json: variable bytes
+impl BinarySerializable for Footer {
+    fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
+        BinarySerializable::serialize(&self.versioned_footer, writer)?;
+        BinarySerializable::serialize(&self.meta, writer)?;
+        let version_string =
+            serde_json::to_string(&self.version).map_err(|_err| io::ErrorKind::InvalidInput)?;
+        BinarySerializable::serialize(&version_string, writer)?;
+        Ok(())
+    }
+
+    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
+        let versioned_footer = VersionedFooter::deserialize(reader)?;
+        let meta = String::deserialize(reader)?;
+        let version_json = String::deserialize(reader)?;
+        let version = serde_json::from_str(&version_json)?;
+        Ok(Footer {
+            version,
+            meta,
+            versioned_footer,
+        })
+    }
+}
+
+impl Footer {
+    pub fn new(versioned_footer: VersionedFooter) -> Self {
+        let version = crate::VERSION.clone();
+        let meta = version.to_string();
+        Footer {
+            version,
+            meta,
+            versioned_footer,
+        }
+    }
+
+    pub fn append_footer<W: io::Write>(&self, mut write: &mut W) -> io::Result<()> {
+        let mut counting_write = CountingWriter::wrap(&mut write);
+        self.serialize(&mut counting_write)?;
+        let written_len = counting_write.written_bytes();
+        write.write_u32::<LittleEndian>(written_len as u32)?;
+        Ok(())
+    }
+
+    /// Splits `source` into (footer, body); errors if the file is too short for its footer.
+    pub fn extract_footer(source: ReadOnlySource) -> Result<(Footer, ReadOnlySource), io::Error> {
+        let corrupted = |msg: String| io::Error::new(io::ErrorKind::UnexpectedEof, msg);
+        if source.len() < 4 {
+            return Err(corrupted(format!(
+                "File corrupted. The file is smaller than 4 bytes (len={}).",
+                source.len()
+            )));
+        }
+        let (body_footer, footer_len_bytes) = source.split_from_end(u32::SIZE_IN_BYTES);
+        let footer_len = LittleEndian::read_u32(footer_len_bytes.as_slice()) as usize;
+        let body_len = body_footer.len().checked_sub(footer_len).ok_or_else(|| {
+            corrupted(format!("File corrupted. Invalid footer length ({}).", footer_len))
+        })?;
+        let (body, footer_data) = body_footer.split(body_len);
+        let footer = Footer::deserialize(&mut footer_data.as_slice())?;
+        Ok((footer, body))
+    }
+
+    /// Checks that this footer describes a file readable by the running library.
+    ///
+    /// Has to be called on a footer obtained through `extract_footer`.
+    ///
+    /// # Errors
+    ///
+    /// - `Incompatibility::CompressionMismatch` if a `V1`/`V2` footer records a store
+    ///   compression format that differs from the library's one.
+    /// - `Incompatibility::IndexMismatch` if the footer version is unknown.
+    pub fn is_compatible(&self) -> Result<(), Incompatibility> {
+        let library_version = crate::version();
+        // `V1` and `V2` carry the same fields and are validated identically.
+        let index_compression = match &self.versioned_footer {
+            VersionedFooter::V1 {
+                store_compression, ..
+            }
+            | VersionedFooter::V2 {
+                store_compression, ..
+            } => store_compression,
+            VersionedFooter::UnknownVersion => {
+                return Err(Incompatibility::IndexMismatch {
+                    library_version: library_version.clone(),
+                    index_version: self.version.clone(),
+                });
+            }
+        };
+        // For known versions, only the store compression format is compared.
+        if &library_version.store_compression == index_compression {
+            return Ok(());
+        }
+        Err(Incompatibility::CompressionMismatch {
+            library_compression_format: library_version.store_compression.to_string(),
+            index_compression_format: index_compression.to_string(),
+        })
+    }
+}
+
+/// Footer that includes a crc32 hash that enables us to checksum files in the index
+#[derive(Debug, Clone, PartialEq)]
+pub enum VersionedFooter {
+    UnknownVersion,
+    V1 {
+        crc32: CrcHashU32,
+        store_compression: String,
+    },
+    // Introduction of the Block WAND information.
+    V2 {
+        crc32: CrcHashU32,
+        store_compression: String,
+    },
+}
+
+impl BinarySerializable for VersionedFooter {
+    fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
+        let mut buf = Vec::new();
+        match self {
+            VersionedFooter::V2 {
+                crc32,
+                store_compression: compression,
+            } => {
+                // Serializes a `V2` footer; any other version yields an `InvalidInput` error.
+                // [   version    |   crc_hash  | compression_mode ]
+                // [    0..4      |     4..8    |     variable     ]
+                BinarySerializable::serialize(&2u32, &mut buf)?;
+                BinarySerializable::serialize(crc32, &mut buf)?;
+                BinarySerializable::serialize(compression, &mut buf)?;
+            }
+            VersionedFooter::V1 { .. } | VersionedFooter::UnknownVersion => {
+                return Err(io::Error::new(
+                    io::ErrorKind::InvalidInput,
+                    "Cannot serialize an unknown versioned footer ",
+                ));
+            }
+        }
+        BinarySerializable::serialize(&VInt(buf.len() as u64), writer)?;
+        assert!(buf.len() <= FOOTER_MAX_LEN);
+        writer.write_all(&buf[..])?;
+        Ok(())
+    }
+
+    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
+        let len = VInt::deserialize(reader)?.0 as usize;
+        if len > FOOTER_MAX_LEN {
+            return Err(io::Error::new(
+                io::ErrorKind::InvalidData,
+                format!(
+                    "Footer seems invalid as it suggests a footer len of {}. File is corrupted, \
+            or the index was created with a different & old version of tantivy.",
+                    len
+                ),
+            ));
+        }
+        let mut buf = vec![0u8; len];
+        reader.read_exact(&mut buf[..])?;
+        let mut cursor = &buf[..];
+        let version = u32::deserialize(&mut cursor)?;
+        if version != 1 && version != 2 {
+            return Ok(VersionedFooter::UnknownVersion);
+        }
+        let crc32 = u32::deserialize(&mut cursor)?;
+        let store_compression = String::deserialize(&mut cursor)?;
+        Ok(if version == 1 {
+            VersionedFooter::V1 {
+                crc32,
+                store_compression,
+            }
+        } else {
+            assert_eq!(version, 2);
+            VersionedFooter::V2 {
+                crc32,
+                store_compression,
+            }
+        })
+    }
+}
+
+impl VersionedFooter {
+    /// Returns the crc32 checksum stored in the footer, or `None` for an unknown version.
+    pub fn crc(&self) -> Option<CrcHashU32> {
+        match self {
+            VersionedFooter::V1 { crc32, .. } | VersionedFooter::V2 { crc32, .. } => Some(*crc32),
+            VersionedFooter::UnknownVersion => None,
+        }
+    }
+}
+
+pub(crate) struct FooterProxy<W: TerminatingWrite> {
+    /// always Some except after terminate call
+    hasher: Option<Hasher>,
+    /// always Some except after terminate call
+    writer: Option<W>,
+}
+
+impl<W: TerminatingWrite> FooterProxy<W> {
+    pub fn new(writer: W) -> Self {
+        FooterProxy {
+            hasher: Some(Hasher::new()),
+            writer: Some(writer),
+        }
+    }
+}
+
+impl<W: TerminatingWrite> Write for FooterProxy<W> {
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        let count = self.writer.as_mut().unwrap().write(buf)?;
+        self.hasher.as_mut().unwrap().update(&buf[..count]);
+        Ok(count)
+    }
+
+    fn flush(&mut self) -> io::Result<()> {
+        self.writer.as_mut().unwrap().flush()
+    }
+}
+
+impl<W: TerminatingWrite> TerminatingWrite for FooterProxy<W> {
+    fn terminate_ref(&mut self, _: AntiCallToken) -> io::Result<()> {
+        let crc32 = self.hasher.take().unwrap().finalize();
+        let footer = Footer::new(VersionedFooter::V2 {
+            crc32,
+            store_compression: crate::store::COMPRESSION.to_string(),
+        });
+        let mut writer = self.writer.take().unwrap();
+        footer.append_footer(&mut writer)?;
+        writer.terminate()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    use super::CrcHashU32;
+    use super::FooterProxy;
+    use crate::common::{BinarySerializable, VInt};
+    use crate::directory::footer::{Footer, VersionedFooter};
+    use crate::directory::TerminatingWrite;
+    use byteorder::{ByteOrder, LittleEndian};
+    use regex::Regex;
+    use std::io;
+
+    #[test]
+    fn test_versioned_footer() {
+        let mut vec = Vec::new();
+        let footer_proxy = FooterProxy::new(&mut vec);
+        assert!(footer_proxy.terminate().is_ok());
+        assert_eq!(vec.len(), 167);
+        let footer = Footer::deserialize(&mut &vec[..]).unwrap();
+        assert!(matches!(
+           footer.versioned_footer,
+           VersionedFooter::V2 { store_compression, .. }
+           if store_compression == crate::store::COMPRESSION
+        ));
+        assert_eq!(&footer.version, crate::version());
+    }
+
+    #[test]
+    fn test_serialize_deserialize_footer() {
+        let mut buffer = Vec::new();
+        let crc32 = 123456u32;
+        let footer: Footer = Footer::new(VersionedFooter::V2 {
+            crc32,
+            store_compression: "lz4".to_string(),
+        });
+        footer.serialize(&mut buffer).unwrap();
+        let footer_deser = Footer::deserialize(&mut &buffer[..]).unwrap();
+        assert_eq!(footer_deser, footer);
+    }
+
+    #[test]
+    fn footer_length() {
+        let crc32 = 1111111u32;
+        let versioned_footer = VersionedFooter::V2 {
+            crc32,
+            store_compression: "lz4".to_string(),
+        };
+        let mut buf = Vec::new();
+        versioned_footer.serialize(&mut buf).unwrap();
+        assert_eq!(buf.len(), 13);
+        let footer = Footer::new(versioned_footer);
+        let regex_ptn = Regex::new(
+            "tantivy v[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.{0,10}, index_format v[0-9]{1,5}",
+        )
+        .unwrap();
+        assert!(regex_ptn.is_match(&footer.meta));
+    }
+
+    #[test]
+    fn versioned_footer_from_bytes() {
+        let v_footer_bytes = vec![
+            // versioned footer length
+            12 | 128,
+            // index format version
+            2,
+            0,
+            0,
+            0,
+            // crc 32
+            12,
+            35,
+            89,
+            18,
+            // compression format
+            3 | 128,
+            b'l',
+            b'z',
+            b'4',
+        ];
+        let mut cursor = &v_footer_bytes[..];
+        let versioned_footer = VersionedFooter::deserialize(&mut cursor).unwrap();
+        assert!(cursor.is_empty());
+        let expected_crc: u32 = LittleEndian::read_u32(&v_footer_bytes[5..9]) as CrcHashU32;
+        let expected_versioned_footer: VersionedFooter = VersionedFooter::V2 {
+            crc32: expected_crc,
+            store_compression: "lz4".to_string(),
+        };
+        assert_eq!(versioned_footer, expected_versioned_footer);
+        let mut buffer = Vec::new();
+        assert!(versioned_footer.serialize(&mut buffer).is_ok());
+        assert_eq!(&v_footer_bytes[..], &buffer[..]);
+    }
+
+    #[test]
+    fn versioned_footer_panic() {
+        let v_footer_bytes = vec![6u8 | 128u8, 3u8, 0u8, 0u8, 1u8, 0u8, 0u8];
+        let mut b = &v_footer_bytes[..];
+        let versioned_footer = VersionedFooter::deserialize(&mut b).unwrap();
+        assert!(b.is_empty());
+        let expected_versioned_footer = VersionedFooter::UnknownVersion;
+        assert_eq!(versioned_footer, expected_versioned_footer);
+        let mut buf = Vec::new();
+        assert!(versioned_footer.serialize(&mut buf).is_err());
+    }
+
+    #[test]
+    #[cfg(not(feature = "lz4"))]
+    fn compression_mismatch() {
+        let crc32 = 1111111u32;
+        let versioned_footer = VersionedFooter::V1 {
+            crc32,
+            store_compression: "lz4".to_string(),
+        };
+        let footer = Footer::new(versioned_footer);
+        let res = footer.is_compatible();
+        assert!(res.is_err());
+    }
+
+    #[test]
+    fn test_deserialize_too_large_footer() {
+        let mut buf = vec![];
+        assert!(FooterProxy::new(&mut buf).terminate().is_ok());
+        let mut long_len_buf = [0u8; 10];
+        let num_bytes = VInt(super::FOOTER_MAX_LEN as u64 + 1u64).serialize_into(&mut long_len_buf);
+        buf[0..num_bytes].copy_from_slice(&long_len_buf[..num_bytes]);
+        let err = Footer::deserialize(&mut &buf[..]).unwrap_err();
+        assert_eq!(err.kind(), io::ErrorKind::InvalidData);
+        assert_eq!(
+            err.to_string(),
+            "Footer seems invalid as it suggests a footer len of 10001. File is corrupted, \
+            or the index was created with a different & old version of tantivy."
+        );
+    }
+}
--- a/src/directory/managed_directory.rs
+++ b/src/directory/managed_directory.rs
@@ -1,8 +1,16 @@
-use core::MANAGED_FILEPATH;
-use directory::error::{DeleteError, IOError, OpenReadError, OpenWriteError};
-use directory::{ReadOnlySource, WritePtr};
-use error::{ErrorKind, Result, ResultExt};
-use serde_json;
+use crate::core::MANAGED_FILEPATH;
+use crate::directory::error::{DeleteError, IOError, LockError, OpenReadError, OpenWriteError};
+use crate::directory::footer::{Footer, FooterProxy};
+use crate::directory::DirectoryLock;
+use crate::directory::GarbageCollectionResult;
+use crate::directory::Lock;
+use crate::directory::META_LOCK;
+use crate::directory::{ReadOnlySource, WritePtr};
+use crate::directory::{WatchCallback, WatchHandle};
+use crate::error::DataCorruption;
+use crate::Directory;
+
+use crc32fast::Hasher;
 use std::collections::HashSet;
 use std::io;
 use std::io::Write;
@@ -10,7 +18,17 @@ use std::path::{Path, PathBuf};
 use std::result;
 use std::sync::RwLockWriteGuard;
 use std::sync::{Arc, RwLock};
-use Directory;
+
+/// Returns true iff the file is "managed".
+/// Non-managed files are not subject to garbage collection.
+///
+/// Filenames that start with a "." -typically locks-
+/// are not managed.
+fn is_managed(path: &Path) -> bool {
+    path.to_str()
+        .map(|p_str| !p_str.starts_with('.'))
+        .unwrap_or(true)
+}

 /// Wrapper of directories that keeps track of files created by Tantivy.
 ///
@@ -23,7 +41,7 @@ use Directory;
 /// useful anymore.
 #[derive(Debug)]
 pub struct ManagedDirectory {
-    directory: Box<Directory>,
+    directory: Box<dyn Directory>,
    meta_informations: Arc<RwLock<MetaInformation>>,
 }

@@ -35,23 +53,28 @@ struct MetaInformation {
 /// Saves the file containing the list of existing files
 /// that were created by tantivy.
 fn save_managed_paths(
-    directory: &mut Directory,
-    wlock: &RwLockWriteGuard<MetaInformation>,
+    directory: &mut dyn Directory,
+    wlock: &RwLockWriteGuard<'_, MetaInformation>,
 ) -> io::Result<()> {
    let mut w = serde_json::to_vec(&wlock.managed_paths)?;
-    write!(&mut w, "\n")?;
+    writeln!(&mut w)?;
    directory.atomic_write(&MANAGED_FILEPATH, &w[..])?;
    Ok(())
 }

 impl ManagedDirectory {
    /// Wraps a directory as managed directory.
-    pub fn new<Dir: Directory>(directory: Dir) -> Result<ManagedDirectory> {
+    pub fn wrap<Dir: Directory>(directory: Dir) -> crate::Result<ManagedDirectory> {
        match directory.atomic_read(&MANAGED_FILEPATH) {
            Ok(data) => {
                let managed_files_json = String::from_utf8_lossy(&data);
                let managed_files: HashSet<PathBuf> = serde_json::from_str(&managed_files_json)
-                    .chain_err(|| ErrorKind::CorruptedFile(MANAGED_FILEPATH.clone()))?;
+                    .map_err(|e| {
+                        DataCorruption::new(
+                            MANAGED_FILEPATH.to_path_buf(),
+                            format!("Managed file cannot be deserialized: {:?}. ", e),
+                        )
+                    })?;
                Ok(ManagedDirectory {
                    directory: Box::new(directory),
                    meta_informations: Arc::new(RwLock::new(MetaInformation {
@@ -64,6 +87,11 @@ impl ManagedDirectory {
                meta_informations: Arc::default(),
            }),
            Err(OpenReadError::IOError(e)) => Err(From::from(e)),
+            Err(OpenReadError::IncompatibleIndex(incompatibility)) => {
+                // For the moment, this should never happen: `meta.json`
+                // does not have any footer and cannot detect incompatibility.
+                Err(crate::TantivyError::IncompatibleIndex(incompatibility))
+            }
        }
    }

@@ -74,55 +102,78 @@ impl ManagedDirectory {
    ///
    /// * `living_files` - List of files that are still used by the index.
    ///
+    /// The use of a callback ensures that the list of living_files is computed
+    /// while we hold the lock on meta.
+    ///
    /// This method does not panick nor returns errors.
    /// If a file cannot be deleted (for permission reasons for instance)
    /// an error is simply logged, and the file remains in the list of managed
    /// files.
-    pub fn garbage_collect<L: FnOnce() -> HashSet<PathBuf>>(&mut self, get_living_files: L) {
+    pub fn garbage_collect<L: FnOnce() -> HashSet<PathBuf>>(
+        &mut self,
+        get_living_files: L,
+    ) -> crate::Result<GarbageCollectionResult> {
        info!("Garbage collect");
        let mut files_to_delete = vec![];
+
+        // It is crucial to get the living files after acquiring the
+        // read lock of meta informations. That way, we
+        // avoid the following scenario.
+        //
+        // 1) we get the list of living files.
+        // 2) someone creates a new file.
+        // 3) we start garbage collection and remove this file
+        // even though it is a living file.
+        //
+        // releasing the lock as .delete() will use it too.
        {
-            // releasing the lock as .delete() will use it too.
-            let meta_informations_rlock = self.meta_informations
+            let meta_informations_rlock = self
+                .meta_informations
                .read()
                .expect("Managed directory rlock poisoned in garbage collect.");

-            // It is crucial to get the living files after acquiring the
-            // read lock of meta informations. That way, we
-            // avoid the following scenario.
-            //
-            // 1) we get the list of living files.
-            // 2) someone creates a new file.
-            // 3) we start garbage collection and remove this file
-            // even though it is a living file.
-            let living_files = get_living_files();
-
-            for managed_path in &meta_informations_rlock.managed_paths {
-                if !living_files.contains(managed_path) {
-                    files_to_delete.push(managed_path.clone());
+            // The point of this second "file" lock is to prevent the following scenario
+            // 1) process B tries to load a new set of searcher.
+            // The list of segments is loaded
+            // 2) writer change meta.json (for instance after a merge or a commit)
+            // 3) gc kicks in.
+            // 4) gc removes a file that was useful for process B, before process B opened it.
+            match self.acquire_lock(&META_LOCK) {
+                Ok(_meta_lock) => {
+                    let living_files = get_living_files();
+                    for managed_path in &meta_informations_rlock.managed_paths {
+                        if !living_files.contains(managed_path) {
+                            files_to_delete.push(managed_path.clone());
+                        }
+                    }
+                }
+                Err(err) => {
+                    error!("Failed to acquire lock for GC");
+                    return Err(crate::TantivyError::from(err));
                }
            }
        }

+        let mut failed_to_delete_files = vec![];
        let mut deleted_files = vec![];
-        {
-            for file_to_delete in files_to_delete {
-                match self.delete(&file_to_delete) {
-                    Ok(_) => {
-                        info!("Deleted {:?}", file_to_delete);
-                        deleted_files.push(file_to_delete);
-                    }
-                    Err(file_error) => {
-                        match file_error {
-                            DeleteError::FileDoesNotExist(_) => {
-                                deleted_files.push(file_to_delete);
-                            }
-                            DeleteError::IOError(_) => {
-                                if !cfg!(target_os = "windows") {
-                                    // On windows, delete is expected to fail if the file
-                                    // is mmapped.
-                                    error!("Failed to delete {:?}", file_to_delete);
-                                }
+
+        for file_to_delete in files_to_delete {
+            match self.delete(&file_to_delete) {
+                Ok(_) => {
+                    info!("Deleted {:?}", file_to_delete);
+                    deleted_files.push(file_to_delete);
+                }
+                Err(file_error) => {
+                    match file_error {
+                        DeleteError::FileDoesNotExist(_) => {
+                            deleted_files.push(file_to_delete.clone());
+                        }
+                        DeleteError::IOError(_) => {
+                            failed_to_delete_files.push(file_to_delete.clone());
+                            if !cfg!(target_os = "windows") {
+                                // On windows, delete is expected to fail if the file
+                                // is mmapped.
+                                error!("Failed to delete {:?}", file_to_delete);
                            }
                        }
                    }
@@ -133,19 +184,21 @@ impl ManagedDirectory {
        if !deleted_files.is_empty() {
            // update the list of managed files by removing
            // the file that were removed.
-            let mut meta_informations_wlock = self.meta_informations
+            let mut meta_informations_wlock = self
+                .meta_informations
                .write()
                .expect("Managed directory wlock poisoned (2).");
-            {
-                let managed_paths_write = &mut meta_informations_wlock.managed_paths;
-                for delete_file in &deleted_files {
-                    managed_paths_write.remove(delete_file);
-                }
-            }
-            if save_managed_paths(self.directory.as_mut(), &meta_informations_wlock).is_err() {
-                error!("Failed to save the list of managed files.");
+            let managed_paths_write = &mut meta_informations_wlock.managed_paths;
+            for delete_file in &deleted_files {
+                managed_paths_write.remove(delete_file);
            }
+            save_managed_paths(self.directory.as_mut(), &meta_informations_wlock)?;
        }
+
+        Ok(GarbageCollectionResult {
+            deleted_files,
+            failed_to_delete_files,
+        })
    }

    /// Registers a file as managed
@@ -155,8 +208,17 @@ impl ManagedDirectory {
    /// registering the filepath and creating the file
    /// will not lead to garbage files that will
    /// never get removed.
+    ///
+    /// Files starting with "." are reserved for locks.
+    /// They are not managed and cannot be subjected
+    /// to garbage collection.
    fn register_file_as_managed(&mut self, filepath: &Path) -> io::Result<()> {
-        let mut meta_wlock = self.meta_informations
+        // Files starting with "." (e.g. lock files) are not managed.
+        if !is_managed(filepath) {
+            return Ok(());
+        }
+        let mut meta_wlock = self
+            .meta_informations
            .write()
            .expect("Managed file lock poisoned");
        let has_changed = meta_wlock.managed_paths.insert(filepath.to_owned());
@@ -165,17 +227,60 @@ impl ManagedDirectory {
        }
        Ok(())
    }
+
+    /// Verifies the crc32 checksum of the file at `path`.
+    ///
+    /// Returns `Ok(false)` when the computed checksum differs from the stored one, or
+    /// when the footer has an unknown version and therefore exposes no checksum.
+    pub fn validate_checksum(&self, path: &Path) -> result::Result<bool, OpenReadError> {
+        let source = self.directory.open_read(path)?;
+        let (footer, body) = Footer::extract_footer(source)
+            .map_err(|err| IOError::with_path(path.to_path_buf(), err))?;
+        // Only the body is hashed: the footer is appended after the checksum is finalized.
+        let mut hasher = Hasher::new();
+        hasher.update(body.as_slice());
+        let computed_crc = hasher.finalize();
+        Ok(footer.versioned_footer.crc() == Some(computed_crc))
+    }
+
+    /// Returns the managed files whose content does not match their stored checksum.
+    pub fn list_damaged(&self) -> result::Result<HashSet<PathBuf>, OpenReadError> {
+        // Clone the paths so the read lock is released before any file is opened.
+        let candidates = self
+            .meta_informations
+            .read()
+            .expect("Managed directory rlock poisoned in list damaged.")
+            .managed_paths
+            .clone();
+        let mut damaged = HashSet::new();
+        for path in candidates {
+            if !self.validate_checksum(&path)? {
+                damaged.insert(path);
+            }
+        }
+        Ok(damaged)
+    }
 }

 impl Directory for ManagedDirectory {
    fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenReadError> {
-        self.directory.open_read(path)
+        let read_only_source = self.directory.open_read(path)?;
+        let (footer, reader) = Footer::extract_footer(read_only_source)
+            .map_err(|err| IOError::with_path(path.to_path_buf(), err))?;
+        footer.is_compatible()?;
+        Ok(reader)
    }

    fn open_write(&mut self, path: &Path) -> result::Result<WritePtr, OpenWriteError> {
        self.register_file_as_managed(path)
            .map_err(|e| IOError::with_path(path.to_owned(), e))?;
-        self.directory.open_write(path)
+        Ok(io::BufWriter::new(Box::new(FooterProxy::new(
+            self.directory
+                .open_write(path)?
+                .into_inner()
+                .map_err(|_| ())
+                .expect("buffer should be empty"),
+        ))))
    }

    fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> {
@@ -195,8 +300,12 @@ impl Directory for ManagedDirectory {
        self.directory.exists(path)
    }

-    fn box_clone(&self) -> Box<Directory> {
-        Box::new(self.clone())
+    fn acquire_lock(&self, lock: &Lock) -> result::Result<DirectoryLock, LockError> {
+        self.directory.acquire_lock(lock)
+    }
+
+    fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
+        self.directory.watch(watch_callback)
    }
 }

@@ -209,98 +318,118 @@ impl Clone for ManagedDirectory {
    }
 }

+#[cfg(feature = "mmap")]
 #[cfg(test)]
-mod tests {
+mod tests_mmap_specific {

-    use super::*;
-    #[cfg(feature = "mmap")]
-    use directory::MmapDirectory;
+    use crate::directory::{Directory, ManagedDirectory, MmapDirectory, TerminatingWrite};
+    use std::collections::HashSet;
+    use std::fs::OpenOptions;
    use std::io::Write;
-    use std::path::Path;
-    use tempdir::TempDir;
-
-    lazy_static! {
-        static ref TEST_PATH1: &'static Path = Path::new("some_path_for_test");
-        static ref TEST_PATH2: &'static Path = Path::new("some_path_for_test2");
-    }
+    use std::path::{Path, PathBuf};
+    use tempfile::TempDir;

    #[test]
-    #[cfg(feature = "mmap")]
    fn test_managed_directory() {
-        let tempdir = TempDir::new("index").unwrap();
+        let tempdir = TempDir::new().unwrap();
        let tempdir_path = PathBuf::from(tempdir.path());
+
+        let test_path1: &'static Path = Path::new("some_path_for_test");
+        let test_path2: &'static Path = Path::new("some_path_for_test_2");
        {
            let mmap_directory = MmapDirectory::open(&tempdir_path).unwrap();
-            let mut managed_directory = ManagedDirectory::new(mmap_directory).unwrap();
-            {
-                let mut write_file = managed_directory.open_write(*TEST_PATH1).unwrap();
-                write_file.flush().unwrap();
-            }
-            {
-                managed_directory
-                    .atomic_write(*TEST_PATH2, &vec![0u8, 1u8])
-                    .unwrap();
-            }
-            {
-                assert!(managed_directory.exists(*TEST_PATH1));
-                assert!(managed_directory.exists(*TEST_PATH2));
-            }
-            {
-                let living_files: HashSet<PathBuf> =
-                    [TEST_PATH1.to_owned()].into_iter().cloned().collect();
-                managed_directory.garbage_collect(|| living_files);
-            }
-            {
-                assert!(managed_directory.exists(*TEST_PATH1));
-                assert!(!managed_directory.exists(*TEST_PATH2));
-            }
+            let mut managed_directory = ManagedDirectory::wrap(mmap_directory).unwrap();
+            let write_file = managed_directory.open_write(test_path1).unwrap();
+            write_file.terminate().unwrap();
+            managed_directory
+                .atomic_write(test_path2, &[0u8, 1u8])
+                .unwrap();
+            assert!(managed_directory.exists(test_path1));
+            assert!(managed_directory.exists(test_path2));
+            let living_files: HashSet<PathBuf> = [test_path1.to_owned()].iter().cloned().collect();
+            assert!(managed_directory.garbage_collect(|| living_files).is_ok());
+            assert!(managed_directory.exists(test_path1));
+            assert!(!managed_directory.exists(test_path2));
        }
        {
            let mmap_directory = MmapDirectory::open(&tempdir_path).unwrap();
-            let mut managed_directory = ManagedDirectory::new(mmap_directory).unwrap();
-            {
-                assert!(managed_directory.exists(*TEST_PATH1));
-                assert!(!managed_directory.exists(*TEST_PATH2));
-            }
-            {
-                let living_files: HashSet<PathBuf> = HashSet::new();
-                managed_directory.garbage_collect(|| living_files);
-            }
-            {
-                assert!(!managed_directory.exists(*TEST_PATH1));
-                assert!(!managed_directory.exists(*TEST_PATH2));
-            }
+            let mut managed_directory = ManagedDirectory::wrap(mmap_directory).unwrap();
+            assert!(managed_directory.exists(test_path1));
+            assert!(!managed_directory.exists(test_path2));
+            let living_files: HashSet<PathBuf> = HashSet::new();
+            assert!(managed_directory.garbage_collect(|| living_files).is_ok());
+            assert!(!managed_directory.exists(test_path1));
+            assert!(!managed_directory.exists(test_path2));
        }
    }

    #[test]
-    #[cfg(feature = "mmap ")]
    fn test_managed_directory_gc_while_mmapped() {
-        let tempdir = TempDir::new("index").unwrap();
+        let test_path1: &'static Path = Path::new("some_path_for_test");
+
+        let tempdir = TempDir::new().unwrap();
        let tempdir_path = PathBuf::from(tempdir.path());
        let living_files = HashSet::new();

        let mmap_directory = MmapDirectory::open(&tempdir_path).unwrap();
-        let mut managed_directory = ManagedDirectory::new(mmap_directory).unwrap();
-        managed_directory
-            .atomic_write(*TEST_PATH1, &vec![0u8, 1u8])
-            .unwrap();
-        assert!(managed_directory.exists(*TEST_PATH1));
+        let mut managed_directory = ManagedDirectory::wrap(mmap_directory).unwrap();
+        let mut write = managed_directory.open_write(test_path1).unwrap();
+        write.write_all(&[0u8, 1u8]).unwrap();
+        write.terminate().unwrap();
+        assert!(managed_directory.exists(test_path1));

-        let _mmap_read = managed_directory.open_read(*TEST_PATH1).unwrap();
-        managed_directory.garbage_collect(|| living_files.clone());
+        let _mmap_read = managed_directory.open_read(test_path1).unwrap();
+        assert!(managed_directory
+            .garbage_collect(|| living_files.clone())
+            .is_ok());
        if cfg!(target_os = "windows") {
            // On Windows, gc should try and fail the file as it is mmapped.
-            assert!(managed_directory.exists(*TEST_PATH1));
+            assert!(managed_directory.exists(test_path1));
            // unmap should happen here.
            drop(_mmap_read);
            // The file should still be in the list of managed file and
            // eventually be deleted once mmap is released.
-            managed_directory.garbage_collect(|| living_files);
-            assert!(!managed_directory.exists(*TEST_PATH1));
+            assert!(managed_directory.garbage_collect(|| living_files).is_ok());
+            assert!(!managed_directory.exists(test_path1));
        } else {
-            assert!(!managed_directory.exists(*TEST_PATH1));
+            assert!(!managed_directory.exists(test_path1));
        }
    }

+    #[test]
+    fn test_checksum() {
+        let test_path1: &'static Path = Path::new("some_path_for_test");
+        let test_path2: &'static Path = Path::new("other_test_path");
+
+        let tempdir = TempDir::new().unwrap();
+        let tempdir_path = PathBuf::from(tempdir.path());
+
+        let mmap_directory = MmapDirectory::open(&tempdir_path).unwrap();
+        let mut managed_directory = ManagedDirectory::wrap(mmap_directory).unwrap();
+        let mut write = managed_directory.open_write(test_path1).unwrap();
+        write.write_all(&[0u8, 1u8]).unwrap();
+        write.terminate().unwrap();
+
+        let mut write = managed_directory.open_write(test_path2).unwrap();
+        write.write_all(&[3u8, 4u8, 5u8]).unwrap();
+        write.terminate().unwrap();
+
+        let read_source = managed_directory.open_read(test_path2).unwrap();
+        assert_eq!(read_source.as_slice(), &[3u8, 4u8, 5u8]);
+        assert!(managed_directory.list_damaged().unwrap().is_empty());
+
+        let mut corrupted_path = tempdir_path.clone();
+        corrupted_path.push(test_path2);
+        let mut file = OpenOptions::new()
+            .write(true)
+            .open(&corrupted_path)
+            .unwrap();
+        file.write_all(&[255u8]).unwrap();
+        file.flush().unwrap();
+        drop(file);
+
+        let damaged = managed_directory.list_damaged().unwrap();
+        assert_eq!(damaged.len(), 1);
+        assert!(damaged.contains(test_path2));
+    }
 }
--- a/src/directory/mmap_directory.rs
+++ b/src/directory/mmap_directory.rs
@@ -1,12 +1,24 @@
-use atomicwrites;
-use common::make_io_err;
-use directory::error::{DeleteError, IOError, OpenDirectoryError, OpenReadError, OpenWriteError};
-use directory::shared_vec_slice::SharedVecSlice;
-use directory::Directory;
-use directory::ReadOnlySource;
-use directory::WritePtr;
-use fst::raw::MmapReadOnly;
-use std::collections::hash_map::Entry as HashMapEntry;
+use crate::core::META_FILEPATH;
+use crate::directory::error::LockError;
+use crate::directory::error::{
+    DeleteError, IOError, OpenDirectoryError, OpenReadError, OpenWriteError,
+};
+use crate::directory::read_only_source::BoxedData;
+use crate::directory::AntiCallToken;
+use crate::directory::Directory;
+use crate::directory::DirectoryLock;
+use crate::directory::Lock;
+use crate::directory::ReadOnlySource;
+use crate::directory::WatchCallback;
+use crate::directory::WatchCallbackList;
+use crate::directory::WatchHandle;
+use crate::directory::{TerminatingWrite, WritePtr};
+use fs2::FileExt;
+use memmap::Mmap;
+use notify::RawEvent;
+use notify::RecursiveMode;
+use notify::Watcher;
+use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 use std::convert::From;
 use std::fmt;
@@ -16,14 +28,22 @@ use std::io::{self, Seek, SeekFrom};
 use std::io::{BufWriter, Read, Write};
 use std::path::{Path, PathBuf};
 use std::result;
+use std::sync::mpsc::{channel, Receiver, Sender};
 use std::sync::Arc;
+use std::sync::Mutex;
 use std::sync::RwLock;
-use tempdir::TempDir;
+use std::sync::Weak;
+use std::thread;
+use tempfile::TempDir;
+
+/// Create a default io error given a string.
+pub(crate) fn make_io_err(msg: String) -> io::Error {
+    io::Error::new(io::ErrorKind::Other, msg)
+}

 /// Returns None iff the file exists, can be read, but is empty (and hence
-/// cannot be mmapped).
-///
-fn open_mmap(full_path: &Path) -> result::Result<Option<MmapReadOnly>, OpenReadError> {
+/// cannot be mmapped)
+fn open_mmap(full_path: &Path) -> result::Result<Option<Mmap>, OpenReadError> {
    let file = File::open(full_path).map_err(|e| {
        if e.kind() == io::ErrorKind::NotFound {
            OpenReadError::FileDoesNotExist(full_path.to_owned())
@@ -32,7 +52,8 @@ fn open_mmap(full_path: &Path) -> result::Result<Option<MmapReadOnly>, OpenReadE
        }
    })?;

-    let meta_data = file.metadata()
+    let meta_data = file
+        .metadata()
        .map_err(|e| IOError::with_path(full_path.to_owned(), e))?;
    if meta_data.len() == 0 {
        // if the file size is 0, it will not be possible
@@ -41,7 +62,7 @@ fn open_mmap(full_path: &Path) -> result::Result<Option<MmapReadOnly>, OpenReadE
        return Ok(None);
    }
    unsafe {
-        MmapReadOnly::open(&file)
+        memmap::Mmap::map(&file)
            .map(Some)
            .map_err(|e| From::from(IOError::with_path(full_path.to_owned(), e)))
    }
@@ -64,7 +85,7 @@ pub struct CacheInfo {

 struct MmapCache {
    counters: CacheCounters,
-    cache: HashMap<PathBuf, MmapReadOnly>,
+    cache: HashMap<PathBuf, Weak<BoxedData>>,
 }

 impl Default for MmapCache {
@@ -77,12 +98,7 @@ impl Default for MmapCache {
 }

 impl MmapCache {
-    /// Removes a `MmapReadOnly` entry from the mmap cache.
-    fn discard_from_cache(&mut self, full_path: &Path) -> bool {
-        self.cache.remove(full_path).is_some()
-    }
-
-    fn get_info(&mut self) -> CacheInfo {
+    fn get_info(&self) -> CacheInfo {
        let paths: Vec<PathBuf> = self.cache.keys().cloned().collect();
        CacheInfo {
            counters: self.counters.clone(),
@@ -90,57 +106,175 @@ impl MmapCache {
        }
    }

-    fn get_mmap(&mut self, full_path: &Path) -> Result<Option<MmapReadOnly>, OpenReadError> {
-        Ok(match self.cache.entry(full_path.to_owned()) {
-            HashMapEntry::Occupied(occupied_entry) => {
-                let mmap = occupied_entry.get();
+    fn remove_weak_ref(&mut self) {
+        let keys_to_remove: Vec<PathBuf> = self
+            .cache
+            .iter()
+            .filter(|(_, mmap_weakref)| mmap_weakref.upgrade().is_none())
+            .map(|(key, _)| key.clone())
+            .collect();
+        for key in keys_to_remove {
+            self.cache.remove(&key);
+        }
+    }
+
+    // Returns None if the file exists but has a len of 0 (and hence is not mmappable).
+    fn get_mmap(&mut self, full_path: &Path) -> Result<Option<Arc<BoxedData>>, OpenReadError> {
+        if let Some(mmap_weak) = self.cache.get(full_path) {
+            if let Some(mmap_arc) = mmap_weak.upgrade() {
                self.counters.hit += 1;
-                Some(mmap.clone())
+                return Ok(Some(mmap_arc));
            }
-            HashMapEntry::Vacant(vacant_entry) => {
-                self.counters.miss += 1;
-                if let Some(mmap) = open_mmap(full_path)? {
-                    vacant_entry.insert(mmap.clone());
-                    Some(mmap)
-                } else {
-                    None
+        }
+        self.cache.remove(full_path);
+        self.counters.miss += 1;
+        let mmap_opt = open_mmap(full_path)?;
+        Ok(mmap_opt.map(|mmap| {
+            let mmap_arc: Arc<BoxedData> = Arc::new(Box::new(mmap));
+            let mmap_weak = Arc::downgrade(&mmap_arc);
+            self.cache.insert(full_path.to_owned(), mmap_weak);
+            mmap_arc
+        }))
+    }
+}
+
+struct WatcherWrapper {
+    _watcher: Mutex<notify::RecommendedWatcher>,
+    watcher_router: Arc<WatchCallbackList>,
+}
+
+impl WatcherWrapper {
+    pub fn new(path: &Path) -> Result<Self, OpenDirectoryError> {
+        let (tx, watcher_recv): (Sender<RawEvent>, Receiver<RawEvent>) = channel();
+        // We need to initialize the watcher and start watching `path` before spawning the thread.
+        let watcher = notify::raw_watcher(tx)
+            .and_then(|mut watcher| {
+                watcher.watch(path, RecursiveMode::Recursive)?;
+                Ok(watcher)
+            })
+            .map_err(|err| match err {
+                notify::Error::PathNotFound => OpenDirectoryError::DoesNotExist(path.to_owned()),
+                _ => {
+                    panic!("Unknown error while starting watching directory {:?}", path);
                }
-            }
+            })?;
+        let watcher_router: Arc<WatchCallbackList> = Default::default();
+        let watcher_router_clone = watcher_router.clone();
+        thread::Builder::new()
+            .name("meta-file-watch-thread".to_string())
+            .spawn(move || {
+                loop {
+                    match watcher_recv.recv().map(|evt| evt.path) {
+                        Ok(Some(changed_path)) => {
+                            // ... Actually subject to false positives.
+                            // We might want to be more accurate than this at one point.
+                            if let Some(filename) = changed_path.file_name() {
+                                if filename == *META_FILEPATH {
+                                    let _ = watcher_router_clone.broadcast();
+                                }
+                            }
+                        }
+                        Ok(None) => {
+                            // not an event we are interested in.
+                        }
+                        Err(_e) => {
+                            // the watch send channel was dropped
+                            break;
+                        }
+                    }
+                }
+            })?;
+        Ok(WatcherWrapper {
+            _watcher: Mutex::new(watcher),
+            watcher_router,
        })
    }
+
+    pub fn watch(&mut self, watch_callback: WatchCallback) -> WatchHandle {
+        self.watcher_router.subscribe(watch_callback)
+    }
 }

 /// Directory storing data in files, read via mmap.
 ///
 /// The Mmap object are cached to limit the
 /// system calls.
+///
+/// In the `MmapDirectory`, locks are implemented using the `fs2` crate definition of locks.
+///
+/// On MacOS & linux, it relies on `flock` (aka `BSD Lock`). These locks solve most of the
+/// problems related to POSIX Locks, but their contract may not be respected on `NFS`
+/// depending on the implementation.
+///
+/// On Windows the semantics are again different.
 #[derive(Clone)]
 pub struct MmapDirectory {
+    inner: Arc<MmapDirectoryInner>,
+}
+
+struct MmapDirectoryInner {
    root_path: PathBuf,
-    mmap_cache: Arc<RwLock<MmapCache>>,
-    _temp_directory: Arc<Option<TempDir>>,
+    mmap_cache: RwLock<MmapCache>,
+    _temp_directory: Option<TempDir>,
+    watcher: RwLock<Option<WatcherWrapper>>,
+}
+
+impl MmapDirectoryInner {
+    fn new(root_path: PathBuf, temp_directory: Option<TempDir>) -> MmapDirectoryInner {
+        MmapDirectoryInner {
+            root_path,
+            mmap_cache: Default::default(),
+            _temp_directory: temp_directory,
+            watcher: RwLock::new(None),
+        }
+    }
+
+    fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
+        // a lot of juggling here, to ensure we don't do anything that panics
+        // while the rwlock is held. That way we ensure that the rwlock cannot
+        // be poisoned.
+        //
+        // The downside is that we might create a watch wrapper that is not useful.
+        let need_initialization = self.watcher.read().unwrap().is_none();
+        if need_initialization {
+            let watch_wrapper = WatcherWrapper::new(&self.root_path)?;
+            let mut watch_wlock = self.watcher.write().unwrap();
+            // the watcher could have been initialized when we released the lock, and
+            // we do not want to lose the watched files that were set.
+            if watch_wlock.is_none() {
+                *watch_wlock = Some(watch_wrapper);
+            }
+        }
+        if let Some(watch_wrapper) = self.watcher.write().unwrap().as_mut() {
+            Ok(watch_wrapper.watch(watch_callback))
+        } else {
+            unreachable!("At this point, watch wrapper is supposed to be initialized");
+        }
+    }
 }

 impl fmt::Debug for MmapDirectory {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "MmapDirectory({:?})", self.root_path)
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "MmapDirectory({:?})", self.inner.root_path)
    }
 }

 impl MmapDirectory {
+    fn new(root_path: PathBuf, temp_directory: Option<TempDir>) -> MmapDirectory {
+        let inner = MmapDirectoryInner::new(root_path, temp_directory);
+        MmapDirectory {
+            inner: Arc::new(inner),
+        }
+    }
+
    /// Creates a new MmapDirectory in a temporary directory.
    ///
    /// This is mostly useful to test the MmapDirectory itself.
    /// For your unit tests, prefer the RAMDirectory.
-    pub fn create_from_tempdir() -> io::Result<MmapDirectory> {
-        let tempdir = TempDir::new("index")?;
+    pub fn create_from_tempdir() -> Result<MmapDirectory, OpenDirectoryError> {
+        let tempdir = TempDir::new().map_err(OpenDirectoryError::IoError)?;
        let tempdir_path = PathBuf::from(tempdir.path());
-        let directory = MmapDirectory {
-            root_path: tempdir_path,
-            mmap_cache: Arc::new(RwLock::new(MmapCache::default())),
-            _temp_directory: Arc::new(Some(tempdir)),
-        };
-        Ok(directory)
+        Ok(MmapDirectory::new(tempdir_path, Some(tempdir)))
    }

    /// Opens a MmapDirectory in a directory.
@@ -158,18 +292,14 @@ impl MmapDirectory {
                directory_path,
            )))
        } else {
-            Ok(MmapDirectory {
-                root_path: PathBuf::from(directory_path),
-                mmap_cache: Arc::new(RwLock::new(MmapCache::default())),
-                _temp_directory: Arc::new(None),
-            })
+            Ok(MmapDirectory::new(PathBuf::from(directory_path), None))
        }
    }

    /// Joins a relative_path to the directory `root_path`
    /// to create a proper complete `filepath`.
    fn resolve_path(&self, relative_path: &Path) -> PathBuf {
-        self.root_path.join(relative_path)
+        self.inner.root_path.join(relative_path)
    }

    /// Sync the root directory.
@@ -187,14 +317,14 @@ impl MmapDirectory {
        #[cfg(windows)]
        {
            use std::os::windows::fs::OpenOptionsExt;
-            use winapi::winbase;
+            use winapi::um::winbase;

            open_opts
                .write(true)
                .custom_flags(winbase::FILE_FLAG_BACKUP_SEMANTICS);
        }

-        let fd = open_opts.open(&self.root_path)?;
+        let fd = open_opts.open(&self.inner.root_path)?;
        fd.sync_all()?;
        Ok(())
    }
@@ -204,14 +334,35 @@ impl MmapDirectory {
    ///
    /// The `MmapDirectory` embeds a `MmapDirectory`
    /// to avoid multiplying the `mmap` system calls.
-    pub fn get_cache_info(&mut self) -> CacheInfo {
-        self.mmap_cache
+    pub fn get_cache_info(&self) -> CacheInfo {
+        self.inner
+            .mmap_cache
            .write()
+            .expect("mmap cache lock is poisoned")
+            .remove_weak_ref();
+        self.inner
+            .mmap_cache
+            .read()
            .expect("Mmap cache lock is poisoned.")
            .get_info()
    }
 }

+/// We rely on fs2 for file locking. On Linux & MacOS this
+/// uses BSD locks (`flock`). The lock is actually released when
+/// the `File` object is dropped and its associated file descriptor
+/// is closed.
+struct ReleaseLockFile {
+    _file: File,
+    path: PathBuf,
+}
+
+impl Drop for ReleaseLockFile {
+    fn drop(&mut self) {
+        debug!("Releasing lock {:?}", self.path);
+    }
+}
+
 /// This Write wraps a File, but has the specificity of
 /// call `sync_all` on flush.
 struct SafeFileWriter(File);
@@ -239,12 +390,18 @@ impl Seek for SafeFileWriter {
    }
 }

+impl TerminatingWrite for SafeFileWriter {
+    fn terminate_ref(&mut self, _: AntiCallToken) -> io::Result<()> {
+        self.flush()
+    }
+}
+
 impl Directory for MmapDirectory {
    fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenReadError> {
        debug!("Open Read {:?}", path);
        let full_path = self.resolve_path(path);

-        let mut mmap_cache = self.mmap_cache.write().map_err(|_| {
+        let mut mmap_cache = self.inner.mmap_cache.write().map_err(|_| {
            let msg = format!(
                "Failed to acquired write lock \
                 on mmap cache while reading {:?}",
@@ -252,11 +409,33 @@ impl Directory for MmapDirectory {
            );
            IOError::with_path(path.to_owned(), make_io_err(msg))
        })?;
-
        Ok(mmap_cache
            .get_mmap(&full_path)?
-            .map(ReadOnlySource::Mmap)
-            .unwrap_or_else(|| ReadOnlySource::Anonymous(SharedVecSlice::empty())))
+            .map(ReadOnlySource::from)
+            .unwrap_or_else(ReadOnlySource::empty))
+    }
+
+    /// Removes the file and then syncs the directory. The mmap cache is left
+    /// untouched: it only holds weak references, which are cleaned up lazily.
+    fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
+        let full_path = self.resolve_path(path);
+        match fs::remove_file(&full_path) {
+            Ok(_) => self
+                .sync_directory()
+                .map_err(|e| IOError::with_path(path.to_owned(), e).into()),
+            Err(e) => {
+                if e.kind() == io::ErrorKind::NotFound {
+                    Err(DeleteError::FileDoesNotExist(path.to_owned()))
+                } else {
+                    Err(IOError::with_path(path.to_owned(), e).into())
+                }
+            }
+        }
+    }
+
+    fn exists(&self, path: &Path) -> bool {
+        let full_path = self.resolve_path(path);
+        full_path.exists()
    }

    fn open_write(&mut self, path: &Path) -> Result<WritePtr, OpenWriteError> {
@@ -289,43 +468,6 @@ impl Directory for MmapDirectory {
        Ok(BufWriter::new(Box::new(writer)))
    }

-    /// Any entry associated to the path in the mmap will be
-    /// removed before the file is deleted.
-    fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
-        debug!("Deleting file {:?}", path);
-        let full_path = self.resolve_path(path);
-        let mut mmap_cache = self.mmap_cache.write().map_err(|_| {
-            let msg = format!(
-                "Failed to acquired write lock \
-                 on mmap cache while deleting {:?}",
-                path
-            );
-            IOError::with_path(path.to_owned(), make_io_err(msg))
-        })?;
-        mmap_cache.discard_from_cache(path);
-
-        // Removing the entry in the MMap cache.
-        // The munmap will appear on Drop,
-        // when the last reference is gone.
-        mmap_cache.cache.remove(&full_path);
-        match fs::remove_file(&full_path) {
-            Ok(_) => self.sync_directory()
-                .map_err(|e| IOError::with_path(path.to_owned(), e).into()),
-            Err(e) => {
-                if e.kind() == io::ErrorKind::NotFound {
-                    Err(DeleteError::FileDoesNotExist(path.to_owned()))
-                } else {
-                    Err(IOError::with_path(path.to_owned(), e).into())
-                }
-            }
-        }
-    }
-
-    fn exists(&self, path: &Path) -> bool {
-        let full_path = self.resolve_path(path);
-        full_path.exists()
-    }
-
    fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError> {
        let full_path = self.resolve_path(path);
        let mut buffer = Vec::new();
@@ -353,8 +495,28 @@ impl Directory for MmapDirectory {
        Ok(())
    }

-    fn box_clone(&self) -> Box<Directory> {
-        Box::new(self.clone())
+    fn acquire_lock(&self, lock: &Lock) -> Result<DirectoryLock, LockError> {
+        let full_path = self.resolve_path(&lock.filepath);
+        // We make sure that the file exists.
+        let file: File = OpenOptions::new()
+            .write(true)
+            .create(true) //< if the file does not exist yet, create it.
+            .open(&full_path)
+            .map_err(LockError::IOError)?;
+        if lock.is_blocking {
+            file.lock_exclusive().map_err(LockError::IOError)?;
+        } else {
+            file.try_lock_exclusive().map_err(|_| LockError::LockBusy)?
+        }
+        // dropping the file handle will release the lock.
+        Ok(DirectoryLock::from(Box::new(ReleaseLockFile {
+            path: lock.filepath.clone(),
+            _file: file,
+        })))
+    }
+
+    fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
+        self.inner.watch(watch_callback)
    }
 }

@@ -365,6 +527,17 @@ mod tests {
    // The following tests are specific to the MmapDirectory

    use super::*;
+    use crate::indexer::LogMergePolicy;
+    use crate::schema::{Schema, SchemaBuilder, TEXT};
+    use crate::Index;
+    use crate::ReloadPolicy;
+    use std::fs;
+    use std::sync::atomic::{AtomicUsize, Ordering};
+
+    #[test]
+    fn test_open_non_existent_path() {
+        assert!(MmapDirectory::open(PathBuf::from("./nowhere")).is_err());
+    }

    #[test]
    fn test_open_empty() {
@@ -384,7 +557,7 @@ mod tests {

    #[test]
    fn test_cache() {
-        let content = "abc".as_bytes();
+        let content = b"abc";

        // here we test if the cache releases
        // mmaps correctly.
@@ -400,26 +573,117 @@ mod tests {
                w.flush().unwrap();
            }
        }
-        {
-            for (i, path) in paths.iter().enumerate() {
-                let _r = mmap_directory.open_read(path).unwrap();
-                assert_eq!(mmap_directory.get_cache_info().mmapped.len(), i + 1);
-            }
-            for path in paths.iter() {
-                let _r = mmap_directory.open_read(path).unwrap();
-                assert_eq!(mmap_directory.get_cache_info().mmapped.len(), num_paths);
-            }
-            for (i, path) in paths.iter().enumerate() {
-                mmap_directory.delete(path).unwrap();
-                assert_eq!(
-                    mmap_directory.get_cache_info().mmapped.len(),
-                    num_paths - i - 1
-                );
-            }
+
+        let mut keep = vec![];
+        for (i, path) in paths.iter().enumerate() {
+            keep.push(mmap_directory.open_read(path).unwrap());
+            assert_eq!(mmap_directory.get_cache_info().mmapped.len(), i + 1);
+        }
+        assert_eq!(mmap_directory.get_cache_info().counters.hit, 0);
+        assert_eq!(mmap_directory.get_cache_info().counters.miss, 10);
+        assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 10);
+        for path in paths.iter() {
+            let _r = mmap_directory.open_read(path).unwrap();
+            assert_eq!(mmap_directory.get_cache_info().mmapped.len(), num_paths);
        }
        assert_eq!(mmap_directory.get_cache_info().counters.hit, 10);
        assert_eq!(mmap_directory.get_cache_info().counters.miss, 10);
+        assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 10);
+
+        for path in paths.iter() {
+            let _r = mmap_directory.open_read(path).unwrap();
+            assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 10);
+        }
+
+        assert_eq!(mmap_directory.get_cache_info().counters.hit, 20);
+        assert_eq!(mmap_directory.get_cache_info().counters.miss, 10);
+        assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 10);
+        drop(keep);
+        for path in paths.iter() {
+            let _r = mmap_directory.open_read(path).unwrap();
+            assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 1);
+        }
+        assert_eq!(mmap_directory.get_cache_info().counters.hit, 20);
+        assert_eq!(mmap_directory.get_cache_info().counters.miss, 20);
+        assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 0);
+
+        for path in &paths {
+            mmap_directory.delete(path).unwrap();
+        }
+        assert_eq!(mmap_directory.get_cache_info().counters.hit, 20);
+        assert_eq!(mmap_directory.get_cache_info().counters.miss, 20);
+        assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 0);
+        for path in paths.iter() {
+            assert!(mmap_directory.open_read(path).is_err());
+        }
+        assert_eq!(mmap_directory.get_cache_info().counters.hit, 20);
+        assert_eq!(mmap_directory.get_cache_info().counters.miss, 30);
        assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 0);
    }

+    #[test]
+    fn test_watch_wrapper() {
+        let counter: Arc<AtomicUsize> = Default::default();
+        let counter_clone = counter.clone();
+        let tmp_dir = tempfile::TempDir::new().unwrap();
+        let tmp_dirpath = tmp_dir.path().to_owned();
+        let mut watch_wrapper = WatcherWrapper::new(&tmp_dirpath).unwrap();
+        let tmp_file = tmp_dirpath.join(*META_FILEPATH);
+        let _handle = watch_wrapper.watch(Box::new(move || {
+            counter_clone.fetch_add(1, Ordering::SeqCst);
+        }));
+        let (sender, receiver) = crossbeam::channel::unbounded();
+        let _handle2 = watch_wrapper.watch(Box::new(move || {
+            let _ = sender.send(());
+        }));
+        assert_eq!(counter.load(Ordering::SeqCst), 0);
+        fs::write(&tmp_file, b"whateverwilldo").unwrap();
+        assert!(receiver.recv().is_ok());
+        assert!(counter.load(Ordering::SeqCst) >= 1);
+    }
+
+    #[test]
+    fn test_mmap_released() {
+        let mmap_directory = MmapDirectory::create_from_tempdir().unwrap();
+        let mut schema_builder: SchemaBuilder = Schema::builder();
+        let text_field = schema_builder.add_text_field("text", TEXT);
+        let schema = schema_builder.build();
+
+        {
+            let index = Index::create(mmap_directory.clone(), schema).unwrap();
+
+            let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
+            let mut log_merge_policy = LogMergePolicy::default();
+            log_merge_policy.set_min_merge_size(3);
+            index_writer.set_merge_policy(Box::new(log_merge_policy));
+            for _num_commits in 0..10 {
+                for _ in 0..10 {
+                    index_writer.add_document(doc!(text_field=>"abc"));
+                }
+                index_writer.commit().unwrap();
+            }
+
+            let reader = index
+                .reader_builder()
+                .reload_policy(ReloadPolicy::Manual)
+                .try_into()
+                .unwrap();
+
+            for _ in 0..4 {
+                index_writer.add_document(doc!(text_field=>"abc"));
+                index_writer.commit().unwrap();
+                reader.reload().unwrap();
+            }
+            index_writer.wait_merging_threads().unwrap();
+
+            reader.reload().unwrap();
+            let num_segments = reader.searcher().segment_readers().len();
+            assert!(num_segments <= 4);
+            assert_eq!(
+                num_segments * 7,
+                mmap_directory.get_cache_info().mmapped.len()
+            );
+        }
+        assert!(mmap_directory.get_cache_info().mmapped.is_empty());
+    }
 }
--- a/src/directory/mod.rs
+++ b/src/directory/mod.rs
@@ -8,158 +8,89 @@ WORM directory abstraction.
 mod mmap_directory;

 mod directory;
+mod directory_lock;
+mod footer;
 mod managed_directory;
 mod ram_directory;
 mod read_only_source;
-mod shared_vec_slice;
+mod watch_event_router;

 /// Errors specific to the directory module.
 pub mod error;

-use std::io::{BufWriter, Seek, Write};
-
-pub use self::directory::Directory;
+pub use self::directory::DirectoryLock;
+pub use self::directory::{Directory, DirectoryClone};
+pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK};
 pub use self::ram_directory::RAMDirectory;
 pub use self::read_only_source::ReadOnlySource;
+pub use self::watch_event_router::{WatchCallback, WatchCallbackList, WatchHandle};
+use std::io::{self, BufWriter, Write};
+use std::path::PathBuf;
+/// Outcome of the Garbage collection
+pub struct GarbageCollectionResult {
+    /// List of files that were deleted in this cycle
+    pub deleted_files: Vec<PathBuf>,
+    /// List of files that were scheduled to be deleted in this cycle,
+    /// but deletion did not work. This typically happens on windows,
+    /// as deleting a memory mapped file is forbidden.
+    ///
+    /// If a searcher is still held, a file cannot be deleted.
+    /// This is not considered a bug, the file will simply be deleted
+    /// in the next GC.
+    pub failed_to_delete_files: Vec<PathBuf>,
+}

 #[cfg(feature = "mmap")]
 pub use self::mmap_directory::MmapDirectory;

-pub(crate) use self::managed_directory::ManagedDirectory;
+pub use self::managed_directory::ManagedDirectory;

-/// Synonym of Seek + Write
-pub trait SeekableWrite: Seek + Write {}
-impl<T: Seek + Write> SeekableWrite for T {}
+/// Struct used to prevent callers from calling [`terminate_ref`](trait.TerminatingWrite#method.terminate_ref) directly
+///
+/// The point is that while the type is public, it cannot be built by anyone
+/// outside of this module.
+pub struct AntiCallToken(());
+
+/// Trait used to indicate when no more writes need to be done on a writer
+pub trait TerminatingWrite: Write {
+    /// Indicate that the writer will no longer be used. Internally calls terminate_ref.
+    fn terminate(mut self) -> io::Result<()>
+    where
+        Self: Sized,
+    {
+        self.terminate_ref(AntiCallToken(()))
+    }
+
+    /// You should implement this function to define custom behavior.
+    /// This function should flush any buffer it may hold.
+    fn terminate_ref(&mut self, _: AntiCallToken) -> io::Result<()>;
+}
+
+impl<W: TerminatingWrite + ?Sized> TerminatingWrite for Box<W> {
+    fn terminate_ref(&mut self, token: AntiCallToken) -> io::Result<()> {
+        self.as_mut().terminate_ref(token)
+    }
+}
+
+impl<W: TerminatingWrite> TerminatingWrite for BufWriter<W> {
+    fn terminate_ref(&mut self, a: AntiCallToken) -> io::Result<()> {
+        self.flush()?;
+        self.get_mut().terminate_ref(a)
+    }
+}
+
+#[cfg(test)]
+impl<'a> TerminatingWrite for &'a mut Vec<u8> {
+    fn terminate_ref(&mut self, _a: AntiCallToken) -> io::Result<()> {
+        self.flush()
+    }
+}

 /// Write object for Directory.
 ///
 /// `WritePtr` are required to implement both Write
 /// and Seek.
-pub type WritePtr = BufWriter<Box<SeekableWrite>>;
+pub type WritePtr = BufWriter<Box<dyn TerminatingWrite>>;

 #[cfg(test)]
-mod tests {
-
-    use super::*;
-    use std::io::{Seek, SeekFrom, Write};
-    use std::path::Path;
-
-    lazy_static! {
-        static ref TEST_PATH: &'static Path = Path::new("some_path_for_test");
-    }
-
-    #[test]
-    fn test_ram_directory() {
-        let mut ram_directory = RAMDirectory::create();
-        test_directory(&mut ram_directory);
-    }
-
-    #[test]
-    #[cfg(feature = "mmap")]
-    fn test_mmap_directory() {
-        let mut mmap_directory = MmapDirectory::create_from_tempdir().unwrap();
-        test_directory(&mut mmap_directory);
-    }
-
-    #[test]
-    #[should_panic]
-    fn ram_directory_panics_if_flush_forgotten() {
-        let mut ram_directory = RAMDirectory::create();
-        let mut write_file = ram_directory.open_write(*TEST_PATH).unwrap();
-        assert!(write_file.write_all(&[4]).is_ok());
-    }
-
-    fn test_simple(directory: &mut Directory) {
-        {
-            {
-                let mut write_file = directory.open_write(*TEST_PATH).unwrap();
-                assert!(directory.exists(*TEST_PATH));
-                write_file.write_all(&[4]).unwrap();
-                write_file.write_all(&[3]).unwrap();
-                write_file.write_all(&[7, 3, 5]).unwrap();
-                write_file.flush().unwrap();
-            }
-            let read_file = directory.open_read(*TEST_PATH).unwrap();
-            let data: &[u8] = &*read_file;
-            assert_eq!(data, &[4u8, 3u8, 7u8, 3u8, 5u8]);
-        }
-
-        assert!(directory.delete(*TEST_PATH).is_ok());
-        assert!(!directory.exists(*TEST_PATH));
-    }
-
-    fn test_seek(directory: &mut Directory) {
-        {
-            {
-                let mut write_file = directory.open_write(*TEST_PATH).unwrap();
-                write_file.write_all(&[4, 3, 7, 3, 5]).unwrap();
-                write_file.seek(SeekFrom::Start(0)).unwrap();
-                write_file.write_all(&[3, 1]).unwrap();
-                write_file.flush().unwrap();
-            }
-            let read_file = directory.open_read(*TEST_PATH).unwrap();
-            let data: &[u8] = &*read_file;
-            assert_eq!(data, &[3u8, 1u8, 7u8, 3u8, 5u8]);
-        }
-
-        assert!(directory.delete(*TEST_PATH).is_ok());
-    }
-
-    fn test_rewrite_forbidden(directory: &mut Directory) {
-        {
-            directory.open_write(*TEST_PATH).unwrap();
-            assert!(directory.exists(*TEST_PATH));
-        }
-        {
-            assert!(directory.open_write(*TEST_PATH).is_err());
-        }
-        assert!(directory.delete(*TEST_PATH).is_ok());
-    }
-
-    fn test_write_create_the_file(directory: &mut Directory) {
-        {
-            assert!(directory.open_read(*TEST_PATH).is_err());
-            let _w = directory.open_write(*TEST_PATH).unwrap();
-            assert!(directory.exists(*TEST_PATH));
-            assert!(directory.open_read(*TEST_PATH).is_ok());
-            assert!(directory.delete(*TEST_PATH).is_ok());
-        }
-    }
-
-    fn test_directory_delete(directory: &mut Directory) {
-        assert!(directory.open_read(*TEST_PATH).is_err());
-        let mut write_file = directory.open_write(*TEST_PATH).unwrap();
-        write_file.write_all(&[1, 2, 3, 4]).unwrap();
-        write_file.flush().unwrap();
-        {
-            let read_handle = directory.open_read(*TEST_PATH).unwrap();
-            {
-                assert_eq!(&*read_handle, &[1u8, 2u8, 3u8, 4u8]);
-
-                // Mapped files can't be deleted on Windows
-                if !cfg!(windows) {
-                    assert!(directory.delete(*TEST_PATH).is_ok());
-                    assert_eq!(&*read_handle, &[1u8, 2u8, 3u8, 4u8]);
-                }
-
-                assert!(directory.delete(Path::new("SomeOtherPath")).is_err());
-            }
-        }
-
-        if cfg!(windows) {
-            assert!(directory.delete(*TEST_PATH).is_ok());
-        }
-
-        assert!(directory.open_read(*TEST_PATH).is_err());
-        assert!(directory.delete(*TEST_PATH).is_err());
-    }
-
-    fn test_directory(directory: &mut Directory) {
-        test_simple(directory);
-        test_seek(directory);
-        test_rewrite_forbidden(directory);
-        test_write_create_the_file(directory);
-        test_directory_delete(directory);
-    }
-
-}
+mod tests;
--- a/Show More
+++ b/Show More