Mirror of https://github.com/quickwit-oss/tantivy.git, synced 2025-12-28 04:52:55 +00:00
Compare commits
135 Commits
SHA1:
176f67a266, 19babff849, bf2576adf9, b8241c5603, a4745151c0, e2ce326a8c, bb21d12a70, 4565aba62a,
545a7ec8dd, e68775d71c, dcc92d287e, b48f81c051, a3042e956b, 1fa10f0a0b, 279a9eb5e3, 21a24672d8,
a3f1fbaae6, a6e767c877, 6af0488dbe, 07d87e154b, 8b0b0133dd, 7b9752f897, c92f41aea8, dea16f1d9d,
236cfbec08, edcafb69bb, 14908479d5, ab4593eeb7, e75bb1d6a1, 63b9d62237, 0098e3d428, 69d5e4b9b1,
e0cdd3114d, f32b4a2ebe, 6ff60b8ed8, 8da28fb6cf, 0df2a221da, 5449ec3c11, 10f6c07c53, 06e7bd18e7,
37e4280c0a, 0ba1cf93f7, 21a9940726, 8600b8ea25, 30f4f85d48, 82d25b8397, 2104c0277c, dd37e109f2,
cc23194c58, 63868733a3, 644d8a3a10, e32dba1a97, a78aa4c259, 7e5f697d00, a78f4cca37, 2e44f0f099,
9ccba9f864, 9101bf5753, 23e97da9f6, 1d439e96f5, 934933582e, 98c7fbdc6f, cec9956a01, c64972e039,
b3b2421e8a, f570fe37d4, 6704ab6987, a12d211330, ee681a4dd1, d15efd6635, 18814ba0c1, f247935bb9,
6a197e023e, 96a313c6dd, fb9b1c1f41, e1bca6db9d, 8438eda01a, b373f00840, 46decdb0ea, 835cdc2fe8,
19756bb7d6, 57e1f8ed28, 2649c8a715, ede97eded6, 4b7ff78c5a, 948758ad78, d71fa43ca3, 1e5266d4c9,
537fc27231, af593b1116, 3d73c0c240, 3a8e524f77, c0641c2b47, ef3a16a129, a0a284fe91, 0feeef2684,
cc50bdb06a, 23c2c3ae7c, 674524ba91, 60a9a7f837, 5b5c706581, 3e14a76623, 8cde1c81e5, 8d0a29b137,
cbfb2fe19d, 09e00f1d42, 290620fdee, f0d1b85bd8, aaef546f91, 811ddf2226, 79a339d353, e45e4c79d9,
848bf41bc9, d11cb087a7, 2dd7422f42, e8707c02c0, 55928d756a, a4370bca64, 5a5c5a8ca5, 1b470dd474,
52b4575245, ddd2d5b04c, fa22b4041a, 8faee143fa, 366ce98f08, 190e60a41c, b9558801a1, 36728215ac,
39551a0418, 39b98b2e76, 616162400d, 694d164db6, ef442cefb1, 14da241f35, 346a9e4287
19
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
@@ -0,0 +1,19 @@
---
name: Bug report
about: Create a report to help us improve

---

**Describe the bug**
- What did you do?
- What happened?
- What was expected?

**Which version of tantivy are you using?**
If "master", ideally give the specific sha1 revision.

**To Reproduce**

If your bug is deterministic, can you give a minimal reproducing code?
Some bugs are not deterministic. Can you describe with precision in which context it happened?
If this is possible, can you share your code?
14
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
@@ -0,0 +1,14 @@
---
name: Feature request
about: Suggest an idea for this project

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**[Optional] describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
7
.github/ISSUE_TEMPLATE/question.md
vendored
Normal file
@@ -0,0 +1,7 @@
---
name: Question
about: Ask any question about tantivy's usage...

---

Try to be specific about your use case...
1
.gitignore
vendored
@@ -1,3 +1,4 @@
tantivy.iml
*.swp
target
target/debug
107
.travis.yml
@@ -9,74 +9,42 @@ sudo: required
|
||||
env:
|
||||
global:
|
||||
- CRATE_NAME=tantivy
|
||||
- TRAVIS_CARGO_NIGHTLY_FEATURE=""
|
||||
- secure: eC8HjTi1wgRVCsMAeXEXt8Ckr0YBSGOEnQkkW4/Nde/OZ9jJjz2nmP1ELQlDE7+czHub2QvYtDMG0parcHZDx/Kus0yvyn08y3g2rhGIiE7y8OCvQm1Mybu2D/p7enm6shXquQ6Z5KRfRq+18mHy80wy9ABMA/ukEZdvnfQ76/Een8/Lb0eHaDoXDXn3PqLVtByvSfQQ7OhS60dEScu8PWZ6/l1057P5NpdWbMExBE7Ro4zYXNhkJeGZx0nP/Bd4Jjdt1XfPzMEybV6NZ5xsTILUBFTmOOt603IsqKGov089NExqxYu5bD3K+S4MzF1Nd6VhomNPJqLDCfhlymJCUj5n5Ku4yidlhQbM4Ej9nGrBalJnhcjBjPua5tmMF2WCxP9muKn/2tIOu1/+wc0vMf9Yd3wKIkf5+FtUxCgs2O+NslWvmOMAMI/yD25m7hb4t1IwE/4Bk+GVcWJRWXbo0/m6ZUHzRzdjUY2a1qvw7C9udzdhg7gcnXwsKrSWi2NjMiIVw86l+Zim0nLpKIN41sxZHLaFRG63Ki8zQ/481LGn32awJ6i3sizKS0WD+N1DfR2qYMrwYHaMN0uR0OFXYTJkFvTFttAeUY3EKmRKAuMhmO2YRdSr4/j/G5E9HMc1gSGJj6PxgpQU7EpvxRsmoVAEJr0mszmOj9icGHep/FM=
|
||||
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
- kalakris-cmake
|
||||
packages:
|
||||
- gcc-4.8
|
||||
- g++-4.8
|
||||
- libcurl4-openssl-dev
|
||||
- libelf-dev
|
||||
- libdw-dev
|
||||
- binutils-dev
|
||||
- cmake
|
||||
|
||||
matrix:
|
||||
include:
|
||||
# Android
|
||||
- env: TARGET=aarch64-linux-android DISABLE_TESTS=1
|
||||
- env: TARGET=arm-linux-androideabi DISABLE_TESTS=1
|
||||
- env: TARGET=armv7-linux-androideabi DISABLE_TESTS=1
|
||||
- env: TARGET=i686-linux-android DISABLE_TESTS=1
|
||||
- env: TARGET=x86_64-linux-android DISABLE_TESTS=1
|
||||
|
||||
# iOS
|
||||
#- env: TARGET=aarch64-apple-ios DISABLE_TESTS=1
|
||||
# os: osx
|
||||
#- env: TARGET=armv7-apple-ios DISABLE_TESTS=1
|
||||
# os: osx
|
||||
#- env: TARGET=armv7s-apple-ios DISABLE_TESTS=1
|
||||
# os: osx
|
||||
#- env: TARGET=i386-apple-ios DISABLE_TESTS=1
|
||||
# os: osx
|
||||
- env: TARGET=x86_64-apple-ios DISABLE_TESTS=1
|
||||
os: osx
|
||||
#- env: TARGET=arm-linux-androideabi DISABLE_TESTS=1
|
||||
#- env: TARGET=armv7-linux-androideabi DISABLE_TESTS=1
|
||||
#- env: TARGET=i686-linux-android DISABLE_TESTS=1
|
||||
#- env: TARGET=x86_64-linux-android DISABLE_TESTS=1
|
||||
|
||||
# Linux
|
||||
- env: TARGET=aarch64-unknown-linux-gnu
|
||||
# - env: TARGET=arm-unknown-linux-gnueabi
|
||||
# - env: TARGET=armv7-unknown-linux-gnueabihf
|
||||
- env: TARGET=i686-unknown-linux-gnu
|
||||
#- env: TARGET=i686-unknown-linux-musl
|
||||
#- env: TARGET=mips-unknown-linux-gnu
|
||||
#- env: TARGET=mips64-unknown-linux-gnuabi64
|
||||
#- env: TARGET=mips64el-unknown-linux-gnuabi64
|
||||
#- env: TARGET=mipsel-unknown-linux-gnu
|
||||
#- env: TARGET=powerpc-unknown-linux-gnu
|
||||
#- env: TARGET=powerpc64-unknown-linux-gnu
|
||||
#- env: TARGET=powerpc64le-unknown-linux-gnu
|
||||
#- env: TARGET=s390x-unknown-linux-gnu DISABLE_TESTS=1
|
||||
- env: TARGET=x86_64-unknown-linux-gnu
|
||||
- env: TARGET=x86_64-unknown-linux-musl
|
||||
#- env: TARGET=aarch64-unknown-linux-gnu
|
||||
#- env: TARGET=i686-unknown-linux-gnu
|
||||
- env: TARGET=x86_64-unknown-linux-gnu CODECOV=1
|
||||
# - env: TARGET=x86_64-unknown-linux-musl CODECOV=1
|
||||
|
||||
# OSX
|
||||
#- env: TARGET=i686-apple-darwin
|
||||
# os: osx
|
||||
- env: TARGET=x86_64-apple-darwin
|
||||
os: osx
|
||||
|
||||
# *BSD
|
||||
#- env: TARGET=i686-unknown-freebsd DISABLE_TESTS=1
|
||||
#- env: TARGET=x86_64-unknown-freebsd DISABLE_TESTS=1
|
||||
#- env: TARGET=x86_64-unknown-netbsd DISABLE_TESTS=1
|
||||
|
||||
# Windows
|
||||
#- env: TARGET=x86_64-pc-windows-gnu
|
||||
|
||||
# Bare metal
|
||||
# These targets don't support std and as such are likely not suitable for
|
||||
# most crates.
|
||||
# - env: TARGET=thumbv6m-none-eabi
|
||||
# - env: TARGET=thumbv7em-none-eabi
|
||||
# - env: TARGET=thumbv7em-none-eabihf
|
||||
# - env: TARGET=thumbv7m-none-eabi
|
||||
|
||||
# Testing other channels
|
||||
#- env: TARGET=x86_64-unknown-linux-gnu
|
||||
# rust: nightly
|
||||
#- env: TARGET=x86_64-apple-darwin
|
||||
# os: osx
|
||||
# rust: nightly
|
||||
|
||||
before_install:
|
||||
- set -e
|
||||
- rustup self update
|
||||
@@ -85,31 +53,16 @@ install:
|
||||
- sh ci/install.sh
|
||||
- source ~/.cargo/env || true
|
||||
|
||||
before_script:
|
||||
- export PATH=$HOME/.cargo/bin:$PATH
|
||||
- cargo install cargo-update || echo "cargo-update already installed"
|
||||
- cargo install cargo-travis || echo "cargo-travis already installed"
|
||||
|
||||
script:
|
||||
- bash ci/script.sh
|
||||
|
||||
after_script: set +e
|
||||
|
||||
before_deploy:
|
||||
- sh ci/before_deploy.sh
|
||||
#
|
||||
#deploy:
|
||||
# # - Create a `public_repo` GitHub token. Go to: https://github.com/settings/tokens/new
|
||||
# # - Encrypt it: `travis encrypt 0123456789012345678901234567890123456789
|
||||
# # - Paste the output down here
|
||||
# api_key:
|
||||
# secure: eC8HjTi1wgRVCsMAeXEXt8Ckr0YBSGOEnQkkW4/Nde/OZ9jJjz2nmP1ELQlDE7+czHub2QvYtDMG0parcHZDx/Kus0yvyn08y3g2rhGIiE7y8OCvQm1Mybu2D/p7enm6shXquQ6Z5KRfRq+18mHy80wy9ABMA/ukEZdvnfQ76/Een8/Lb0eHaDoXDXn3PqLVtByvSfQQ7OhS60dEScu8PWZ6/l1057P5NpdWbMExBE7Ro4zYXNhkJeGZx0nP/Bd4Jjdt1XfPzMEybV6NZ5xsTILUBFTmOOt603IsqKGov089NExqxYu5bD3K+S4MzF1Nd6VhomNPJqLDCfhlymJCUj5n5Ku4yidlhQbM4Ej9nGrBalJnhcjBjPua5tmMF2WCxP9muKn/2tIOu1/+wc0vMf9Yd3wKIkf5+FtUxCgs2O+NslWvmOMAMI/yD25m7hb4t1IwE/4Bk+GVcWJRWXbo0/m6ZUHzRzdjUY2a1qvw7C9udzdhg7gcnXwsKrSWi2NjMiIVw86l+Zim0nLpKIN41sxZHLaFRG63Ki8zQ/481LGn32awJ6i3sizKS0WD+N1DfR2qYMrwYHaMN0uR0OFXYTJkFvTFttAeUY3EKmRKAuMhmO2YRdSr4/j/G5E9HMc1gSGJj6PxgpQU7EpvxRsmoVAEJr0mszmOj9icGHep/FM=
|
||||
# file_glob: true
|
||||
# file: $CRATE_NAME-$TRAVIS_TAG-$TARGET.*
|
||||
# on:
|
||||
# # TODO Here you can pick which targets will generate binary releases
|
||||
# # In this example, there are some targets that are tested using the stable
|
||||
# # and nightly channels. This condition makes sure there is only one release
|
||||
# # for such targets and that's generated using the stable channel
|
||||
# condition: $TRAVIS_RUST_VERSION = stable
|
||||
# tags: true
|
||||
# provider: releases
|
||||
# skip_cleanup: true
|
||||
|
||||
cache: cargo
|
||||
before_cache:
|
||||
@@ -124,4 +77,4 @@ before_cache:
|
||||
|
||||
notifications:
|
||||
email:
|
||||
on_success: never
|
||||
on_success: never
|
||||
22
CHANGELOG.md
@@ -1,3 +1,25 @@
Tantivy 0.8.0
=====================
*No change in the index format*
- API Breaking change in the collector API. (@jwolfe, @fulmicoton)
- Multithreaded search (@jwolfe, @fulmicoton)


Tantivy 0.7.1
=====================
*No change in the index format*
- Bugfix: NGramTokenizer panics on non ascii chars
- Added a space usage API

Tantivy 0.7
=====================
- Skip data for doc ids and positions (@fulmicoton),
greatly improving performance
- Tantivy error now rely on the failure crate (@drusellers)
- Added support for `AND`, `OR`, `NOT` syntax in addition to the `+`,`-` syntax
- Added a snippet generator with highlight (@vigneshsarma, @fulmicoton)
- Added a `TopFieldCollector` (@pentlander)

Tantivy 0.6.1
=========================
- Bugfix #324. GC removing was removing file that were still in useful
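The collector API break flagged for 0.8.0 is what turns `TopCollector` into `TopDocs` in the example diffs further down. A minimal, self-contained sketch of the new call shape, assembled from those examples (assuming tantivy 0.8 and the `doc!` macro; not an official migration guide):

```rust
#[macro_use]
extern crate tantivy;

use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, STORED, TEXT};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    // Build a tiny in-RAM index so the example is self-contained.
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT | STORED);
    let schema = schema_builder.build();
    let index = Index::create_in_ram(schema.clone());

    let mut index_writer = index.writer(50_000_000)?;
    index_writer.add_document(doc!(title => "Of Mice and Men"));
    index_writer.commit()?;
    index.load_searchers()?;

    let searcher = index.searcher();
    let query_parser = QueryParser::for_index(&index, vec![title]);
    let query = query_parser.parse_query("mice")?;

    // 0.7 style (removed in this release):
    //   let mut top_collector = TopCollector::with_limit(10);
    //   searcher.search(&*query, &mut top_collector)?;
    //   let doc_addresses = top_collector.docs();
    //
    // 0.8 style: the collector is passed by reference and `search`
    // returns its result ("fruit") directly.
    let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
    for (_score, doc_address) in top_docs {
        let retrieved_doc = searcher.doc(doc_address)?;
        println!("{}", schema.to_json(&retrieved_doc));
    }
    Ok(())
}
```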
55
Cargo.toml
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tantivy"
|
||||
version = "0.6.1"
|
||||
version = "0.8.0"
|
||||
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
||||
license = "MIT"
|
||||
categories = ["database-implementations", "data-structures"]
|
||||
@@ -12,67 +12,70 @@ readme = "README.md"
|
||||
keywords = ["search", "information", "retrieval"]
|
||||
|
||||
[dependencies]
|
||||
base64 = "0.9.1"
|
||||
base64 = "0.10.0"
|
||||
byteorder = "1.0"
|
||||
lazy_static = "0.2.1"
|
||||
tinysegmenter = "0.1.0"
|
||||
regex = "0.2"
|
||||
lazy_static = "1"
|
||||
regex = "1.0"
|
||||
fst = {version="0.3", default-features=false}
|
||||
fst-regex = { version="0.2" }
|
||||
lz4 = {version="1.20", optional=true}
|
||||
snap = {version="0.2"}
|
||||
atomicwrites = {version="0.2.2", optional=true}
|
||||
tempfile = "2.1"
|
||||
log = "0.3.6"
|
||||
combine = "2.2"
|
||||
tempfile = "3.0"
|
||||
log = "0.4"
|
||||
combine = "3"
|
||||
tempdir = "0.3"
|
||||
serde = "1.0"
|
||||
serde_derive = "1.0"
|
||||
serde_json = "1.0"
|
||||
num_cpus = "1.2"
|
||||
itertools = "0.5.9"
|
||||
itertools = "0.8"
|
||||
levenshtein_automata = {version="0.1", features=["fst_automaton"]}
|
||||
bit-set = "0.4.0"
|
||||
uuid = { version = "0.6", features = ["v4", "serde"] }
|
||||
chan = "0.1"
|
||||
crossbeam = "0.3"
|
||||
bit-set = "0.5"
|
||||
uuid = { version = "0.7", features = ["v4", "serde"] }
|
||||
crossbeam = "0.5"
|
||||
futures = "0.1"
|
||||
futures-cpupool = "0.1"
|
||||
error-chain = "0.8"
|
||||
owning_ref = "0.3"
|
||||
owning_ref = "0.4"
|
||||
stable_deref_trait = "1.0.0"
|
||||
rust-stemmers = "0.1.0"
|
||||
rust-stemmers = "1"
|
||||
downcast = { version="0.9" }
|
||||
matches = "0.1"
|
||||
bitpacking = "0.5"
|
||||
census = "0.1"
|
||||
fnv = "1.0.6"
|
||||
owned-read = "0.1"
|
||||
owned-read = "0.4"
|
||||
failure = "0.1"
|
||||
htmlescape = "0.3.1"
|
||||
fail = "0.2"
|
||||
scoped-pool = "1.0"
|
||||
murmurhash32 = "0.2"
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
winapi = "0.2"
|
||||
|
||||
[dev-dependencies]
|
||||
rand = "0.3"
|
||||
env_logger = "0.4"
|
||||
rand = "0.6"
|
||||
maplit = "1"
|
||||
|
||||
[profile.release]
|
||||
opt-level = 3
|
||||
debug = false
|
||||
lto = true
|
||||
debug-assertions = false
|
||||
|
||||
[profile.test]
|
||||
debug-assertions = true
|
||||
overflow-checks = true
|
||||
|
||||
[features]
|
||||
default = ["mmap"]
|
||||
# by default no-fail is disabled. We manually enable it when running test.
|
||||
default = ["mmap", "no_fail"]
|
||||
mmap = ["fst/mmap", "atomicwrites"]
|
||||
lz4-compression = ["lz4"]
|
||||
no_fail = ["fail/no_fail"]
|
||||
unstable = [] # useful for benches.
|
||||
|
||||
[badges]
|
||||
travis-ci = { repository = "tantivy-search/tantivy" }
|
||||
|
||||
[[example]]
|
||||
name = "simple_search"
|
||||
required-features = ["mmap"]
|
||||
|
||||
[[example]]
|
||||
name = "custom_tokenizer"
|
||||
|
||||
34
README.md
@@ -1,14 +1,27 @@
|
||||

|
||||
|
||||
[](https://travis-ci.org/tantivy-search/tantivy)
|
||||
[](https://coveralls.io/github/tantivy-search/tantivy?branch=master)
|
||||
[](https://codecov.io/gh/tantivy-search/tantivy)
|
||||
[](https://gitter.im/tantivy-search/tantivy?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://ci.appveyor.com/project/fulmicoton/tantivy/branch/master)
|
||||
[](https://saythanks.io/to/fulmicoton)
|
||||
|
||||

|
||||
|
||||
[](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/0)
|
||||
[](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/1)
|
||||
[](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/2)
|
||||
[](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/3)
|
||||
[](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/4)
|
||||
[](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/5)
|
||||
[](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/6)
|
||||
[](https://sourcerer.io/fame/fulmicoton/tantivy-search/tantivy/links/7)
|
||||
|
||||
|
||||
|
||||
**Tantivy** is a **full text search engine library** written in rust.
|
||||
|
||||
It is closer to Lucene than to Elastic Search and Solr in the sense it is not
|
||||
It is closer to [Apache Lucene](https://lucene.apache.org/) than to [Elastic Search](https://www.elastic.co/products/elasticsearch) and [Apache Solr](https://lucene.apache.org/solr/) in the sense it is not
|
||||
an off-the-shelf search engine server, but rather a crate that can be used
|
||||
to build such a search engine.
|
||||
|
||||
@@ -17,10 +30,11 @@ Tantivy is, in fact, strongly inspired by Lucene's design.
|
||||
# Features
|
||||
|
||||
- Full-text search
|
||||
- Fast (check out the :racehorse: :sparkles: [benchmark](https://tantivy-search.github.io/bench/) :sparkles: :racehorse:)
|
||||
- Tiny startup time (<10ms), perfect for command line tools
|
||||
- BM25 scoring (the same as lucene)
|
||||
- Basic query language (`+michael +jackson`)
|
||||
- Phrase queries search (\"michael jackson\"`)
|
||||
- Natural query language `(michael AND jackson) OR "king of pop"`
|
||||
- Phrase queries search (`"michael jackson"`)
|
||||
- Incremental indexing
|
||||
- Multithreaded indexing (indexing English Wikipedia takes < 3 minutes on my desktop)
|
||||
- Mmap directory
|
||||
@@ -30,12 +44,14 @@ Tantivy is, in fact, strongly inspired by Lucene's design.
|
||||
- LZ4 compressed document store
|
||||
- Range queries
|
||||
- Faceted search
|
||||
- Configurable indexing (optional term frequency and position indexing
|
||||
- Configurable indexing (optional term frequency and position indexing)
|
||||
- Cheesy logo with a horse
|
||||
|
||||
# Non-features
|
||||
|
||||
- Distributed search and will not be in the scope of tantivy.
|
||||
- Distributed search is out of the scope of tantivy. That being said, tantivy is meant as a
|
||||
library upon which one could build a distributed search. Serializable/mergeable collector state for instance,
|
||||
are within the scope of tantivy.
|
||||
|
||||
|
||||
# Supported OS and compiler
|
||||
@@ -64,6 +80,10 @@ To check out and run tests, you can simply run :
|
||||
cd tantivy
|
||||
cargo build
|
||||
|
||||
## Running tests
|
||||
|
||||
Some tests will not run with just `cargo test` because of `fail-rs`.
|
||||
To run the tests exhaustively, run `./run-tests.sh`.
|
||||
|
||||
# Contribute
|
||||
|
||||
|
||||
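The README feature list above advertises the basic `+`/`-` syntax, the natural `AND`/`OR` syntax and phrase queries. A small sketch of feeding those query strings to the `QueryParser`, assuming the 0.8 API used in the examples later in this diff (the exact parser behaviour is not verified here):

```rust
#[macro_use]
extern crate tantivy;

use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, STORED, TEXT};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT | STORED);
    let body = schema_builder.add_text_field("body", TEXT);
    let schema = schema_builder.build();

    let index = Index::create_in_ram(schema);
    let mut index_writer = index.writer(50_000_000)?;
    index_writer.add_document(doc!(
        title => "Michael Jackson",
        body => "the king of pop"
    ));
    index_writer.commit()?;
    index.load_searchers()?;
    let searcher = index.searcher();

    // Search both fields by default, as in the basic_search example.
    let query_parser = QueryParser::for_index(&index, vec![title, body]);
    for q in vec![
        "+michael +jackson",                        // basic +/- syntax
        "(michael AND jackson) OR \"king of pop\"", // natural AND/OR syntax
        "\"michael jackson\"",                      // phrase query
    ] {
        let query = query_parser.parse_query(q)?;
        let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
        println!("{} -> {} hit(s)", q, top_docs.len());
    }
    Ok(())
}
```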
@@ -18,5 +18,5 @@ install:
build: false

test_script:
- REM SET RUST_LOG=tantivy,test & cargo test --verbose
- REM SET RUST_BACKTRACE=1 & cargo run --example simple_search
- REM SET RUST_LOG=tantivy,test & cargo test --verbose --no-default-features --features mmap -- --test-threads 1
- REM SET RUST_BACKTRACE=1 & cargo build --examples
28
ci/script.sh
@@ -1,20 +1,26 @@
#!/usr/bin/env bash

# This script takes care of testing your crate

set -ex

main() {
cross build --target $TARGET
cross build --target $TARGET --release

if [ ! -z $DISABLE_TESTS ]; then
return
if [ ! -z $CODECOV ]; then
echo "Codecov"
cargo build --verbose && cargo coverage --verbose && bash <(curl -s https://codecov.io/bash) -s target/kcov
else
echo "Build"
cross build --target $TARGET
if [ ! -z $DISABLE_TESTS ]; then
return
fi
echo "Test"
cross test --target $TARGET --no-default-features --features mmap -- --test-threads 1
fi

cross test --target $TARGET
# cross test --target $TARGET --release

# cross run --target $TARGET
# cross run --target $TARGET --release
for example in $(ls examples/*.rs)
do
cargo run --example $(basename $example .rs)
done
}

# we don't run the "test phase" when doing deploys
1
doc/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
book
5
doc/book.toml
Normal file
@@ -0,0 +1,5 @@
[book]
authors = ["Paul Masurel"]
multilingual = false
src = "src"
title = "Tantivy, the user guide"
15
doc/src/SUMMARY.md
Normal file
@@ -0,0 +1,15 @@
# Summary



[Avant Propos](./avant-propos.md)

- [Segments](./basis.md)
- [Defining your schema](./schema.md)
- [Facetting](./facetting.md)
- [Innerworkings](./innerworkings.md)
- [Inverted index](./inverted_index.md)
- [Best practise](./inverted_index.md)

[Frequently Asked Questions](./faq.md)
[Examples](./examples.md)
34
doc/src/avant-propos.md
Normal file
@@ -0,0 +1,34 @@
# Foreword, what is the scope of tantivy?

> Tantivy is a **search** engine **library** for Rust.

If you are familiar with Lucene, it's an excellent approximation to consider tantivy as Lucene for rust. tantivy is heavily inspired by Lucene's design and
they both have the same scope and targetted use cases.

If you are not familiar with Lucene, let's break down our little tagline.

- **Search** here means full-text search : fundamentally, tantivy is here to help you
identify efficiently what are the documents matching a given query in your corpus.
But modern search UI are so much more : text processing, facetting, autocomplete, fuzzy search, good
relevancy, collapsing, highlighting, spatial search.

While some of these features are not available in tantivy yet, all of these are relevant
feature requests. Tantivy's objective is to offer a solid toolbox to create the best search
experience. But keep in mind this is just a toolbox.
Which bring us to the second keyword...

- **Library** means that you will have to write code. tantivy is not an *all-in-one* server solution like elastic search for instance.

Sometimes a functionality will not be available in tantivy because it is too
specific to your use case. By design, tantivy should make it possible to extend
the available set of features using the existing rock-solid datastructures.

Most frequently this will mean writing your own `Collector`, your own `Scorer` or your own
`TokenFilter`... Some of your requirements may also be related to
something closer to architecture or operations. For instance, you may
want to build a large corpus on Hadoop, fine-tune the merge policy to keep your
index sharded in a time-wise fashion, or you may want to convert and existing
index from a different format.

Tantivy exposes a lot of low level API to do all of these things.
77
doc/src/basis.md
Normal file
@@ -0,0 +1,77 @@
# Anatomy of an index

## Straight from disk

Tantivy accesses its data using an abstracting trait called `Directory`.
In theory, one can come and override the data access logic. In practise, the
trait somewhat assumes that your data can be mapped to memory, and tantivy
seems deeply married to using `mmap` for its io [^1], and the only persisting
directory shipped with tantivy is the `MmapDirectory`.

While this design has some downsides, this greatly simplifies the source code of
tantivy. Caching is also entirely delegated to the OS.

`tantivy` works entirely (or almost) by directly reading the datastructures as they are layed on disk. As a result, the act of opening an indexing does not involve loading different datastructures from the disk into random access memory : starting a process, opening an index, and performing your first query can typically be done in a matter of milliseconds.

This is an interesting property for a command line search engine, or for some multi-tenant log search engine : spawning a new process for each new query can be a perfectly sensible solution in some use case.

In later chapters, we will discuss tantivy's inverted index data layout.
One key take away is that to achieve great performance, search indexes are extremely compact.
Of course this is crucial to reduce IO, and ensure that as much of our index can sit in RAM.

Also, whenever possible its data is accessed sequentially. Of course, this is an amazing property when tantivy needs to access the data from your spinning hard disk, but this is also
critical for performance, if your data is read from and an `SSD` or even already in your pagecache.


## Segments, and the log method

That kind of compact layout comes at one cost: it prevents our datastructures from being dynamic.
In fact, the `Directory` trait does not even allow you to modify part of a file.

To allow the addition / deletion of documents, and create the illusion that
your index is dynamic (i.e.: adding and deleting documents), tantivy uses a common database trick sometimes referred to as the *log method*.

Let's forget about deletes for a moment.

As you add documents, these documents are processed and stored in a dedicated datastructure, in a `RAM` buffer. This datastructure is not ready for search, but it is useful to receive your data and rearrange it very rapidly.

As you add documents, this buffer will reach its capacity and tantivy will transparently stop adding document to it and start converting this datastructure to its final read-only format on disk. Once written, an brand empty buffer is available to resume adding documents.

The resulting chunk of index obtained after this serialization is called a `Segment`.

> A segment is a self-contained atomic piece of index. It is identified with a UUID, and all of its files are identified using the naming scheme : `<UUID>.*`.

Which brings us to the nature of a tantivy `Index`.

> A tantivy `Index` is a collection of `Segments`.

Physically, this really just means and index is a bunch of segment files in a given `Directory`,
linked together by a `meta.json` file. This transparency can become extremely handy
to get tantivy to fit your use case:

*Example 1* You could for instance use hadoop to build a very large search index in a timely manner, copy all of the resulting segment files in the same directory and edit the `meta.json` to get a functional index.[^2]

*Example 2* You could also disable your merge policy and enforce daily segments. Removing data after one week can then be done very efficiently by just editing the `meta.json` and deleting the files associated to segment `D-7`.


# Merging

As you index more and more data, your index will accumulate more and more segments.
Having a lot of small segments is not really optimal. There is a bit of redundancy in having
all these term dictionary. Also when searching, we will need to do term lookups as many times as we have segments. It can hurt search performance a bit.

That's where merging or compacting comes into place. Tantivy will continuously consider merge
opportunities and start merging segments in the background.


# Indexing throughput, number of indexing threads


[^1]: This may eventually change.

[^2]: Be careful however. By default these files will not be considered as *managed* by tantivy. This means they will never be garbage collected by tantivy, regardless of whether they become obsolete or not.
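A short sketch of the segment layout described above: each commit flushes the RAM buffer into a new segment, and the directory ends up with `meta.json` plus `<UUID>.*` files. It only uses calls that appear in this diff's examples (`Index::create_in_dir`, `commit`, `wait_merging_threads`); the directory listing at the end is illustrative, not an API guarantee:

```rust
#[macro_use]
extern crate tantivy;
extern crate tempdir;

use tantivy::schema::{Schema, TEXT};
use tantivy::Index;
use tempdir::TempDir;

fn main() -> tantivy::Result<()> {
    let dir = TempDir::new("tantivy_segments_demo")?;

    let mut schema_builder = Schema::builder();
    let body = schema_builder.add_text_field("body", TEXT);
    let schema = schema_builder.build();

    let index = Index::create_in_dir(dir.path(), schema)?;
    let mut index_writer = index.writer(50_000_000)?;

    // Two commits -> (at least) two segments, until a background merge
    // compacts them.
    index_writer.add_document(doc!(body => "first batch"));
    index_writer.commit()?;
    index_writer.add_document(doc!(body => "second batch"));
    index_writer.commit()?;
    index_writer.wait_merging_threads()?;

    // `meta.json` ties the segment files together.
    for entry in std::fs::read_dir(dir.path())? {
        println!("{}", entry?.path().display());
    }
    Ok(())
}
```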
0
doc/src/best_practise.md.rs
Normal file
3
doc/src/examples.md
Normal file
@@ -0,0 +1,3 @@
# Examples

- [Basic search](/examples/basic_search.html)
5
doc/src/facetting.md
Normal file
@@ -0,0 +1,5 @@
# Facetting

wewew

## weeewe
0
doc/src/faq.md
Normal file
1
doc/src/innerworkings.md
Normal file
@@ -0,0 +1 @@
# Innerworkings
1
doc/src/inverted_index.md
Normal file
@@ -0,0 +1 @@
# Inverted index
1
doc/src/schema.md
Normal file
@@ -0,0 +1 @@
# Defining your schema
@@ -1,26 +1,32 @@
|
||||
extern crate tantivy;
|
||||
// # Basic Example
|
||||
//
|
||||
// This example covers the basic functionalities of
|
||||
// tantivy.
|
||||
//
|
||||
// We will :
|
||||
// - define our schema
|
||||
// = create an index in a directory
|
||||
// - index few documents in our index
|
||||
// - search for the best document matchings "sea whale"
|
||||
// - retrieve the best document original content.
|
||||
|
||||
extern crate tempdir;
|
||||
|
||||
// ---
|
||||
// Importing tantivy...
|
||||
#[macro_use]
|
||||
extern crate serde_json;
|
||||
|
||||
use std::path::Path;
|
||||
use tantivy::collector::TopCollector;
|
||||
extern crate tantivy;
|
||||
use tantivy::collector::TopDocs;
|
||||
use tantivy::query::QueryParser;
|
||||
use tantivy::schema::*;
|
||||
use tantivy::Index;
|
||||
use tempdir::TempDir;
|
||||
|
||||
fn main() {
|
||||
fn main() -> tantivy::Result<()> {
|
||||
// Let's create a temporary directory for the
|
||||
// sake of this example
|
||||
if let Ok(dir) = TempDir::new("tantivy_example_dir") {
|
||||
run_example(dir.path()).unwrap();
|
||||
dir.close().unwrap();
|
||||
}
|
||||
}
|
||||
let index_path = TempDir::new("tantivy_example_dir")?;
|
||||
|
||||
fn run_example(index_path: &Path) -> tantivy::Result<()> {
|
||||
// # Defining the schema
|
||||
//
|
||||
// The Tantivy index requires a very strict schema.
|
||||
@@ -29,13 +35,13 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
|
||||
// be indexed".
|
||||
|
||||
// first we need to define a schema ...
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
let mut schema_builder = Schema::builder();
|
||||
|
||||
// Our first field is title.
|
||||
// We want full-text search for it, and we also want
|
||||
// to be able to retrieve the document after the search.
|
||||
//
|
||||
// TEXT | STORED is some syntactic sugar to describe
|
||||
// `TEXT | STORED` is some syntactic sugar to describe
|
||||
// that.
|
||||
//
|
||||
// `TEXT` means the field should be tokenized and indexed,
|
||||
@@ -64,21 +70,22 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
|
||||
//
|
||||
// This will actually just save a meta.json
|
||||
// with our schema in the directory.
|
||||
let index = Index::create_in_dir(index_path, schema.clone())?;
|
||||
let index = Index::create_in_dir(&index_path, schema.clone())?;
|
||||
|
||||
// To insert document we need an index writer.
|
||||
// There must be only one writer at a time.
|
||||
// This single `IndexWriter` is already
|
||||
// multithreaded.
|
||||
//
|
||||
// Here we use a buffer of 50MB per thread. Using a bigger
|
||||
// heap for the indexer can increase its throughput.
|
||||
// Here we give tantivy a budget of `50MB`.
|
||||
// Using a bigger heap for the indexer may increase
|
||||
// throughput, but 50 MB is already plenty.
|
||||
let mut index_writer = index.writer(50_000_000)?;
|
||||
|
||||
// Let's index our documents!
|
||||
// We first need a handle on the title and the body field.
|
||||
|
||||
// ### Create a document "manually".
|
||||
// ### Adding documents
|
||||
//
|
||||
// We can create a document manually, by setting the fields
|
||||
// one by one in a Document object.
|
||||
@@ -96,46 +103,47 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
|
||||
// ... and add it to the `IndexWriter`.
|
||||
index_writer.add_document(old_man_doc);
|
||||
|
||||
// ### Create a document directly from json.
|
||||
//
|
||||
// Alternatively, we can use our schema to parse a
|
||||
// document object directly from json.
|
||||
// The document is a string, but we use the `json` macro
|
||||
// from `serde_json` for the convenience of multi-line support.
|
||||
let json = json!({
|
||||
"title": "Of Mice and Men",
|
||||
"body": "A few miles south of Soledad, the Salinas River drops in close to the hillside \
|
||||
bank and runs deep and green. The water is warm too, for it has slipped twinkling \
|
||||
over the yellow sands in the sunlight before reaching the narrow pool. On one \
|
||||
side of the river the golden foothill slopes curve up to the strong and rocky \
|
||||
Gabilan Mountains, but on the valley side the water is lined with trees—willows \
|
||||
fresh and green with every spring, carrying in their lower leaf junctures the \
|
||||
debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
|
||||
limbs and branches that arch over the pool"
|
||||
});
|
||||
let mice_and_men_doc = schema.parse_document(&json.to_string())?;
|
||||
// For convenience, tantivy also comes with a macro to
|
||||
// reduce the boilerplate above.
|
||||
index_writer.add_document(doc!(
|
||||
title => "Of Mice and Men",
|
||||
body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \
|
||||
bank and runs deep and green. The water is warm too, for it has slipped twinkling \
|
||||
over the yellow sands in the sunlight before reaching the narrow pool. On one \
|
||||
side of the river the golden foothill slopes curve up to the strong and rocky \
|
||||
Gabilan Mountains, but on the valley side the water is lined with trees—willows \
|
||||
fresh and green with every spring, carrying in their lower leaf junctures the \
|
||||
debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
|
||||
limbs and branches that arch over the pool"
|
||||
));
|
||||
|
||||
index_writer.add_document(mice_and_men_doc);
|
||||
index_writer.add_document(doc!(
|
||||
title => "Of Mice and Men",
|
||||
body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \
|
||||
bank and runs deep and green. The water is warm too, for it has slipped twinkling \
|
||||
over the yellow sands in the sunlight before reaching the narrow pool. On one \
|
||||
side of the river the golden foothill slopes curve up to the strong and rocky \
|
||||
Gabilan Mountains, but on the valley side the water is lined with trees—willows \
|
||||
fresh and green with every spring, carrying in their lower leaf junctures the \
|
||||
debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
|
||||
limbs and branches that arch over the pool"
|
||||
));
|
||||
|
||||
// Multi-valued field are allowed, they are
|
||||
// expressed in JSON by an array.
|
||||
// The following document has two titles.
|
||||
let json = json!({
|
||||
"title": ["Frankenstein", "The Modern Prometheus"],
|
||||
"body": "You will rejoice to hear that no disaster has accompanied the commencement of an \
|
||||
enterprise which you have regarded with such evil forebodings. I arrived here \
|
||||
yesterday, and my first task is to assure my dear sister of my welfare and \
|
||||
increasing confidence in the success of my undertaking."
|
||||
});
|
||||
let frankenstein_doc = schema.parse_document(&json.to_string())?;
|
||||
|
||||
index_writer.add_document(frankenstein_doc);
|
||||
// Multivalued field just need to be repeated.
|
||||
index_writer.add_document(doc!(
|
||||
title => "Frankenstein",
|
||||
title => "The Modern Prometheus",
|
||||
body => "You will rejoice to hear that no disaster has accompanied the commencement of an \
|
||||
enterprise which you have regarded with such evil forebodings. I arrived here \
|
||||
yesterday, and my first task is to assure my dear sister of my welfare and \
|
||||
increasing confidence in the success of my undertaking."
|
||||
));
|
||||
|
||||
// This is an example, so we will only index 3 documents
|
||||
// here. You can check out tantivy's tutorial to index
|
||||
// the English wikipedia. Tantivy's indexing is rather fast.
|
||||
// Indexing 5 million articles of the English wikipedia takes
|
||||
// around 4 minutes on my computer!
|
||||
// around 3 minutes on my computer!
|
||||
|
||||
// ### Committing
|
||||
//
|
||||
@@ -160,17 +168,29 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
|
||||
|
||||
// # Searching
|
||||
//
|
||||
// ### Searcher
|
||||
//
|
||||
// Let's search our index. Start by reloading
|
||||
// searchers in the index. This should be done
|
||||
// after every commit().
|
||||
// after every `commit()`.
|
||||
index.load_searchers()?;
|
||||
|
||||
// Afterwards create one (or more) searchers.
|
||||
// We now need to acquire a searcher.
|
||||
// Some search experience might require more than
|
||||
// one query.
|
||||
//
|
||||
// You should create a searcher
|
||||
// every time you start a "search query".
|
||||
// The searcher ensure that we get to work
|
||||
// with a consistent version of the index.
|
||||
//
|
||||
// Acquiring a `searcher` is very cheap.
|
||||
//
|
||||
// You should acquire a searcher every time you
|
||||
// start processing a request and
|
||||
// and release it right after your query is finished.
|
||||
let searcher = index.searcher();
|
||||
|
||||
// ### Query
|
||||
|
||||
// The query parser can interpret human queries.
|
||||
// Here, if the user does not specify which
|
||||
// field they want to search, tantivy will search
|
||||
@@ -193,15 +213,10 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
|
||||
//
|
||||
// We are not interested in all of the documents but
|
||||
// only in the top 10. Keeping track of our top 10 best documents
|
||||
// is the role of the TopCollector.
|
||||
let mut top_collector = TopCollector::with_limit(10);
|
||||
// is the role of the TopDocs.
|
||||
|
||||
// We can now perform our query.
|
||||
searcher.search(&*query, &mut top_collector)?;
|
||||
|
||||
// Our top collector now contains the 10
|
||||
// most relevant doc ids...
|
||||
let doc_addresses = top_collector.docs();
|
||||
let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
|
||||
|
||||
// The actual documents still need to be
|
||||
// retrieved from Tantivy's store.
|
||||
@@ -210,16 +225,10 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
|
||||
// the document returned will only contain
|
||||
// a title.
|
||||
|
||||
for doc_address in doc_addresses {
|
||||
let retrieved_doc = searcher.doc(&doc_address)?;
|
||||
for (_score, doc_address) in top_docs {
|
||||
let retrieved_doc = searcher.doc(doc_address)?;
|
||||
println!("{}", schema.to_json(&retrieved_doc));
|
||||
}
|
||||
|
||||
// Wait for indexing and merging threads to shut down.
|
||||
// Usually this isn't needed, but in `main` we try to
|
||||
// delete the temporary directory and that fails on
|
||||
// Windows if the files are still open.
|
||||
index_writer.wait_merging_threads()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
187
examples/custom_collector.rs
Normal file
@@ -0,0 +1,187 @@
|
||||
// # Custom collector example
|
||||
//
|
||||
// This example shows how you can implement your own
|
||||
// collector. As an example, we will compute a collector
|
||||
// that computes the standard deviation of a given fast field.
|
||||
//
|
||||
// Of course, you can have a look at the tantivy's built-in collectors
|
||||
// such as the `CountCollector` for more examples.
|
||||
|
||||
extern crate tempdir;
|
||||
|
||||
// ---
|
||||
// Importing tantivy...
|
||||
#[macro_use]
|
||||
extern crate tantivy;
|
||||
use tantivy::collector::{Collector, SegmentCollector};
|
||||
use tantivy::fastfield::FastFieldReader;
|
||||
use tantivy::query::QueryParser;
|
||||
use tantivy::schema::Field;
|
||||
use tantivy::schema::{Schema, FAST, INT_INDEXED, TEXT};
|
||||
use tantivy::Index;
|
||||
use tantivy::SegmentReader;
|
||||
|
||||
#[derive(Default)]
|
||||
struct Stats {
|
||||
count: usize,
|
||||
sum: f64,
|
||||
squared_sum: f64,
|
||||
}
|
||||
|
||||
impl Stats {
|
||||
pub fn count(&self) -> usize {
|
||||
self.count
|
||||
}
|
||||
|
||||
pub fn mean(&self) -> f64 {
|
||||
self.sum / (self.count as f64)
|
||||
}
|
||||
|
||||
fn square_mean(&self) -> f64 {
|
||||
self.squared_sum / (self.count as f64)
|
||||
}
|
||||
|
||||
pub fn standard_deviation(&self) -> f64 {
|
||||
let mean = self.mean();
|
||||
(self.square_mean() - mean * mean).sqrt()
|
||||
}
|
||||
|
||||
fn non_zero_count(self) -> Option<Stats> {
|
||||
if self.count == 0 {
|
||||
None
|
||||
} else {
|
||||
Some(self)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct StatsCollector {
|
||||
field: Field,
|
||||
}
|
||||
|
||||
impl StatsCollector {
|
||||
fn with_field(field: Field) -> StatsCollector {
|
||||
StatsCollector { field }
|
||||
}
|
||||
}
|
||||
|
||||
impl Collector for StatsCollector {
|
||||
// That's the type of our result.
|
||||
// Our standard deviation will be a float.
|
||||
type Fruit = Option<Stats>;
|
||||
|
||||
type Child = StatsSegmentCollector;
|
||||
|
||||
fn for_segment(
|
||||
&self,
|
||||
_segment_local_id: u32,
|
||||
segment: &SegmentReader,
|
||||
) -> tantivy::Result<StatsSegmentCollector> {
|
||||
let fast_field_reader = segment.fast_field_reader(self.field)?;
|
||||
Ok(StatsSegmentCollector {
|
||||
fast_field_reader,
|
||||
stats: Stats::default(),
|
||||
})
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
// this collector does not care about score.
|
||||
false
|
||||
}
|
||||
|
||||
fn merge_fruits(&self, segment_stats: Vec<Option<Stats>>) -> tantivy::Result<Option<Stats>> {
|
||||
let mut stats = Stats::default();
|
||||
for segment_stats_opt in segment_stats {
|
||||
if let Some(segment_stats) = segment_stats_opt {
|
||||
stats.count += segment_stats.count;
|
||||
stats.sum += segment_stats.sum;
|
||||
stats.squared_sum += segment_stats.squared_sum;
|
||||
}
|
||||
}
|
||||
Ok(stats.non_zero_count())
|
||||
}
|
||||
}
|
||||
|
||||
struct StatsSegmentCollector {
|
||||
fast_field_reader: FastFieldReader<u64>,
|
||||
stats: Stats,
|
||||
}
|
||||
|
||||
impl SegmentCollector for StatsSegmentCollector {
|
||||
type Fruit = Option<Stats>;
|
||||
|
||||
fn collect(&mut self, doc: u32, _score: f32) {
|
||||
let value = self.fast_field_reader.get(doc) as f64;
|
||||
self.stats.count += 1;
|
||||
self.stats.sum += value;
|
||||
self.stats.squared_sum += value * value;
|
||||
}
|
||||
|
||||
fn harvest(self) -> <Self as SegmentCollector>::Fruit {
|
||||
self.stats.non_zero_count()
|
||||
}
|
||||
}
|
||||
|
||||
fn main() -> tantivy::Result<()> {
|
||||
// # Defining the schema
|
||||
//
|
||||
// The Tantivy index requires a very strict schema.
|
||||
// The schema declares which fields are in the index,
|
||||
// and for each field, its type and "the way it should
|
||||
// be indexed".
|
||||
|
||||
// first we need to define a schema ...
|
||||
let mut schema_builder = Schema::builder();
|
||||
|
||||
// We'll assume a fictional index containing
|
||||
// products, and with a name, a description, and a price.
|
||||
let product_name = schema_builder.add_text_field("name", TEXT);
|
||||
let product_description = schema_builder.add_text_field("description", TEXT);
|
||||
let price = schema_builder.add_u64_field("price", INT_INDEXED | FAST);
|
||||
let schema = schema_builder.build();
|
||||
|
||||
// # Indexing documents
|
||||
//
|
||||
// Lets index a bunch of fake documents for the sake of
|
||||
// this example.
|
||||
let index = Index::create_in_ram(schema.clone());
|
||||
|
||||
let mut index_writer = index.writer(50_000_000)?;
|
||||
index_writer.add_document(doc!(
|
||||
product_name => "Super Broom 2000",
|
||||
product_description => "While it is ok for short distance travel, this broom \
|
||||
was designed quiditch. It will up your game.",
|
||||
price => 30_200u64
|
||||
));
|
||||
index_writer.add_document(doc!(
|
||||
product_name => "Turbulobroom",
|
||||
product_description => "You might have heard of this broom before : it is the sponsor of the Wales team.\
|
||||
You'll enjoy its sharp turns, and rapid acceleration",
|
||||
price => 29_240u64
|
||||
));
|
||||
index_writer.add_document(doc!(
|
||||
product_name => "Broomio",
|
||||
product_description => "Great value for the price. This broom is a market favorite",
|
||||
price => 21_240u64
|
||||
));
|
||||
index_writer.add_document(doc!(
|
||||
product_name => "Whack a Mole",
|
||||
product_description => "Prime quality bat.",
|
||||
price => 5_200u64
|
||||
));
|
||||
index_writer.commit()?;
|
||||
index.load_searchers()?;
|
||||
|
||||
let searcher = index.searcher();
|
||||
let query_parser = QueryParser::for_index(&index, vec![product_name, product_description]);
|
||||
|
||||
// here we want to get a hit on the 'ken' in Frankenstein
|
||||
let query = query_parser.parse_query("broom")?;
|
||||
if let Some(stats) = searcher.search(&query, &StatsCollector::with_field(price))? {
|
||||
println!("count: {}", stats.count());
|
||||
println!("mean: {}", stats.mean());
|
||||
println!("standard deviation: {}", stats.standard_deviation());
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,27 +1,17 @@
|
||||
extern crate tantivy;
|
||||
extern crate tempdir;
|
||||
// # Defining a tokenizer pipeline
|
||||
//
|
||||
// In this example, we'll see how to define a tokenizer pipeline
|
||||
// by aligning a bunch of `TokenFilter`.
|
||||
|
||||
#[macro_use]
|
||||
extern crate serde_json;
|
||||
|
||||
use std::path::Path;
|
||||
use tantivy::collector::TopCollector;
|
||||
extern crate tantivy;
|
||||
use tantivy::collector::TopDocs;
|
||||
use tantivy::query::QueryParser;
|
||||
use tantivy::schema::*;
|
||||
use tantivy::tokenizer::NgramTokenizer;
|
||||
use tantivy::Index;
|
||||
use tempdir::TempDir;
|
||||
|
||||
fn main() {
|
||||
// Let's create a temporary directory for the
|
||||
// sake of this example
|
||||
if let Ok(dir) = TempDir::new("tantivy_token_example_dir") {
|
||||
run_example(dir.path()).unwrap();
|
||||
dir.close().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn run_example(index_path: &Path) -> tantivy::Result<()> {
|
||||
fn main() -> tantivy::Result<()> {
|
||||
// # Defining the schema
|
||||
//
|
||||
// The Tantivy index requires a very strict schema.
|
||||
@@ -30,7 +20,7 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
|
||||
// be indexed".
|
||||
|
||||
// first we need to define a schema ...
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
let mut schema_builder = Schema::builder();
|
||||
|
||||
// Our first field is title.
|
||||
// In this example we want to use NGram searching
|
||||
@@ -42,7 +32,7 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
|
||||
let text_options = TextOptions::default()
|
||||
.set_indexing_options(text_field_indexing)
|
||||
.set_stored();
|
||||
schema_builder.add_text_field("title", text_options);
|
||||
let title = schema_builder.add_text_field("title", text_options);
|
||||
|
||||
// Our second field is body.
|
||||
// We want full-text search for it, but we do not
|
||||
@@ -51,17 +41,17 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
|
||||
//
|
||||
// We can make our index lighter and
|
||||
// by omitting `STORED` flag.
|
||||
schema_builder.add_text_field("body", TEXT);
|
||||
let body = schema_builder.add_text_field("body", TEXT);
|
||||
|
||||
let schema = schema_builder.build();
|
||||
|
||||
// # Indexing documents
|
||||
//
|
||||
// Let's create a brand new index.
|
||||
//
|
||||
// This will actually just save a meta.json
|
||||
// with our schema in the directory.
|
||||
let index = Index::create_in_dir(index_path, schema.clone())?;
|
||||
// To simplify we will work entirely in RAM.
|
||||
// This is not what you want in reality, but it is very useful
|
||||
// for your unit tests... Or this example.
|
||||
let index = Index::create_in_ram(schema.clone());
|
||||
|
||||
// here we are registering our custome tokenizer
|
||||
// this will store tokens of 3 characters each
|
||||
@@ -77,101 +67,32 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
|
||||
// Here we use a buffer of 50MB per thread. Using a bigger
|
||||
// heap for the indexer can increase its throughput.
|
||||
let mut index_writer = index.writer(50_000_000)?;
|
||||
|
||||
// Let's index our documents!
|
||||
// We first need a handle on the title and the body field.
|
||||
|
||||
// ### Create a document "manually".
|
||||
//
|
||||
// We can create a document manually, by setting the fields
|
||||
// one by one in a Document object.
|
||||
let title = schema.get_field("title").unwrap();
|
||||
let body = schema.get_field("body").unwrap();
|
||||
|
||||
let mut old_man_doc = Document::default();
|
||||
old_man_doc.add_text(title, "The Old Man and the Sea");
|
||||
old_man_doc.add_text(
|
||||
body,
|
||||
"He was an old man who fished alone in a skiff in the Gulf Stream and \
|
||||
he had gone eighty-four days now without taking a fish.",
|
||||
);
|
||||
|
||||
// ... and add it to the `IndexWriter`.
|
||||
index_writer.add_document(old_man_doc);
|
||||
|
||||
// ### Create a document directly from json.
|
||||
//
|
||||
// Alternatively, we can use our schema to parse a
|
||||
// document object directly from json.
|
||||
// The document is a string, but we use the `json` macro
|
||||
// from `serde_json` for the convenience of multi-line support.
|
||||
let json = json!({
|
||||
"title": "Of Mice and Men",
|
||||
"body": "A few miles south of Soledad, the Salinas River drops in close to the hillside \
|
||||
bank and runs deep and green. The water is warm too, for it has slipped twinkling \
|
||||
over the yellow sands in the sunlight before reaching the narrow pool. On one \
|
||||
side of the river the golden foothill slopes curve up to the strong and rocky \
|
||||
Gabilan Mountains, but on the valley side the water is lined with trees—willows \
|
||||
fresh and green with every spring, carrying in their lower leaf junctures the \
|
||||
debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
|
||||
limbs and branches that arch over the pool"
|
||||
});
|
||||
let mice_and_men_doc = schema.parse_document(&json.to_string())?;
|
||||
|
||||
index_writer.add_document(mice_and_men_doc);
|
||||
|
||||
// Multi-valued field are allowed, they are
|
||||
// expressed in JSON by an array.
|
||||
// The following document has two titles.
|
||||
let json = json!({
|
||||
"title": ["Frankenstein", "The Modern Prometheus"],
|
||||
"body": "You will rejoice to hear that no disaster has accompanied the commencement of an \
|
||||
enterprise which you have regarded with such evil forebodings. I arrived here \
|
||||
yesterday, and my first task is to assure my dear sister of my welfare and \
|
||||
increasing confidence in the success of my undertaking."
|
||||
});
|
||||
let frankenstein_doc = schema.parse_document(&json.to_string())?;
|
||||
|
||||
index_writer.add_document(frankenstein_doc);
|
||||
|
||||
// This is an example, so we will only index 3 documents
|
||||
// here. You can check out tantivy's tutorial to index
|
||||
// the English wikipedia. Tantivy's indexing is rather fast.
|
||||
// Indexing 5 million articles of the English wikipedia takes
|
||||
// around 4 minutes on my computer!
|
||||
|
||||
// ### Committing
|
||||
//
|
||||
// At this point our documents are not searchable.
|
||||
//
|
||||
//
|
||||
// We need to call .commit() explicitly to force the
|
||||
// index_writer to finish processing the documents in the queue,
|
||||
// flush the current index to the disk, and advertise
|
||||
// the existence of new documents.
|
||||
//
|
||||
// This call is blocking.
|
||||
index_writer.add_document(doc!(
|
||||
title => "The Old Man and the Sea",
|
||||
body => "He was an old man who fished alone in a skiff in the Gulf Stream and \
|
||||
he had gone eighty-four days now without taking a fish."
|
||||
));
|
||||
index_writer.add_document(doc!(
|
||||
title => "Of Mice and Men",
|
||||
body => r#"A few miles south of Soledad, the Salinas River drops in close to the hillside
|
||||
bank and runs deep and green. The water is warm too, for it has slipped twinkling
|
||||
over the yellow sands in the sunlight before reaching the narrow pool. On one
|
||||
side of the river the golden foothill slopes curve up to the strong and rocky
|
||||
Gabilan Mountains, but on the valley side the water is lined with trees—willows
|
||||
fresh and green with every spring, carrying in their lower leaf junctures the
|
||||
debris of the winter’s flooding; and sycamores with mottled, white, recumbent
|
||||
limbs and branches that arch over the pool"#
|
||||
));
|
||||
index_writer.add_document(doc!(
|
||||
title => "Frankenstein",
|
||||
body => r#"You will rejoice to hear that no disaster has accompanied the commencement of an
|
||||
enterprise which you have regarded with such evil forebodings. I arrived here
|
||||
yesterday, and my first task is to assure my dear sister of my welfare and
|
||||
increasing confidence in the success of my undertaking."#
|
||||
));
|
||||
index_writer.commit()?;
|
||||
|
||||
// If `.commit()` returns correctly, then all of the
|
||||
// documents that have been added are guaranteed to be
|
||||
// persistently indexed.
|
||||
//
|
||||
// In the scenario of a crash or a power failure,
|
||||
// tantivy behaves as if has rolled back to its last
|
||||
// commit.
|
||||
|
||||
// # Searching
|
||||
//
|
||||
// Let's search our index. Start by reloading
|
||||
// searchers in the index. This should be done
|
||||
// after every commit().
|
||||
index.load_searchers()?;
|
||||
|
||||
// Afterwards create one (or more) searchers.
|
||||
//
|
||||
// You should create a searcher
|
||||
// every time you start a "search query".
|
||||
let searcher = index.searcher();
|
||||
|
||||
// The query parser can interpret human queries.
|
||||
@@ -183,44 +104,12 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
|
||||
// here we want to get a hit on the 'ken' in Frankenstein
|
||||
let query = query_parser.parse_query("ken")?;
|
||||
|
||||
// A query defines a set of documents, as
|
||||
// well as the way they should be scored.
|
||||
//
|
||||
// A query created by the query parser is scored according
|
||||
// to a metric called Tf-Idf, and will consider
|
||||
// any document matching at least one of our terms.
|
||||
let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
|
||||
|
||||
// ### Collectors
|
||||
//
|
||||
// We are not interested in all of the documents but
|
||||
// only in the top 10. Keeping track of our top 10 best documents
|
||||
// is the role of the TopCollector.
|
||||
let mut top_collector = TopCollector::with_limit(10);
|
||||
|
||||
// We can now perform our query.
|
||||
searcher.search(&*query, &mut top_collector)?;
|
||||
|
||||
// Our top collector now contains the 10
|
||||
// most relevant doc ids...
|
||||
let doc_addresses = top_collector.docs();
|
||||
|
||||
// The actual documents still need to be
|
||||
// retrieved from Tantivy's store.
|
||||
//
|
||||
// Since the body field was not configured as stored,
|
||||
// the document returned will only contain
|
||||
// a title.
|
||||
|
||||
for doc_address in doc_addresses {
|
||||
let retrieved_doc = searcher.doc(&doc_address)?;
|
||||
for (_, doc_address) in top_docs {
|
||||
let retrieved_doc = searcher.doc(doc_address)?;
|
||||
println!("{}", schema.to_json(&retrieved_doc));
|
||||
}
|
||||
|
||||
// Wait for indexing and merging threads to shut down.
|
||||
// Usually this isn't needed, but in `main` we try to
|
||||
// delete the temporary directory and that fails on
|
||||
// Windows if the files are still open.
|
||||
index_writer.wait_merging_threads()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
142
examples/deleting_updating_documents.rs
Normal file
@@ -0,0 +1,142 @@
// # Deleting and Updating (?) documents
//
// This example explains how to delete and update documents.
// There is actually no such thing as an update in tantivy.
//
// To update a document, you need to delete the document and then reinsert
// its new version.
//
// ---
// Importing tantivy...
#[macro_use]
extern crate tantivy;
use tantivy::collector::TopDocs;
use tantivy::query::TermQuery;
use tantivy::schema::*;
use tantivy::Index;

// A simple helper function to fetch a single document
// given its id from our index.
// It will be helpful to check our work.
fn extract_doc_given_isbn(index: &Index, isbn_term: &Term) -> tantivy::Result<Option<Document>> {
    let searcher = index.searcher();

    // This is the simplest query you can think of.
    // It matches all of the documents containing a specific term.
    //
    // The second argument is here to tell tantivy that we don't care
    // about decoding positions or term frequencies.
    let term_query = TermQuery::new(isbn_term.clone(), IndexRecordOption::Basic);
    let top_docs = searcher.search(&term_query, &TopDocs::with_limit(1))?;

    if let Some((_score, doc_address)) = top_docs.first() {
        let doc = searcher.doc(*doc_address)?;
        Ok(Some(doc))
    } else {
        // No doc matching this ID.
        Ok(None)
    }
}

fn main() -> tantivy::Result<()> {
    // # Defining the schema
    //
    // Check out the *basic_search* example if this makes
    // little sense to you.
    let mut schema_builder = Schema::builder();

    // Tantivy does not really have a notion of a primary id.
    // This may change in the future.
    //
    // Still, we can create an `isbn` field and use it as an id. This
    // field can be a `u64` or a `text`, depending on your use case.
    // It just needs to be indexed.
    //
    // If it is `text`, let's make sure to keep it `raw` and avoid
    // running any text processing on it.
    // This is done by associating this field to the tokenizer named `raw`.
    // Rather than building our [`TextOptions`](//docs.rs/tantivy/~0/tantivy/schema/struct.TextOptions.html) manually,
    // we use the `STRING` shortcut. `STRING` stands for indexed (without term frequency or positions)
    // and untokenized.
    //
    // Because we also want to be able to see this `id` in our returned documents,
    // we also mark the field as stored.
    let isbn = schema_builder.add_text_field("isbn", STRING | STORED);
    let title = schema_builder.add_text_field("title", TEXT | STORED);
    let schema = schema_builder.build();

    let index = Index::create_in_ram(schema.clone());

    let mut index_writer = index.writer(50_000_000)?;

    // Let's add a couple of documents, for the sake of the example.
    let mut old_man_doc = Document::default();
    old_man_doc.add_text(title, "The Old Man and the Sea");
    index_writer.add_document(doc!(
        isbn => "978-0099908401",
        title => "The old Man and the see"
    ));
    index_writer.add_document(doc!(
        isbn => "978-0140177398",
        title => "Of Mice and Men",
    ));
    index_writer.add_document(doc!(
        title => "Frankentein", //< Oops there is a typo here.
        isbn => "978-9176370711",
    ));
    index_writer.commit()?;
    index.load_searchers()?;

    let frankenstein_isbn = Term::from_field_text(isbn, "978-9176370711");

    // Oops, our Frankenstein doc seems misspelled.
    let frankenstein_doc_misspelled = extract_doc_given_isbn(&index, &frankenstein_isbn)?.unwrap();
    assert_eq!(
        schema.to_json(&frankenstein_doc_misspelled),
        r#"{"isbn":["978-9176370711"],"title":["Frankentein"]}"#,
    );

    // # Update = Delete + Insert
    //
    // Here we want to fix the typo in the `Frankenstein` book.
    //
    // Tantivy does not handle updates directly: we need to delete
    // and reinsert the document.
    //
    // This can be complicated, as it means you need to have access
    // to the entire document. It is good practice to integrate tantivy
    // with a key-value store for this reason.
    //
    // To remove one of the documents, we just call `delete_term`
    // on its id.
    //
    // Note that `tantivy` does nothing to enforce the idea that
    // there is only one document associated with this id.
    //
    // Also, you might have noticed that we apply the delete before
    // having committed. This does not really matter...
    index_writer.delete_term(frankenstein_isbn.clone());

    // We now need to reinsert our document without the typo.
    index_writer.add_document(doc!(
        title => "Frankenstein",
        isbn => "978-9176370711",
    ));

    // You are guaranteed that your clients will only observe your index in
    // the state it was in after a commit.
    // In this example, your search engine will at no point be missing the *Frankenstein* document.
    // Everything happens as if the document had been updated.
    index_writer.commit()?;
    // We reload our searchers to make the change visible to clients.
    index.load_searchers()?;

    // No more typo!
    let frankenstein_new_doc = extract_doc_given_isbn(&index, &frankenstein_isbn)?.unwrap();
    assert_eq!(
        schema.to_json(&frankenstein_new_doc),
        r#"{"isbn":["978-9176370711"],"title":["Frankenstein"]}"#,
    );

    Ok(())
}
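The comments above boil updates down to "delete the old version, reinsert the new one" and recommend keeping the authoritative copy of each document in an external key-value store. Below is a small sketch of that pattern with a plain `HashMap` standing in for the external store; the helper name, the schema and the ISBNs are illustrative assumptions, not part of this commit.

// Sketch: "update" as delete_term + add_document, with a HashMap as the
// stand-in source of truth. Only API calls shown in this changeset are used.
extern crate tantivy;

use std::collections::HashMap;
use tantivy::schema::*;
use tantivy::{Index, IndexWriter};

fn upsert_book(
    index_writer: &mut IndexWriter,
    store: &mut HashMap<String, String>, // isbn -> title: the authoritative copy
    isbn_field: Field,
    title_field: Field,
    isbn: &str,
    title: &str,
) {
    // Keep the external store up to date first...
    store.insert(isbn.to_string(), title.to_string());
    // ...then delete any previous version from the index and reinsert the new one.
    index_writer.delete_term(Term::from_field_text(isbn_field, isbn));
    let mut doc = Document::default();
    doc.add_text(isbn_field, isbn);
    doc.add_text(title_field, title);
    index_writer.add_document(doc);
}

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let isbn = schema_builder.add_text_field("isbn", STRING | STORED);
    let title = schema_builder.add_text_field("title", TEXT | STORED);
    let index = Index::create_in_ram(schema_builder.build());
    let mut index_writer = index.writer(50_000_000)?;
    let mut store = HashMap::new();

    // First insertion, then an "update" that silently deletes and reinserts.
    upsert_book(&mut index_writer, &mut store, isbn, title, "978-9176370711", "Frankentein");
    upsert_book(&mut index_writer, &mut store, isbn, title, "978-9176370711", "Frankenstein");

    // Both steps land in the same commit, so readers only ever see the fixed title.
    index_writer.commit()?;
    Ok(())
}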
80
examples/faceted_search.rs
Normal file
@@ -0,0 +1,80 @@
// # Faceted Search Example
//
// This example covers the basic functionalities of
// faceted search in tantivy.
//
// We will:
// - define our schema
// - create an index in a directory
// - index a few documents with facets into our index
// - count the documents under each facet of the `/pools` tree

extern crate tempdir;

// ---
// Importing tantivy...
#[macro_use]
extern crate tantivy;
use tantivy::collector::FacetCollector;
use tantivy::query::AllQuery;
use tantivy::schema::*;
use tantivy::Index;
use tempdir::TempDir;

fn main() -> tantivy::Result<()> {
    // Let's create a temporary directory for the
    // sake of this example.
    let index_path = TempDir::new("tantivy_facet_example_dir")?;
    let mut schema_builder = Schema::builder();

    schema_builder.add_text_field("name", TEXT | STORED);

    // This is our faceted field.
    schema_builder.add_facet_field("tags");

    let schema = schema_builder.build();

    let index = Index::create_in_dir(&index_path, schema.clone())?;

    let mut index_writer = index.writer(50_000_000)?;

    let name = schema.get_field("name").unwrap();
    let tags = schema.get_field("tags").unwrap();

    // The `doc!` macro keeps the indexing boilerplate short.
    index_writer.add_document(doc!(
        name => "the ditch",
        tags => Facet::from("/pools/north")
    ));

    index_writer.add_document(doc!(
        name => "little stacey",
        tags => Facet::from("/pools/south")
    ));

    index_writer.commit()?;

    index.load_searchers()?;

    let searcher = index.searcher();

    let mut facet_collector = FacetCollector::for_field(tags);
    facet_collector.add_facet("/pools");

    let facet_counts = searcher.search(&AllQuery, &facet_collector).unwrap();

    // This lists all of the facet counts.
    let facets: Vec<(&Facet, u64)> = facet_counts.get("/pools").collect();
    assert_eq!(
        facets,
        vec![
            (&Facet::from("/pools/north"), 1),
            (&Facet::from("/pools/south"), 1),
        ]
    );

    Ok(())
}
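The collector above counts documents under each immediate child of "/pools". A document may also carry several facet values at once, and each value is counted under its own subtree. The sketch below illustrates this with an in-RAM index; the "/lang" and "/category" trees are illustrative assumptions, and only calls already shown in this example are used.

// Sketch: one document with two facet values, counted under the /lang subtree.
#[macro_use]
extern crate tantivy;

use tantivy::collector::FacetCollector;
use tantivy::query::AllQuery;
use tantivy::schema::*;
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let name = schema_builder.add_text_field("name", TEXT | STORED);
    let tags = schema_builder.add_facet_field("tags");
    let index = Index::create_in_ram(schema_builder.build());

    let mut index_writer = index.writer(50_000_000)?;
    // This book gets two tags: one under /lang and one under /category.
    index_writer.add_document(doc!(
        name => "Frankenstein",
        tags => Facet::from("/lang/en"),
        tags => Facet::from("/category/novel")
    ));
    index_writer.add_document(doc!(
        name => "Of Mice and Men",
        tags => Facet::from("/lang/en")
    ));
    index_writer.commit()?;
    index.load_searchers()?;

    let searcher = index.searcher();
    let mut facet_collector = FacetCollector::for_field(tags);
    facet_collector.add_facet("/lang");
    let facet_counts = searcher.search(&AllQuery, &facet_collector)?;

    // Both documents are tagged under /lang/en, so its count is 2.
    let langs: Vec<(&Facet, u64)> = facet_counts.get("/lang").collect();
    assert_eq!(langs, vec![(&Facet::from("/lang/en"), 2)]);
    Ok(())
}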
@@ -1,2 +0,0 @@
#!/bin/bash
docco simple_search.rs -o html
@@ -1,518 +0,0 @@
Deleted docco.css stylesheet not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
375
examples/html/public/stylesheets/normalize.css
vendored
@@ -1,375 +0,0 @@
Deleted vendored normalize.css (v2.0.1) not shown.
@@ -1,542 +0,0 @@
Deleted docco-generated HTML page for simple_search.rs not shown; it duplicated the content of simple_search.rs.
133
examples/iterating_docs_and_positions.rs
Normal file
@@ -0,0 +1,133 @@
// # Iterating docs and positions.
//
// At its core, tantivy relies on a data structure
// called an inverted index.
//
// This example shows how to manually iterate through
// the list of documents containing a term, getting
// its term frequency, and accessing its positions.

// ---
// Importing tantivy...
#[macro_use]
extern crate tantivy;
use tantivy::schema::*;
use tantivy::Index;
use tantivy::{DocId, DocSet, Postings};

fn main() -> tantivy::Result<()> {
    // We first create a schema for the sake of the
    // example. Check the `basic_search` example for more information.
    let mut schema_builder = Schema::builder();

    // For this example, we need to make sure to index positions for our title
    // field. `TEXT` precisely does this.
    let title = schema_builder.add_text_field("title", TEXT | STORED);
    let schema = schema_builder.build();

    let index = Index::create_in_ram(schema.clone());

    let mut index_writer = index.writer_with_num_threads(1, 50_000_000)?;
    index_writer.add_document(doc!(title => "The Old Man and the Sea"));
    index_writer.add_document(doc!(title => "Of Mice and Men"));
    index_writer.add_document(doc!(title => "The modern Promotheus"));
    index_writer.commit()?;

    index.load_searchers()?;

    let searcher = index.searcher();

    // A tantivy index is actually a collection of segments.
    // Similarly, a searcher just wraps a list of `segment_reader`s.
    //
    // (Because we indexed a very small number of documents over one thread,
    // there is actually only one segment here, but let's iterate through the list
    // anyway.)
    for segment_reader in searcher.segment_readers() {
        // A segment contains different data structures.
        // The inverted index stands for the combination of
        // - the term dictionary
        // - the inverted lists associated with each term, and their positions
        let inverted_index = segment_reader.inverted_index(title);

        // A `Term` is a text token associated with a field.
        // Let's go through all docs containing the term `title:the` and access their positions.
        let term_the = Term::from_field_text(title, "the");

        // This segment postings object is like a cursor over the documents matching the term.
        // The `IndexRecordOption` argument tells tantivy we will be interested in both term frequencies
        // and positions.
        //
        // If you don't need all this information, you may get better performance by decompressing less
        // information.
        if let Some(mut segment_postings) =
            inverted_index.read_postings(&term_the, IndexRecordOption::WithFreqsAndPositions)
        {
            // This buffer will be used to request positions.
            let mut positions: Vec<u32> = Vec::with_capacity(100);
            while segment_postings.advance() {
                // The doc id of the current document.
                let doc_id: DocId = segment_postings.doc(); //< do not try to access this before calling advance once.

                // This MAY contain deleted documents as well.
                if segment_reader.is_deleted(doc_id) {
                    continue;
                }

                // The number of times the term appears in the document.
                let term_freq: u32 = segment_postings.term_freq();
                // Accessing positions is slightly expensive and lazy: do not request
                // them if you don't need them for some documents.
                segment_postings.positions(&mut positions);

                // By definition we should have `term_freq` positions.
                assert_eq!(positions.len(), term_freq as usize);

                // This prints:
                // ```
                // Doc 0: TermFreq 2: [0, 4]
                // Doc 2: TermFreq 1: [0]
                // ```
                println!("Doc {}: TermFreq {}: {:?}", doc_id, term_freq, positions);
            }
        }
    }

    // A `Term` is a text token associated with a field.
    // This time, let's go through the docs containing `title:the` block by block.
    let term_the = Term::from_field_text(title, "the");

    // Some other powerful operations (especially `.skip_to`) may be useful to consume these
    // posting lists rapidly.
    // You can check for them in the [`DocSet`](https://docs.rs/tantivy/~0/tantivy/trait.DocSet.html) trait
    // and the [`Postings`](https://docs.rs/tantivy/~0/tantivy/trait.Postings.html) trait.

    // Also, for some VERY specific high-performance use cases, like an OLAP analysis of logs,
    // you can get better performance by accessing the blocks of doc ids directly.
    for segment_reader in searcher.segment_readers() {
        // A segment contains different data structures.
        // The inverted index stands for the combination of
        // - the term dictionary
        // - the inverted lists associated with each term, and their positions
        let inverted_index = segment_reader.inverted_index(title);

        // This block postings object is like a cursor over blocks of documents matching the term.
        // Here the `IndexRecordOption::Basic` argument tells tantivy we need neither term frequencies
        // nor positions.
        if let Some(mut block_segment_postings) =
            inverted_index.read_block_postings(&term_the, IndexRecordOption::Basic)
        {
            while block_segment_postings.advance() {
                // Once again these docs MAY contain deleted documents as well.
                let docs = block_segment_postings.docs();
                // Prints `Docs [0, 2].`
                println!("Docs {:?}", docs);
            }
        }
    }

    Ok(())
}
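The two loops above show the raw postings machinery. A small, natural application of the same calls is to derive corpus statistics; the sketch below sums `term_freq` over all matching documents to get the total number of occurrences of a term in the index. The schema and the two documents are illustrative assumptions, and only calls shown in this example are used.

// Sketch: counting total occurrences of a term by walking the postings.
#[macro_use]
extern crate tantivy;

use tantivy::schema::*;
use tantivy::{DocSet, Index, Postings};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    let mut index_writer = index.writer_with_num_threads(1, 50_000_000)?;
    index_writer.add_document(doc!(title => "The Old Man and the Sea"));
    index_writer.add_document(doc!(title => "The modern Promotheus"));
    index_writer.commit()?;
    index.load_searchers()?;

    let searcher = index.searcher();
    let term_the = Term::from_field_text(title, "the");

    // Walk every segment, and within each segment every document containing
    // the term, accumulating the per-document term frequencies.
    let mut total_occurrences: u64 = 0;
    for segment_reader in searcher.segment_readers() {
        let inverted_index = segment_reader.inverted_index(title);
        if let Some(mut postings) =
            inverted_index.read_postings(&term_the, IndexRecordOption::WithFreqs)
        {
            while postings.advance() {
                // Skip documents that were deleted but not yet merged away.
                if segment_reader.is_deleted(postings.doc()) {
                    continue;
                }
                total_occurrences += u64::from(postings.term_freq());
            }
        }
    }

    // "the" appears twice in the first title and once in the second.
    assert_eq!(total_occurrences, 3);
    Ok(())
}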
87
examples/snippet.rs
Normal file
@@ -0,0 +1,87 @@
// # Snippet example
//
// This example shows how to return a representative snippet of
// your hit results.
// A snippet is an extract of the matched document, returned in HTML format.
// The keywords searched by the user are highlighted with a `<b>` tag.
extern crate tempdir;

// ---
// Importing tantivy...
#[macro_use]
extern crate tantivy;
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::*;
use tantivy::Index;
use tantivy::{Snippet, SnippetGenerator};
use tempdir::TempDir;

fn main() -> tantivy::Result<()> {
    // Let's create a temporary directory for the
    // sake of this example.
    let index_path = TempDir::new("tantivy_example_dir")?;

    // # Defining the schema
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT | STORED);
    let body = schema_builder.add_text_field("body", TEXT | STORED);
    let schema = schema_builder.build();

    // # Indexing documents
    let index = Index::create_in_dir(&index_path, schema.clone())?;

    let mut index_writer = index.writer(50_000_000)?;

    // We'll only need one doc for this example.
    index_writer.add_document(doc!(
        title => "Of Mice and Men",
        body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \
                 bank and runs deep and green. The water is warm too, for it has slipped twinkling \
                 over the yellow sands in the sunlight before reaching the narrow pool. On one \
                 side of the river the golden foothill slopes curve up to the strong and rocky \
                 Gabilan Mountains, but on the valley side the water is lined with trees—willows \
                 fresh and green with every spring, carrying in their lower leaf junctures the \
                 debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
                 limbs and branches that arch over the pool"
    ));
    // ...
    index_writer.commit()?;

    index.load_searchers()?;

    let searcher = index.searcher();
    let query_parser = QueryParser::for_index(&index, vec![title, body]);
    let query = query_parser.parse_query("sycamore spring")?;

    let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;

    let snippet_generator = SnippetGenerator::create(&searcher, &*query, body)?;

    for (score, doc_address) in top_docs {
        let doc = searcher.doc(doc_address)?;
        let snippet = snippet_generator.snippet_from_doc(&doc);
        println!("Document score {}:", score);
        println!("title: {}", doc.get_first(title).unwrap().text().unwrap());
        println!("snippet: {}", snippet.to_html());
        println!("custom highlighting: {}", highlight(snippet));
    }

    Ok(())
}

fn highlight(snippet: Snippet) -> String {
    let mut result = String::new();
    let mut start_from = 0;

    for (start, end) in snippet.highlighted().iter().map(|h| h.bounds()) {
        result.push_str(&snippet.fragments()[start_from..start]);
        result.push_str(" --> ");
        result.push_str(&snippet.fragments()[start..end]);
        result.push_str(" <-- ");
        start_from = end;
    }

    result.push_str(&snippet.fragments()[start_from..]);
    result
}
117
examples/stop_words.rs
Normal file
@@ -0,0 +1,117 @@
|
||||
// # Stop Words Example
|
||||
//
|
||||
// This example covers the basic usage of stop words
|
||||
// with tantivy
|
||||
//
|
||||
// We will:
|
||||
// - define our schema
|
||||
// - create an index in RAM
|
||||
// - add a few stop words
|
||||
// - index a few documents in our index
|
||||
|
||||
extern crate tempdir;
|
||||
|
||||
// ---
|
||||
// Importing tantivy...
|
||||
#[macro_use]
|
||||
extern crate tantivy;
|
||||
use tantivy::collector::TopDocs;
|
||||
use tantivy::query::QueryParser;
|
||||
use tantivy::schema::*;
|
||||
use tantivy::tokenizer::*;
|
||||
use tantivy::Index;
|
||||
|
||||
fn main() -> tantivy::Result<()> {
|
||||
// this example assumes you understand the content in `basic_search`
|
||||
let mut schema_builder = Schema::builder();
|
||||
|
||||
// This configures your custom options for how tantivy will
|
||||
// store and process your content in the index. The key
|
||||
// point to note is that we are setting the tokenizer to `stoppy`
|
||||
// which will be defined and registered below.
|
||||
let text_field_indexing = TextFieldIndexing::default()
|
||||
.set_tokenizer("stoppy")
|
||||
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
|
||||
let text_options = TextOptions::default()
|
||||
.set_indexing_options(text_field_indexing)
|
||||
.set_stored();
|
||||
|
||||
// Our first field is title.
|
||||
schema_builder.add_text_field("title", text_options);
|
||||
|
||||
// Our second field is body.
|
||||
let text_field_indexing = TextFieldIndexing::default()
|
||||
.set_tokenizer("stoppy")
|
||||
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
|
||||
let text_options = TextOptions::default()
|
||||
.set_indexing_options(text_field_indexing)
|
||||
.set_stored();
|
||||
schema_builder.add_text_field("body", text_options);
|
||||
|
||||
let schema = schema_builder.build();
|
||||
|
||||
let index = Index::create_in_ram(schema.clone());
|
||||
|
||||
// This tokenizer lowercases all of the text (to help with stop word matching)
|
||||
// then removes all instances of `the` and `and` from the corpus
|
||||
let tokenizer = SimpleTokenizer
|
||||
.filter(LowerCaser)
|
||||
.filter(StopWordFilter::remove(vec![
|
||||
"the".to_string(),
|
||||
"and".to_string(),
|
||||
]));
|
||||
|
||||
index.tokenizers().register("stoppy", tokenizer);
|
||||
|
||||
let mut index_writer = index.writer(50_000_000)?;
|
||||
|
||||
let title = schema.get_field("title").unwrap();
|
||||
let body = schema.get_field("body").unwrap();
|
||||
|
||||
index_writer.add_document(doc!(
|
||||
title => "The Old Man and the Sea",
|
||||
body => "He was an old man who fished alone in a skiff in the Gulf Stream and \
|
||||
he had gone eighty-four days now without taking a fish."
|
||||
));
|
||||
|
||||
index_writer.add_document(doc!(
|
||||
title => "Of Mice and Men",
|
||||
body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \
|
||||
bank and runs deep and green. The water is warm too, for it has slipped twinkling \
|
||||
over the yellow sands in the sunlight before reaching the narrow pool. On one \
|
||||
side of the river the golden foothill slopes curve up to the strong and rocky \
|
||||
Gabilan Mountains, but on the valley side the water is lined with trees—willows \
|
||||
fresh and green with every spring, carrying in their lower leaf junctures the \
|
||||
debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
|
||||
limbs and branches that arch over the pool"
|
||||
));
|
||||
|
||||
index_writer.add_document(doc!(
|
||||
title => "Frankenstein",
|
||||
body => "You will rejoice to hear that no disaster has accompanied the commencement of an \
|
||||
enterprise which you have regarded with such evil forebodings. I arrived here \
|
||||
yesterday, and my first task is to assure my dear sister of my welfare and \
|
||||
increasing confidence in the success of my undertaking."
|
||||
));
|
||||
|
||||
index_writer.commit()?;
|
||||
|
||||
index.load_searchers()?;
|
||||
|
||||
let searcher = index.searcher();
|
||||
|
||||
let query_parser = QueryParser::for_index(&index, vec![title, body]);
|
||||
|
||||
// Stop words are applied to the query as well.
|
||||
// The following will be equivalent to `title:frankenstein`
|
||||
let query = query_parser.parse_query("title:\"the Frankenstein\"")?;
|
||||
let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
|
||||
|
||||
for (score, doc_address) in top_docs {
|
||||
let retrieved_doc = searcher.doc(doc_address)?;
|
||||
println!("\n==\nDocument score {}:", score);
|
||||
println!("{}", schema.to_json(&retrieved_doc));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
41
examples/working_with_json.rs
Normal file
@@ -0,0 +1,41 @@
|
||||
extern crate tantivy;
|
||||
use tantivy::schema::*;
|
||||
|
||||
// # Document from json
|
||||
//
|
||||
// For convenience, `Document` can be parsed directly from json.
|
||||
fn main() -> tantivy::Result<()> {
|
||||
// Let's first define a schema and an index.
|
||||
// Check out the basic example if this is confusing to you.
|
||||
//
|
||||
// first we need to define a schema ...
|
||||
let mut schema_builder = Schema::builder();
|
||||
schema_builder.add_text_field("title", TEXT | STORED);
|
||||
schema_builder.add_text_field("body", TEXT);
|
||||
schema_builder.add_u64_field("year", INT_INDEXED);
|
||||
let schema = schema_builder.build();
|
||||
|
||||
// Let's assume we have a json-serialized document.
|
||||
let mice_and_men_doc_json = r#"{
|
||||
"title": "Of Mice and Men",
|
||||
"year": 1937
|
||||
}"#;
|
||||
|
||||
// We can parse our document
|
||||
let _mice_and_men_doc = schema.parse_document(&mice_and_men_doc_json)?;
|
||||
|
||||
// Multi-valued fields are allowed; they are
|
||||
// expressed in JSON as an array.
|
||||
// The following document has two titles.
|
||||
let frankenstein_json = r#"{
|
||||
"title": ["Frankenstein", "The Modern Prometheus"],
|
||||
"year": 1818
|
||||
}"#;
|
||||
let _frankenstein_doc = schema.parse_document(&frankenstein_json)?;
|
||||
|
||||
// Note that the schema is saved in your index directory.
|
||||
//
|
||||
// As a result, indexes are aware of their schema, and you can use this feature
|
||||
// just by opening an existing `Index` and calling `index.schema().parse_document(json)`.
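// As a sketch (hypothetical: it assumes an index already exists on disk at some
// `index_path`, and that this tantivy version exposes `Index::open_in_dir`):
//
//     let index = Index::open_in_dir(index_path)?;
//     let _doc = index
//         .schema()
//         .parse_document(r#"{"title": "Frankenstein", "year": 1818}"#)?;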
|
||||
Ok(())
|
||||
}
|
||||
2
run-tests.sh
Executable file
@@ -0,0 +1,2 @@
|
||||
#!/bin/bash
|
||||
cargo test --no-default-features --features mmap -- --test-threads 1
|
||||
@@ -1,142 +0,0 @@
|
||||
use collector::Collector;
|
||||
use DocId;
|
||||
use Result;
|
||||
use Score;
|
||||
use SegmentLocalId;
|
||||
use SegmentReader;
|
||||
|
||||
/// Collector that does nothing.
|
||||
/// This is used in the chain Collector and will hopefully
|
||||
/// be optimized away by the compiler.
|
||||
pub struct DoNothingCollector;
|
||||
impl Collector for DoNothingCollector {
|
||||
#[inline]
|
||||
fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
#[inline]
|
||||
fn collect(&mut self, _doc: DocId, _score: Score) {}
|
||||
#[inline]
|
||||
fn requires_scoring(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Zero-cost abstraction used to collect on multiple collectors.
|
||||
/// This contraption is only usable if the types of your collectors
|
||||
/// are known at compile time.
|
||||
///
|
||||
/// ```rust
|
||||
/// #[macro_use]
|
||||
/// extern crate tantivy;
|
||||
/// use tantivy::schema::{SchemaBuilder, TEXT};
|
||||
/// use tantivy::{Index, Result};
|
||||
/// use tantivy::collector::{CountCollector, TopCollector, chain};
|
||||
/// use tantivy::query::QueryParser;
|
||||
///
|
||||
/// # fn main() { example().unwrap(); }
|
||||
/// fn example() -> Result<()> {
|
||||
/// let mut schema_builder = SchemaBuilder::new();
|
||||
/// let title = schema_builder.add_text_field("title", TEXT);
|
||||
/// let schema = schema_builder.build();
|
||||
/// let index = Index::create_in_ram(schema);
|
||||
/// {
|
||||
/// let mut index_writer = index.writer(3_000_000)?;
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Name of the Wind",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of Muadib",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "A Dairy Cow",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of a Young Girl",
|
||||
/// ));
|
||||
/// index_writer.commit().unwrap();
|
||||
/// }
|
||||
///
|
||||
/// index.load_searchers()?;
|
||||
/// let searcher = index.searcher();
|
||||
///
|
||||
/// {
|
||||
/// let mut top_collector = TopCollector::with_limit(2);
|
||||
/// let mut count_collector = CountCollector::default();
|
||||
/// {
|
||||
/// let mut collectors = chain().push(&mut top_collector).push(&mut count_collector);
|
||||
/// let query_parser = QueryParser::for_index(&index, vec![title]);
|
||||
/// let query = query_parser.parse_query("diary")?;
|
||||
/// searcher.search(&*query, &mut collectors).unwrap();
|
||||
/// }
|
||||
/// assert_eq!(count_collector.count(), 2);
|
||||
/// assert!(top_collector.at_capacity());
|
||||
/// }
|
||||
///
|
||||
/// Ok(())
|
||||
/// }
|
||||
/// ```
|
||||
pub struct ChainedCollector<Left: Collector, Right: Collector> {
|
||||
left: Left,
|
||||
right: Right,
|
||||
}
|
||||
|
||||
impl<Left: Collector, Right: Collector> ChainedCollector<Left, Right> {
|
||||
/// Adds a collector
|
||||
pub fn push<C: Collector>(self, new_collector: &mut C) -> ChainedCollector<Self, &mut C> {
|
||||
ChainedCollector {
|
||||
left: self,
|
||||
right: new_collector,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Left: Collector, Right: Collector> Collector for ChainedCollector<Left, Right> {
|
||||
fn set_segment(
|
||||
&mut self,
|
||||
segment_local_id: SegmentLocalId,
|
||||
segment: &SegmentReader,
|
||||
) -> Result<()> {
|
||||
self.left.set_segment(segment_local_id, segment)?;
|
||||
self.right.set_segment(segment_local_id, segment)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn collect(&mut self, doc: DocId, score: Score) {
|
||||
self.left.collect(doc, score);
|
||||
self.right.collect(doc, score);
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
self.left.requires_scoring() || self.right.requires_scoring()
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a `ChainedCollector`
|
||||
pub fn chain() -> ChainedCollector<DoNothingCollector, DoNothingCollector> {
|
||||
ChainedCollector {
|
||||
left: DoNothingCollector,
|
||||
right: DoNothingCollector,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use collector::{Collector, CountCollector, TopCollector};
|
||||
|
||||
#[test]
|
||||
fn test_chained_collector() {
|
||||
let mut top_collector = TopCollector::with_limit(2);
|
||||
let mut count_collector = CountCollector::default();
|
||||
{
|
||||
let mut collectors = chain().push(&mut top_collector).push(&mut count_collector);
|
||||
collectors.collect(1, 0.2);
|
||||
collectors.collect(2, 0.1);
|
||||
collectors.collect(3, 0.5);
|
||||
}
|
||||
assert_eq!(count_collector.count(), 3);
|
||||
assert!(top_collector.at_capacity());
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,5 @@
|
||||
use super::Collector;
|
||||
use collector::SegmentCollector;
|
||||
use DocId;
|
||||
use Result;
|
||||
use Score;
|
||||
@@ -11,14 +12,14 @@ use SegmentReader;
|
||||
/// ```rust
|
||||
/// #[macro_use]
|
||||
/// extern crate tantivy;
|
||||
/// use tantivy::schema::{SchemaBuilder, TEXT};
|
||||
/// use tantivy::schema::{Schema, TEXT};
|
||||
/// use tantivy::{Index, Result};
|
||||
/// use tantivy::collector::CountCollector;
|
||||
/// use tantivy::collector::Count;
|
||||
/// use tantivy::query::QueryParser;
|
||||
///
|
||||
/// # fn main() { example().unwrap(); }
|
||||
/// fn example() -> Result<()> {
|
||||
/// let mut schema_builder = SchemaBuilder::new();
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
/// let title = schema_builder.add_text_field("title", TEXT);
|
||||
/// let schema = schema_builder.build();
|
||||
/// let index = Index::create_in_ram(schema);
|
||||
@@ -43,59 +44,86 @@ use SegmentReader;
|
||||
/// let searcher = index.searcher();
|
||||
///
|
||||
/// {
|
||||
/// let mut count_collector = CountCollector::default();
|
||||
/// let query_parser = QueryParser::for_index(&index, vec![title]);
|
||||
/// let query = query_parser.parse_query("diary")?;
|
||||
/// searcher.search(&*query, &mut count_collector).unwrap();
|
||||
/// let count = searcher.search(&query, &Count).unwrap();
|
||||
///
|
||||
/// assert_eq!(count_collector.count(), 2);
|
||||
/// assert_eq!(count, 2);
|
||||
/// }
|
||||
///
|
||||
/// Ok(())
|
||||
/// }
|
||||
/// ```
|
||||
#[derive(Default)]
|
||||
pub struct CountCollector {
|
||||
count: usize,
|
||||
}
|
||||
pub struct Count;
|
||||
|
||||
impl CountCollector {
|
||||
/// Returns the count of documents that were
|
||||
/// collected.
|
||||
pub fn count(&self) -> usize {
|
||||
self.count
|
||||
}
|
||||
}
|
||||
impl Collector for Count {
|
||||
type Fruit = usize;
|
||||
|
||||
impl Collector for CountCollector {
|
||||
fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
type Child = SegmentCountCollector;
|
||||
|
||||
fn collect(&mut self, _: DocId, _: Score) {
|
||||
self.count += 1;
|
||||
fn for_segment(&self, _: SegmentLocalId, _: &SegmentReader) -> Result<SegmentCountCollector> {
|
||||
Ok(SegmentCountCollector::default())
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn merge_fruits(&self, segment_counts: Vec<usize>) -> Result<usize> {
|
||||
Ok(segment_counts.into_iter().sum())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct SegmentCountCollector {
|
||||
count: usize,
|
||||
}
|
||||
|
||||
impl SegmentCollector for SegmentCountCollector {
|
||||
type Fruit = usize;
|
||||
|
||||
fn collect(&mut self, _: DocId, _: Score) {
|
||||
self.count += 1;
|
||||
}
|
||||
|
||||
fn harvest(self) -> usize {
|
||||
self.count
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use collector::{Collector, CountCollector};
|
||||
use super::{Count, SegmentCountCollector};
|
||||
use collector::Collector;
|
||||
use collector::SegmentCollector;
|
||||
|
||||
#[test]
|
||||
fn test_count_collector() {
|
||||
let mut count_collector = CountCollector::default();
|
||||
assert_eq!(count_collector.count(), 0);
|
||||
count_collector.collect(0u32, 1f32);
|
||||
assert_eq!(count_collector.count(), 1);
|
||||
assert_eq!(count_collector.count(), 1);
|
||||
count_collector.collect(1u32, 1f32);
|
||||
assert_eq!(count_collector.count(), 2);
|
||||
assert!(!count_collector.requires_scoring());
|
||||
fn test_count_collect_does_not_requires_scoring() {
|
||||
assert!(!Count.requires_scoring());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_segment_count_collector() {
|
||||
{
|
||||
let count_collector = SegmentCountCollector::default();
|
||||
assert_eq!(count_collector.harvest(), 0);
|
||||
}
|
||||
{
|
||||
let mut count_collector = SegmentCountCollector::default();
|
||||
count_collector.collect(0u32, 1f32);
|
||||
assert_eq!(count_collector.harvest(), 1);
|
||||
}
|
||||
{
|
||||
let mut count_collector = SegmentCountCollector::default();
|
||||
count_collector.collect(0u32, 1f32);
|
||||
assert_eq!(count_collector.harvest(), 1);
|
||||
}
|
||||
{
|
||||
let mut count_collector = SegmentCountCollector::default();
|
||||
count_collector.collect(0u32, 1f32);
|
||||
count_collector.collect(1u32, 1f32);
|
||||
assert_eq!(count_collector.harvest(), 2);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,20 +1,17 @@
|
||||
use collector::Collector;
|
||||
use collector::SegmentCollector;
|
||||
use docset::SkipResult;
|
||||
use fastfield::FacetReader;
|
||||
use schema::Facet;
|
||||
use schema::Field;
|
||||
use std::cell::UnsafeCell;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::btree_map;
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::BTreeSet;
|
||||
use std::collections::BinaryHeap;
|
||||
use std::collections::Bound;
|
||||
use std::iter::Peekable;
|
||||
use std::mem;
|
||||
use std::{u64, usize};
|
||||
use termdict::TermMerger;
|
||||
|
||||
use std::cmp::Ordering;
|
||||
use DocId;
|
||||
use Result;
|
||||
use Score;
|
||||
@@ -46,12 +43,6 @@ impl<'a> Ord for Hit<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
struct SegmentFacetCounter {
|
||||
pub facet_reader: FacetReader,
|
||||
pub facet_ords: Vec<u64>,
|
||||
pub facet_counts: Vec<u64>,
|
||||
}
|
||||
|
||||
fn facet_depth(facet_bytes: &[u8]) -> usize {
|
||||
if facet_bytes.is_empty() {
|
||||
0
|
||||
@@ -91,14 +82,14 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
|
||||
/// ```rust
|
||||
/// #[macro_use]
|
||||
/// extern crate tantivy;
|
||||
/// use tantivy::schema::{Facet, SchemaBuilder, TEXT};
|
||||
/// use tantivy::schema::{Facet, Schema, TEXT};
|
||||
/// use tantivy::{Index, Result};
|
||||
/// use tantivy::collector::FacetCollector;
|
||||
/// use tantivy::query::AllQuery;
|
||||
///
|
||||
/// # fn main() { example().unwrap(); }
|
||||
/// fn example() -> Result<()> {
|
||||
/// let mut schema_builder = SchemaBuilder::new();
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
///
|
||||
/// // Facets have their own specific type.
|
||||
/// // It is not a bad practice to put all of your
|
||||
@@ -141,13 +132,10 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
|
||||
/// let mut facet_collector = FacetCollector::for_field(facet);
|
||||
/// facet_collector.add_facet("/lang");
|
||||
/// facet_collector.add_facet("/category");
|
||||
/// searcher.search(&AllQuery, &mut facet_collector).unwrap();
|
||||
///
|
||||
/// // this object contains count aggregate for all of the facets.
|
||||
/// let counts = facet_collector.harvest();
|
||||
/// let facet_counts = searcher.search(&AllQuery, &facet_collector).unwrap();
|
||||
///
|
||||
/// // This lists all of the facet counts
|
||||
/// let facets: Vec<(&Facet, u64)> = counts
|
||||
/// let facets: Vec<(&Facet, u64)> = facet_counts
|
||||
/// .get("/category")
|
||||
/// .collect();
|
||||
/// assert_eq!(facets, vec![
|
||||
@@ -159,13 +147,10 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
|
||||
/// {
|
||||
/// let mut facet_collector = FacetCollector::for_field(facet);
|
||||
/// facet_collector.add_facet("/category/fiction");
|
||||
/// searcher.search(&AllQuery, &mut facet_collector).unwrap();
|
||||
///
|
||||
/// // this object contains count aggregate for all of the facets.
|
||||
/// let counts = facet_collector.harvest();
|
||||
/// let facet_counts = searcher.search(&AllQuery, &facet_collector).unwrap();
|
||||
///
|
||||
/// // This lists all of the facet counts
|
||||
/// let facets: Vec<(&Facet, u64)> = counts
|
||||
/// let facets: Vec<(&Facet, u64)> = facet_counts
|
||||
/// .get("/category/fiction")
|
||||
/// .collect();
|
||||
/// assert_eq!(facets, vec![
|
||||
@@ -178,13 +163,10 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
|
||||
/// {
|
||||
/// let mut facet_collector = FacetCollector::for_field(facet);
|
||||
/// facet_collector.add_facet("/category/fiction");
|
||||
/// searcher.search(&AllQuery, &mut facet_collector).unwrap();
|
||||
///
|
||||
/// // this object contains count aggregate for all of the facets.
|
||||
/// let counts = facet_collector.harvest();
|
||||
/// let facet_counts = searcher.search(&AllQuery, &facet_collector).unwrap();
|
||||
///
|
||||
/// // This lists all of the facet counts
|
||||
/// let facets: Vec<(&Facet, u64)> = counts.top_k("/category/fiction", 1);
|
||||
/// let facets: Vec<(&Facet, u64)> = facet_counts.top_k("/category/fiction", 1);
|
||||
/// assert_eq!(facets, vec![
|
||||
/// (&Facet::from("/category/fiction/fantasy"), 2)
|
||||
/// ]);
|
||||
@@ -194,28 +176,28 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
|
||||
/// }
|
||||
/// ```
|
||||
pub struct FacetCollector {
|
||||
facet_ords: Vec<u64>,
|
||||
field: Field,
|
||||
ff_reader: Option<UnsafeCell<FacetReader>>,
|
||||
segment_counters: Vec<SegmentFacetCounter>,
|
||||
|
||||
// facet_ord -> collapse facet_id
|
||||
current_segment_collapse_mapping: Vec<usize>,
|
||||
// collapse facet_id -> count
|
||||
current_segment_counts: Vec<u64>,
|
||||
// collapse facet_id -> facet_ord
|
||||
current_collapse_facet_ords: Vec<u64>,
|
||||
|
||||
facets: BTreeSet<Facet>,
|
||||
}
|
||||
|
||||
pub struct FacetSegmentCollector {
|
||||
reader: FacetReader,
|
||||
facet_ords_buf: Vec<u64>,
|
||||
// facet_ord -> collapse facet_id
|
||||
collapse_mapping: Vec<usize>,
|
||||
// collapse facet_id -> count
|
||||
counts: Vec<u64>,
|
||||
// collapse facet_id -> facet_ord
|
||||
collapse_facet_ords: Vec<u64>,
|
||||
}
|
||||
|
||||
fn skip<'a, I: Iterator<Item = &'a Facet>>(
|
||||
target: &[u8],
|
||||
collapse_it: &mut Peekable<I>,
|
||||
) -> SkipResult {
|
||||
loop {
|
||||
match collapse_it.peek() {
|
||||
Some(facet_bytes) => match facet_bytes.encoded_bytes().cmp(target) {
|
||||
Some(facet_bytes) => match facet_bytes.encoded_str().as_bytes().cmp(target) {
|
||||
Ordering::Less => {}
|
||||
Ordering::Greater => {
|
||||
return SkipResult::OverStep;
|
||||
@@ -240,15 +222,8 @@ impl FacetCollector {
|
||||
/// is of the proper type.
|
||||
pub fn for_field(field: Field) -> FacetCollector {
|
||||
FacetCollector {
|
||||
facet_ords: Vec::with_capacity(255),
|
||||
segment_counters: Vec::new(),
|
||||
field,
|
||||
ff_reader: None,
|
||||
facets: BTreeSet::new(),
|
||||
|
||||
current_segment_collapse_mapping: Vec::new(),
|
||||
current_collapse_facet_ords: Vec::new(),
|
||||
current_segment_counts: Vec::new(),
|
||||
facets: BTreeSet::default(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -278,141 +253,100 @@ impl FacetCollector {
|
||||
}
|
||||
self.facets.insert(facet);
|
||||
}
|
||||
|
||||
fn set_collapse_mapping(&mut self, facet_reader: &FacetReader) {
|
||||
self.current_segment_collapse_mapping.clear();
|
||||
self.current_collapse_facet_ords.clear();
|
||||
self.current_segment_counts.clear();
|
||||
let mut collapse_facet_it = self.facets.iter().peekable();
|
||||
self.current_collapse_facet_ords.push(0);
|
||||
let mut facet_streamer = facet_reader.facet_dict().range().into_stream();
|
||||
if !facet_streamer.advance() {
|
||||
return;
|
||||
}
|
||||
'outer: loop {
|
||||
// at the beginning of this loop, facet_streamer
|
||||
// is positioned on a term that has not been processed yet.
|
||||
let skip_result = skip(facet_streamer.key(), &mut collapse_facet_it);
|
||||
match skip_result {
|
||||
SkipResult::Reached => {
|
||||
// we reach a facet we decided to collapse.
|
||||
let collapse_depth = facet_depth(facet_streamer.key());
|
||||
let mut collapsed_id = 0;
|
||||
self.current_segment_collapse_mapping.push(0);
|
||||
while facet_streamer.advance() {
|
||||
let depth = facet_depth(facet_streamer.key());
|
||||
if depth <= collapse_depth {
|
||||
continue 'outer;
|
||||
}
|
||||
if depth == collapse_depth + 1 {
|
||||
collapsed_id = self.current_collapse_facet_ords.len();
|
||||
self.current_collapse_facet_ords
|
||||
.push(facet_streamer.term_ord());
|
||||
self.current_segment_collapse_mapping.push(collapsed_id);
|
||||
} else {
|
||||
self.current_segment_collapse_mapping.push(collapsed_id);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
SkipResult::End | SkipResult::OverStep => {
|
||||
self.current_segment_collapse_mapping.push(0);
|
||||
if !facet_streamer.advance() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn finalize_segment(&mut self) {
|
||||
if self.ff_reader.is_some() {
|
||||
self.segment_counters.push(SegmentFacetCounter {
|
||||
facet_reader: self.ff_reader.take().unwrap().into_inner(),
|
||||
facet_ords: mem::replace(&mut self.current_collapse_facet_ords, Vec::new()),
|
||||
facet_counts: mem::replace(&mut self.current_segment_counts, Vec::new()),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the results of the collection.
|
||||
///
|
||||
/// This method does not just return the counters,
|
||||
/// it also translates the facet ordinals of the last segment.
|
||||
pub fn harvest(mut self) -> FacetCounts {
|
||||
self.finalize_segment();
|
||||
|
||||
let collapsed_facet_ords: Vec<&[u64]> = self.segment_counters
|
||||
.iter()
|
||||
.map(|segment_counter| &segment_counter.facet_ords[..])
|
||||
.collect();
|
||||
let collapsed_facet_counts: Vec<&[u64]> = self.segment_counters
|
||||
.iter()
|
||||
.map(|segment_counter| &segment_counter.facet_counts[..])
|
||||
.collect();
|
||||
|
||||
let facet_streams = self.segment_counters
|
||||
.iter()
|
||||
.map(|seg_counts| seg_counts.facet_reader.facet_dict().range().into_stream())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut facet_merger = TermMerger::new(facet_streams);
|
||||
let mut facet_counts = BTreeMap::new();
|
||||
|
||||
while facet_merger.advance() {
|
||||
let count = facet_merger
|
||||
.current_kvs()
|
||||
.iter()
|
||||
.map(|it| {
|
||||
let seg_ord = it.segment_ord;
|
||||
let term_ord = it.streamer.term_ord();
|
||||
collapsed_facet_ords[seg_ord]
|
||||
.binary_search(&term_ord)
|
||||
.map(|collapsed_term_id| {
|
||||
if collapsed_term_id == 0 {
|
||||
0
|
||||
} else {
|
||||
collapsed_facet_counts[seg_ord][collapsed_term_id]
|
||||
}
|
||||
})
|
||||
.unwrap_or(0)
|
||||
})
|
||||
.sum();
|
||||
if count > 0u64 {
|
||||
let bytes: Vec<u8> = facet_merger.key().to_owned();
|
||||
// may create a corrupted facet if the term dictionary is corrupted
|
||||
let facet = unsafe { Facet::from_encoded(bytes) };
|
||||
facet_counts.insert(facet, count);
|
||||
}
|
||||
}
|
||||
FacetCounts { facet_counts }
|
||||
}
|
||||
}
|
||||
|
||||
impl Collector for FacetCollector {
|
||||
fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
|
||||
self.finalize_segment();
|
||||
type Fruit = FacetCounts;
|
||||
|
||||
type Child = FacetSegmentCollector;
|
||||
|
||||
fn for_segment(
|
||||
&self,
|
||||
_: SegmentLocalId,
|
||||
reader: &SegmentReader,
|
||||
) -> Result<FacetSegmentCollector> {
|
||||
let facet_reader = reader.facet_reader(self.field)?;
|
||||
self.set_collapse_mapping(&facet_reader);
|
||||
self.current_segment_counts
|
||||
.resize(self.current_collapse_facet_ords.len(), 0);
|
||||
self.ff_reader = Some(UnsafeCell::new(facet_reader));
|
||||
Ok(())
|
||||
|
||||
let mut collapse_mapping = Vec::new();
|
||||
let mut counts = Vec::new();
|
||||
let mut collapse_facet_ords = Vec::new();
|
||||
|
||||
let mut collapse_facet_it = self.facets.iter().peekable();
|
||||
collapse_facet_ords.push(0);
|
||||
{
|
||||
let mut facet_streamer = facet_reader.facet_dict().range().into_stream();
|
||||
if facet_streamer.advance() {
|
||||
'outer: loop {
|
||||
// at the beginning of this loop, facet_streamer
|
||||
// is positioned on a term that has not been processed yet.
|
||||
let skip_result = skip(facet_streamer.key(), &mut collapse_facet_it);
|
||||
match skip_result {
|
||||
SkipResult::Reached => {
|
||||
// we reach a facet we decided to collapse.
|
||||
let collapse_depth = facet_depth(facet_streamer.key());
|
||||
let mut collapsed_id = 0;
|
||||
collapse_mapping.push(0);
|
||||
while facet_streamer.advance() {
|
||||
let depth = facet_depth(facet_streamer.key());
|
||||
if depth <= collapse_depth {
|
||||
continue 'outer;
|
||||
}
|
||||
if depth == collapse_depth + 1 {
|
||||
collapsed_id = collapse_facet_ords.len();
|
||||
collapse_facet_ords.push(facet_streamer.term_ord());
|
||||
collapse_mapping.push(collapsed_id);
|
||||
} else {
|
||||
collapse_mapping.push(collapsed_id);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
SkipResult::End | SkipResult::OverStep => {
|
||||
collapse_mapping.push(0);
|
||||
if !facet_streamer.advance() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
counts.resize(collapse_facet_ords.len(), 0);
|
||||
|
||||
Ok(FacetSegmentCollector {
|
||||
reader: facet_reader,
|
||||
facet_ords_buf: Vec::with_capacity(255),
|
||||
collapse_mapping,
|
||||
counts,
|
||||
collapse_facet_ords,
|
||||
})
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn merge_fruits(&self, segments_facet_counts: Vec<FacetCounts>) -> Result<FacetCounts> {
|
||||
let mut facet_counts: BTreeMap<Facet, u64> = BTreeMap::new();
|
||||
for segment_facet_counts in segments_facet_counts {
|
||||
for (facet, count) in segment_facet_counts.facet_counts {
|
||||
*(facet_counts.entry(facet).or_insert(0)) += count;
|
||||
}
|
||||
}
|
||||
Ok(FacetCounts { facet_counts })
|
||||
}
|
||||
}
|
||||
|
||||
impl SegmentCollector for FacetSegmentCollector {
|
||||
type Fruit = FacetCounts;
|
||||
|
||||
fn collect(&mut self, doc: DocId, _: Score) {
|
||||
let facet_reader: &mut FacetReader = unsafe {
|
||||
&mut *self.ff_reader
|
||||
.as_ref()
|
||||
.expect("collect() was called before set_segment. This should never happen.")
|
||||
.get()
|
||||
};
|
||||
facet_reader.facet_ords(doc, &mut self.facet_ords);
|
||||
self.reader.facet_ords(doc, &mut self.facet_ords_buf);
|
||||
let mut previous_collapsed_ord: usize = usize::MAX;
|
||||
for &facet_ord in &self.facet_ords {
|
||||
let collapsed_ord = self.current_segment_collapse_mapping[facet_ord as usize];
|
||||
self.current_segment_counts[collapsed_ord] += if collapsed_ord == previous_collapsed_ord
|
||||
{
|
||||
for &facet_ord in &self.facet_ords_buf {
|
||||
let collapsed_ord = self.collapse_mapping[facet_ord as usize];
|
||||
self.counts[collapsed_ord] += if collapsed_ord == previous_collapsed_ord {
|
||||
0
|
||||
} else {
|
||||
1
|
||||
@@ -421,8 +355,24 @@ impl Collector for FacetCollector {
|
||||
}
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
false
|
||||
/// Returns the results of the collection.
|
||||
///
|
||||
/// This method does not just return the counters,
|
||||
/// it also translates the facet ordinals of the last segment.
|
||||
fn harvest(self) -> FacetCounts {
|
||||
let mut facet_counts = BTreeMap::new();
|
||||
let facet_dict = self.reader.facet_dict();
|
||||
for (collapsed_facet_ord, count) in self.counts.iter().cloned().enumerate() {
|
||||
if count == 0 {
|
||||
continue;
|
||||
}
|
||||
let mut facet = vec![];
|
||||
let facet_ord = self.collapse_facet_ords[collapsed_facet_ord];
|
||||
facet_dict.ord_to_term(facet_ord as u64, &mut facet);
|
||||
// TODO
|
||||
facet_counts.insert(Facet::from_encoded(facet).unwrap(), count);
|
||||
}
|
||||
FacetCounts { facet_counts }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -454,9 +404,9 @@ impl FacetCounts {
|
||||
let right_bound = if facet.is_root() {
|
||||
Bound::Unbounded
|
||||
} else {
|
||||
let mut facet_after_bytes: Vec<u8> = facet.encoded_bytes().to_owned();
|
||||
facet_after_bytes.push(1u8);
|
||||
let facet_after = unsafe { Facet::from_encoded(facet_after_bytes) }; // ok logic
|
||||
let mut facet_after_bytes: String = facet.encoded_str().to_owned();
|
||||
facet_after_bytes.push('\u{1}');
|
||||
let facet_after = Facet::from_encoded_string(facet_after_bytes);
|
||||
Bound::Excluded(facet_after)
|
||||
};
|
||||
let underlying: btree_map::Range<_, _> = self.facet_counts.range((left_bound, right_bound));
|
||||
@@ -470,17 +420,24 @@ impl FacetCounts {
|
||||
let mut heap = BinaryHeap::with_capacity(k);
|
||||
let mut it = self.get(facet);
|
||||
|
||||
// push the first k elements to bring the heap
|
||||
// to capacity
|
||||
for (facet, count) in (&mut it).take(k) {
|
||||
heap.push(Hit { count, facet });
|
||||
}
|
||||
|
||||
let mut lowest_count: u64 = heap.peek().map(|hit| hit.count).unwrap_or(u64::MIN);
|
||||
let mut lowest_count: u64 = heap.peek().map(|hit| hit.count).unwrap_or(u64::MIN); //< the `unwrap_or` case may be triggered but the value
|
||||
// is never used in that case.
|
||||
|
||||
for (facet, count) in it {
|
||||
if count > lowest_count {
|
||||
lowest_count = count;
|
||||
if let Some(mut head) = heap.peek_mut() {
|
||||
*head = Hit { count, facet };
|
||||
}
|
||||
// the heap gets reconstructed at this point
|
||||
if let Some(head) = heap.peek() {
|
||||
lowest_count = head.count;
|
||||
}
|
||||
}
|
||||
}
|
||||
heap.into_sorted_vec()
|
||||
@@ -495,14 +452,15 @@ mod tests {
|
||||
use super::{FacetCollector, FacetCounts};
|
||||
use core::Index;
|
||||
use query::AllQuery;
|
||||
use rand::distributions::Uniform;
|
||||
use rand::prelude::SliceRandom;
|
||||
use rand::{thread_rng, Rng};
|
||||
use schema::Field;
|
||||
use schema::{Document, Facet, SchemaBuilder};
|
||||
use schema::{Document, Facet, Field, Schema};
|
||||
use std::iter;
|
||||
|
||||
#[test]
|
||||
fn test_facet_collector_drilldown() {
|
||||
let mut schema_builder = SchemaBuilder::new();
|
||||
let mut schema_builder = Schema::builder();
|
||||
let facet_field = schema_builder.add_facet_field("facet");
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
@@ -527,12 +485,10 @@ mod tests {
|
||||
index_writer.commit().unwrap();
|
||||
index.load_searchers().unwrap();
|
||||
let searcher = index.searcher();
|
||||
|
||||
let mut facet_collector = FacetCollector::for_field(facet_field);
|
||||
facet_collector.add_facet(Facet::from("/top1"));
|
||||
searcher.search(&AllQuery, &mut facet_collector).unwrap();
|
||||
let counts = searcher.search(&AllQuery, &facet_collector).unwrap();
|
||||
|
||||
let counts: FacetCounts = facet_collector.harvest();
|
||||
{
|
||||
let facets: Vec<(String, u64)> = counts
|
||||
.get("/top1")
|
||||
@@ -545,24 +501,47 @@ mod tests {
|
||||
("/top1/mid1", 50),
|
||||
("/top1/mid2", 50),
|
||||
("/top1/mid3", 50),
|
||||
].iter()
|
||||
.map(|&(facet_str, count)| (String::from(facet_str), count))
|
||||
.collect::<Vec<_>>()
|
||||
]
|
||||
.iter()
|
||||
.map(|&(facet_str, count)| (String::from(facet_str), count))
|
||||
.collect::<Vec<_>>()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "Tried to add a facet which is a descendant of \
|
||||
an already added facet."
|
||||
)]
|
||||
#[should_panic(expected = "Tried to add a facet which is a descendant of \
|
||||
an already added facet.")]
|
||||
fn test_misused_facet_collector() {
|
||||
let mut facet_collector = FacetCollector::for_field(Field(0));
|
||||
facet_collector.add_facet(Facet::from("/country"));
|
||||
facet_collector.add_facet(Facet::from("/country/europe"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_doc_unsorted_multifacet() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let facet_field = schema_builder.add_facet_field("facets");
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
||||
index_writer.add_document(doc!(
|
||||
facet_field => Facet::from_text(&"/subjects/A/a"),
|
||||
facet_field => Facet::from_text(&"/subjects/B/a"),
|
||||
facet_field => Facet::from_text(&"/subjects/A/b"),
|
||||
facet_field => Facet::from_text(&"/subjects/B/b"),
|
||||
));
|
||||
index_writer.commit().unwrap();
|
||||
index.load_searchers().unwrap();
|
||||
let searcher = index.searcher();
|
||||
assert_eq!(searcher.num_docs(), 1);
|
||||
let mut facet_collector = FacetCollector::for_field(facet_field);
|
||||
facet_collector.add_facet("/subjects");
|
||||
let counts = searcher.search(&AllQuery, &facet_collector).unwrap();
|
||||
let facets: Vec<(&Facet, u64)> = counts.get("/subjects").collect();
|
||||
assert_eq!(facets[0].1, 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_non_used_facet_collector() {
|
||||
let mut facet_collector = FacetCollector::for_field(Field(0));
|
||||
@@ -572,20 +551,28 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_facet_collector_topk() {
|
||||
let mut schema_builder = SchemaBuilder::new();
|
||||
let mut schema_builder = Schema::builder();
|
||||
let facet_field = schema_builder.add_facet_field("facet");
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
|
||||
let uniform = Uniform::new_inclusive(1, 100_000);
|
||||
let mut docs: Vec<Document> = vec![("a", 10), ("b", 100), ("c", 7), ("d", 12), ("e", 21)]
|
||||
.into_iter()
|
||||
.flat_map(|(c, count)| {
|
||||
let facet = Facet::from(&format!("/facet_{}", c));
|
||||
let facet = Facet::from(&format!("/facet/{}", c));
|
||||
let doc = doc!(facet_field => facet);
|
||||
iter::repeat(doc).take(count)
|
||||
})
|
||||
.map(|mut doc| {
|
||||
doc.add_facet(
|
||||
facet_field,
|
||||
&format!("/facet/{}", thread_rng().sample(&uniform)),
|
||||
);
|
||||
doc
|
||||
})
|
||||
.collect();
|
||||
thread_rng().shuffle(&mut docs[..]);
|
||||
docs[..].shuffle(&mut thread_rng());
|
||||
|
||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
||||
for doc in docs {
|
||||
@@ -597,18 +584,17 @@ mod tests {
|
||||
let searcher = index.searcher();
|
||||
|
||||
let mut facet_collector = FacetCollector::for_field(facet_field);
|
||||
facet_collector.add_facet("/");
|
||||
searcher.search(&AllQuery, &mut facet_collector).unwrap();
|
||||
facet_collector.add_facet("/facet");
|
||||
let counts: FacetCounts = searcher.search(&AllQuery, &facet_collector).unwrap();
|
||||
|
||||
let counts: FacetCounts = facet_collector.harvest();
|
||||
{
|
||||
let facets: Vec<(&Facet, u64)> = counts.top_k("/", 3);
|
||||
let facets: Vec<(&Facet, u64)> = counts.top_k("/facet", 3);
|
||||
assert_eq!(
|
||||
facets,
|
||||
vec![
|
||||
(&Facet::from("/facet_b"), 100),
|
||||
(&Facet::from("/facet_e"), 21),
|
||||
(&Facet::from("/facet_d"), 12),
|
||||
(&Facet::from("/facet/b"), 100),
|
||||
(&Facet::from("/facet/e"), 21),
|
||||
(&Facet::from("/facet/d"), 12),
|
||||
]
|
||||
);
|
||||
}
|
||||
@@ -623,13 +609,13 @@ mod bench {
|
||||
use query::AllQuery;
|
||||
use rand::{thread_rng, Rng};
|
||||
use schema::Facet;
|
||||
use schema::SchemaBuilder;
|
||||
use schema::Schema;
|
||||
use test::Bencher;
|
||||
use Index;
|
||||
|
||||
#[bench]
|
||||
fn bench_facet_collector(b: &mut Bencher) {
|
||||
let mut schema_builder = SchemaBuilder::new();
|
||||
let mut schema_builder = Schema::builder();
|
||||
let facet_field = schema_builder.add_facet_field("facet");
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
@@ -653,8 +639,8 @@ mod bench {
|
||||
|
||||
b.iter(|| {
|
||||
let searcher = index.searcher();
|
||||
let mut facet_collector = FacetCollector::for_field(facet_field);
|
||||
searcher.search(&AllQuery, &mut facet_collector).unwrap();
|
||||
let facet_collector = FacetCollector::for_field(facet_field);
|
||||
searcher.search(&AllQuery, &facet_collector).unwrap();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,7 +79,7 @@ mod tests {
|
||||
// make sure we have facet counters correctly filled
|
||||
fn test_facet_collector_results() {
|
||||
|
||||
let mut schema_builder = schema::SchemaBuilder::new();
|
||||
let mut schema_builder = schema::Schema::builder();
|
||||
let num_field_i64 = schema_builder.add_i64_field("num_i64", FAST);
|
||||
let num_field_u64 = schema_builder.add_u64_field("num_u64", FAST);
|
||||
let text_field = schema_builder.add_text_field("text", STRING);
|
||||
|
||||
@@ -1,7 +1,91 @@
|
||||
/*!
|
||||
Defines how the documents matching a search query should be processed.
|
||||
|
||||
# Collectors
|
||||
|
||||
Collectors define the information you want to extract from the documents matching the queries.
|
||||
In tantivy jargon, we call this information your search "fruit".
|
||||
|
||||
Your fruit could, for instance, be:
|
||||
- [the count of matching documents](./struct.Count.html)
|
||||
- [the top 10 documents, by relevancy or by a fast field](./struct.TopDocs.html)
|
||||
- [facet counts](./struct.FacetCollector.html)
|
||||
|
||||
At one point in your code, you will trigger the actual search operation by calling
|
||||
[the `search(...)` method of your `Searcher` object](../struct.Searcher.html#method.search).
|
||||
This call will look like this.
|
||||
|
||||
```verbatim
|
||||
let fruit = searcher.search(&query, &collector)?;
|
||||
```
|
||||
|
||||
Here the type of fruit is actually determined as an associated type of the collector (`Collector::Fruit`).
|
||||
|
||||
|
||||
# Combining several collectors
|
||||
|
||||
A rich search experience often requires running several collectors on your search query.
|
||||
For instance,
|
||||
- selecting the top-K products matching your query
|
||||
- counting the matching documents
|
||||
- computing several facets
|
||||
- computing statistics about the matching product prices
|
||||
|
||||
A simple and efficient way to do that is to pass your collectors as one tuple.
|
||||
The resulting `Fruit` will then be a typed tuple with each collector's original fruits
|
||||
in their respective positions.
|
||||
|
||||
```rust
|
||||
# extern crate tantivy;
|
||||
# use tantivy::schema::*;
|
||||
# use tantivy::*;
|
||||
# use tantivy::query::*;
|
||||
use tantivy::collector::{Count, TopDocs};
|
||||
#
|
||||
# fn main() -> tantivy::Result<()> {
|
||||
# let mut schema_builder = Schema::builder();
|
||||
# let title = schema_builder.add_text_field("title", TEXT);
|
||||
# let schema = schema_builder.build();
|
||||
# let index = Index::create_in_ram(schema);
|
||||
# let mut index_writer = index.writer(3_000_000)?;
|
||||
# index_writer.add_document(doc!(
|
||||
# title => "The Name of the Wind",
|
||||
# ));
|
||||
# index_writer.add_document(doc!(
|
||||
# title => "The Diary of Muadib",
|
||||
# ));
|
||||
# index_writer.commit().unwrap();
|
||||
# index.load_searchers()?;
|
||||
# let searcher = index.searcher();
|
||||
# let query_parser = QueryParser::for_index(&index, vec![title]);
|
||||
# let query = query_parser.parse_query("diary")?;
|
||||
let (doc_count, top_docs): (usize, Vec<(Score, DocAddress)>) =
|
||||
searcher.search(&query, &(Count, TopDocs::with_limit(2)))?;
|
||||
# Ok(())
|
||||
# }
|
||||
```
|
||||
|
||||
The `Collector` trait is implemented for up to 4 collectors.
|
||||
If you have more than 4 collectors, you can either group them into
|
||||
tuples of tuples `(a,(b,(c,d)))`, or rely on `MultiCollector`.
|
||||
|
||||
# Combining several collectors dynamically
|
||||
|
||||
Combining collectors into a tuple is a zero-cost abstraction: everything
|
||||
happens as if you had manually implemented a single collector
|
||||
combining all of their features.
|
||||
|
||||
Unfortunately, it requires you to know your collector types at compile time.
|
||||
If, on the other hand, the collectors depend on some query parameter,
|
||||
you can rely on `MultiCollector`, as sketched below.
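
Here is a hedged sketch of what that looks like (this fragment assumes a `searcher` and a
`query` as above; the handle-based names `add_collector` and `extract` are taken from
`MultiCollector`'s API and should be checked against `multi_collector.rs`):

```verbatim
use tantivy::collector::{Count, MultiCollector, TopDocs};

let mut collectors = MultiCollector::new();
let top_docs_handle = collectors.add_collector(TopDocs::with_limit(2));
let count_handle = collectors.add_collector(Count);

let mut multi_fruit = searcher.search(&query, &collectors)?;
let count: usize = count_handle.extract(&mut multi_fruit);
let top_docs = top_docs_handle.extract(&mut multi_fruit);
```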
|
||||
|
||||
|
||||
# Implementing your own collectors.
|
||||
|
||||
See the `custom_collector` example.
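
For a feel of what that involves, here is a minimal sketch built directly on the
`Collector` and `SegmentCollector` traits defined in this module. It records the best
score seen in each segment and merges the per-segment maxima; the `MaxScore` and
`SegmentMaxScore` names are purely illustrative.

```rust
use tantivy::collector::{Collector, SegmentCollector};
use tantivy::{DocId, Result, Score, SegmentLocalId, SegmentReader};

/// Keeps track of the highest score produced by the query.
struct MaxScore;

#[derive(Default)]
struct SegmentMaxScore {
    max: Score,
}

impl Collector for MaxScore {
    // The whole collection boils down to a single score.
    type Fruit = Score;
    type Child = SegmentMaxScore;

    fn for_segment(&self, _: SegmentLocalId, _: &SegmentReader) -> Result<SegmentMaxScore> {
        Ok(SegmentMaxScore::default())
    }

    fn requires_scoring(&self) -> bool {
        true
    }

    fn merge_fruits(&self, segment_maxima: Vec<Score>) -> Result<Score> {
        Ok(segment_maxima.into_iter().fold(0.0, Score::max))
    }
}

impl SegmentCollector for SegmentMaxScore {
    type Fruit = Score;

    fn collect(&mut self, _doc: DocId, score: Score) {
        self.max = self.max.max(score);
    }

    fn harvest(self) -> Score {
        self.max
    }
}
```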
|
||||
|
||||
*/
|
||||
|
||||
use downcast;
|
||||
use DocId;
|
||||
use Result;
|
||||
use Score;
|
||||
@@ -9,238 +93,275 @@ use SegmentLocalId;
|
||||
use SegmentReader;
|
||||
|
||||
mod count_collector;
|
||||
pub use self::count_collector::CountCollector;
|
||||
pub use self::count_collector::Count;
|
||||
|
||||
mod multi_collector;
|
||||
pub use self::multi_collector::MultiCollector;
|
||||
|
||||
mod top_collector;
|
||||
pub use self::top_collector::TopCollector;
|
||||
|
||||
mod top_score_collector;
|
||||
pub use self::top_score_collector::TopDocs;
|
||||
|
||||
mod top_field_collector;
|
||||
pub use self::top_field_collector::TopDocsByField;
|
||||
|
||||
mod facet_collector;
|
||||
pub use self::facet_collector::FacetCollector;
|
||||
|
||||
mod chained_collector;
|
||||
pub use self::chained_collector::{chain, ChainedCollector};
|
||||
/// `Fruit` is the type for the result of our collection.
|
||||
/// e.g. `usize` for the `Count` collector.
|
||||
pub trait Fruit: Send + downcast::Any {}
|
||||
|
||||
impl<T> Fruit for T where T: Send + downcast::Any {}
|
||||
|
||||
/// Collectors are in charge of collecting and retaining relevant
|
||||
/// information from the document found and scored by the query.
|
||||
///
|
||||
///
|
||||
/// For instance,
|
||||
///
|
||||
/// - keeping track of the top 10 best documents
|
||||
/// - computing a breakdown over a fast field
|
||||
/// - computing the number of documents matching the query
|
||||
///
|
||||
/// Queries are in charge of pushing the `DocSet` to the collector.
|
||||
/// Our search index is in fact a collection of segments, so
|
||||
/// the `Collector` trait is actually more of a factory to instantiate
|
||||
/// `SegmentCollector`s for each segment.
|
||||
///
|
||||
/// As they work on multiple segments, they first inform
|
||||
/// the collector of a change in a segment and then
|
||||
/// call the `collect` method to push the document to the collector.
|
||||
///
|
||||
/// Temporally, our collector will receive calls
|
||||
/// - `.set_segment(0, segment_reader_0)`
|
||||
/// - `.collect(doc0_of_segment_0)`
|
||||
/// - `.collect(...)`
|
||||
/// - `.collect(last_doc_of_segment_0)`
|
||||
/// - `.set_segment(1, segment_reader_1)`
|
||||
/// - `.collect(doc0_of_segment_1)`
|
||||
/// - `.collect(...)`
|
||||
/// - `.collect(last_doc_of_segment_1)`
|
||||
/// - `...`
|
||||
/// - `.collect(last_doc_of_last_segment)`
|
||||
/// The collection logic itself is in the `SegmentCollector`.
|
||||
///
|
||||
/// Segments are not guaranteed to be visited in any specific order.
|
||||
pub trait Collector {
|
||||
pub trait Collector: Sync {
|
||||
/// `Fruit` is the type for the result of our collection.
|
||||
/// e.g. `usize` for the `Count` collector.
|
||||
type Fruit: Fruit;
|
||||
|
||||
/// Type of the `SegmentCollector` associated to this collector.
|
||||
type Child: SegmentCollector<Fruit = Self::Fruit>;
|
||||
|
||||
/// `set_segment` is called before beginning to enumerate
|
||||
/// on this segment.
|
||||
fn set_segment(
|
||||
&mut self,
|
||||
fn for_segment(
|
||||
&self,
|
||||
segment_local_id: SegmentLocalId,
|
||||
segment: &SegmentReader,
|
||||
) -> Result<()>;
|
||||
/// The query pushes the scored document to the collector via this method.
|
||||
fn collect(&mut self, doc: DocId, score: Score);
|
||||
) -> Result<Self::Child>;
|
||||
|
||||
/// Returns true iff the collector requires to compute scores for documents.
|
||||
fn requires_scoring(&self) -> bool;
|
||||
|
||||
/// Combines the fruit associated to the collection of each segments
|
||||
/// into one fruit.
|
||||
fn merge_fruits(&self, segment_fruits: Vec<Self::Fruit>) -> Result<Self::Fruit>;
|
||||
}
|
||||
|
||||
impl<'a, C: Collector> Collector for &'a mut C {
|
||||
fn set_segment(
|
||||
&mut self,
|
||||
segment_local_id: SegmentLocalId,
|
||||
segment: &SegmentReader,
|
||||
) -> Result<()> {
|
||||
(*self).set_segment(segment_local_id, segment)
|
||||
}
|
||||
/// The `SegmentCollector` is the trait in charge of defining the
|
||||
/// collect operation at the scale of the segment.
|
||||
///
|
||||
/// `.collect(doc, score)` will be called for every document
|
||||
/// matching the query.
|
||||
pub trait SegmentCollector: 'static {
|
||||
/// `Fruit` is the type for the result of our collection.
|
||||
/// e.g. `usize` for the `Count` collector.
|
||||
type Fruit: Fruit;
|
||||
|
||||
/// The query pushes the scored document to the collector via this method.
|
||||
fn collect(&mut self, doc: DocId, score: Score) {
|
||||
C::collect(self, doc, score)
|
||||
fn collect(&mut self, doc: DocId, score: Score);
|
||||
|
||||
/// Extract the fruit of the collection from the `SegmentCollector`.
|
||||
fn harvest(self) -> Self::Fruit;
|
||||
}
|
||||
|
||||
// -----------------------------------------------
|
||||
// Tuple implementations.
|
||||
|
||||
impl<Left, Right> Collector for (Left, Right)
|
||||
where
|
||||
Left: Collector,
|
||||
Right: Collector,
|
||||
{
|
||||
type Fruit = (Left::Fruit, Right::Fruit);
|
||||
type Child = (Left::Child, Right::Child);
|
||||
|
||||
fn for_segment(&self, segment_local_id: u32, segment: &SegmentReader) -> Result<Self::Child> {
|
||||
let left = self.0.for_segment(segment_local_id, segment)?;
|
||||
let right = self.1.for_segment(segment_local_id, segment)?;
|
||||
Ok((left, right))
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
C::requires_scoring(self)
|
||||
self.0.requires_scoring() || self.1.requires_scoring()
|
||||
}
|
||||
|
||||
fn merge_fruits(
|
||||
&self,
|
||||
children: Vec<(Left::Fruit, Right::Fruit)>,
|
||||
) -> Result<(Left::Fruit, Right::Fruit)> {
|
||||
let mut left_fruits = vec![];
|
||||
let mut right_fruits = vec![];
|
||||
for (left_fruit, right_fruit) in children {
|
||||
left_fruits.push(left_fruit);
|
||||
right_fruits.push(right_fruit);
|
||||
}
|
||||
Ok((
|
||||
self.0.merge_fruits(left_fruits)?,
|
||||
self.1.merge_fruits(right_fruits)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl<Left, Right> SegmentCollector for (Left, Right)
|
||||
where
|
||||
Left: SegmentCollector,
|
||||
Right: SegmentCollector,
|
||||
{
|
||||
type Fruit = (Left::Fruit, Right::Fruit);
|
||||
|
||||
fn collect(&mut self, doc: DocId, score: Score) {
|
||||
self.0.collect(doc, score);
|
||||
self.1.collect(doc, score);
|
||||
}
|
||||
|
||||
fn harvest(self) -> <Self as SegmentCollector>::Fruit {
|
||||
(self.0.harvest(), self.1.harvest())
|
||||
}
|
||||
}
|
||||
|
||||
// 3-Tuple
|
||||
|
||||
impl<One, Two, Three> Collector for (One, Two, Three)
|
||||
where
|
||||
One: Collector,
|
||||
Two: Collector,
|
||||
Three: Collector,
|
||||
{
|
||||
type Fruit = (One::Fruit, Two::Fruit, Three::Fruit);
|
||||
type Child = (One::Child, Two::Child, Three::Child);
|
||||
|
||||
fn for_segment(&self, segment_local_id: u32, segment: &SegmentReader) -> Result<Self::Child> {
|
||||
let one = self.0.for_segment(segment_local_id, segment)?;
|
||||
let two = self.1.for_segment(segment_local_id, segment)?;
|
||||
let three = self.2.for_segment(segment_local_id, segment)?;
|
||||
Ok((one, two, three))
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
self.0.requires_scoring() || self.1.requires_scoring() || self.2.requires_scoring()
|
||||
}
|
||||
|
||||
fn merge_fruits(&self, children: Vec<Self::Fruit>) -> Result<Self::Fruit> {
|
||||
let mut one_fruits = vec![];
|
||||
let mut two_fruits = vec![];
|
||||
let mut three_fruits = vec![];
|
||||
for (one_fruit, two_fruit, three_fruit) in children {
|
||||
one_fruits.push(one_fruit);
|
||||
two_fruits.push(two_fruit);
|
||||
three_fruits.push(three_fruit);
|
||||
}
|
||||
Ok((
|
||||
self.0.merge_fruits(one_fruits)?,
|
||||
self.1.merge_fruits(two_fruits)?,
|
||||
self.2.merge_fruits(three_fruits)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl<One, Two, Three> SegmentCollector for (One, Two, Three)
|
||||
where
|
||||
One: SegmentCollector,
|
||||
Two: SegmentCollector,
|
||||
Three: SegmentCollector,
|
||||
{
|
||||
type Fruit = (One::Fruit, Two::Fruit, Three::Fruit);
|
||||
|
||||
fn collect(&mut self, doc: DocId, score: Score) {
|
||||
self.0.collect(doc, score);
|
||||
self.1.collect(doc, score);
|
||||
self.2.collect(doc, score);
|
||||
}
|
||||
|
||||
fn harvest(self) -> <Self as SegmentCollector>::Fruit {
|
||||
(self.0.harvest(), self.1.harvest(), self.2.harvest())
|
||||
}
|
||||
}
|
||||
|
||||
// 4-Tuple
|
||||
|
||||
impl<One, Two, Three, Four> Collector for (One, Two, Three, Four)
|
||||
where
|
||||
One: Collector,
|
||||
Two: Collector,
|
||||
Three: Collector,
|
||||
Four: Collector,
|
||||
{
|
||||
type Fruit = (One::Fruit, Two::Fruit, Three::Fruit, Four::Fruit);
|
||||
type Child = (One::Child, Two::Child, Three::Child, Four::Child);
|
||||
|
||||
fn for_segment(&self, segment_local_id: u32, segment: &SegmentReader) -> Result<Self::Child> {
|
||||
let one = self.0.for_segment(segment_local_id, segment)?;
|
||||
let two = self.1.for_segment(segment_local_id, segment)?;
|
||||
let three = self.2.for_segment(segment_local_id, segment)?;
|
||||
let four = self.3.for_segment(segment_local_id, segment)?;
|
||||
Ok((one, two, three, four))
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
self.0.requires_scoring()
|
||||
|| self.1.requires_scoring()
|
||||
|| self.2.requires_scoring()
|
||||
|| self.3.requires_scoring()
|
||||
}
|
||||
|
||||
fn merge_fruits(&self, children: Vec<Self::Fruit>) -> Result<Self::Fruit> {
|
||||
let mut one_fruits = vec![];
|
||||
let mut two_fruits = vec![];
|
||||
let mut three_fruits = vec![];
|
||||
let mut four_fruits = vec![];
|
||||
for (one_fruit, two_fruit, three_fruit, four_fruit) in children {
|
||||
one_fruits.push(one_fruit);
|
||||
two_fruits.push(two_fruit);
|
||||
three_fruits.push(three_fruit);
|
||||
four_fruits.push(four_fruit);
|
||||
}
|
||||
Ok((
|
||||
self.0.merge_fruits(one_fruits)?,
|
||||
self.1.merge_fruits(two_fruits)?,
|
||||
self.2.merge_fruits(three_fruits)?,
|
||||
self.3.merge_fruits(four_fruits)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl<One, Two, Three, Four> SegmentCollector for (One, Two, Three, Four)
|
||||
where
|
||||
One: SegmentCollector,
|
||||
Two: SegmentCollector,
|
||||
Three: SegmentCollector,
|
||||
Four: SegmentCollector,
|
||||
{
|
||||
type Fruit = (One::Fruit, Two::Fruit, Three::Fruit, Four::Fruit);
|
||||
|
||||
fn collect(&mut self, doc: DocId, score: Score) {
|
||||
self.0.collect(doc, score);
|
||||
self.1.collect(doc, score);
|
||||
self.2.collect(doc, score);
|
||||
self.3.collect(doc, score);
|
||||
}
|
||||
|
||||
fn harvest(self) -> <Self as SegmentCollector>::Fruit {
|
||||
(
|
||||
self.0.harvest(),
|
||||
self.1.harvest(),
|
||||
self.2.harvest(),
|
||||
self.3.harvest(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(missing_docs)]
|
||||
mod downcast_impl {
|
||||
downcast!(super::Fruit);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
|
||||
use super::*;
|
||||
use core::SegmentReader;
|
||||
use fastfield::BytesFastFieldReader;
|
||||
use fastfield::FastFieldReader;
|
||||
use schema::Field;
|
||||
use DocId;
|
||||
use Score;
|
||||
use SegmentLocalId;
|
||||
|
||||
/// Stores all of the doc ids.
|
||||
/// This collector is only used for tests.
|
||||
/// It is unusable in practice, as it does not store
|
||||
/// the segment ordinals
|
||||
pub struct TestCollector {
|
||||
offset: DocId,
|
||||
segment_max_doc: DocId,
|
||||
docs: Vec<DocId>,
|
||||
scores: Vec<Score>,
|
||||
}
|
||||
|
||||
impl TestCollector {
|
||||
/// Return the exhaustive list of documents.
|
||||
pub fn docs(self) -> Vec<DocId> {
|
||||
self.docs
|
||||
}
|
||||
|
||||
pub fn scores(self) -> Vec<Score> {
|
||||
self.scores
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for TestCollector {
|
||||
fn default() -> TestCollector {
|
||||
TestCollector {
|
||||
offset: 0,
|
||||
segment_max_doc: 0,
|
||||
docs: Vec::new(),
|
||||
scores: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Collector for TestCollector {
|
||||
fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
|
||||
self.offset += self.segment_max_doc;
|
||||
self.segment_max_doc = reader.max_doc();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn collect(&mut self, doc: DocId, score: Score) {
|
||||
self.docs.push(doc + self.offset);
|
||||
self.scores.push(score);
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
/// Collects in order all of the fast fields for all of the
|
||||
/// docs in the `DocSet`
|
||||
///
|
||||
/// This collector is mainly useful for tests.
|
||||
pub struct FastFieldTestCollector {
|
||||
vals: Vec<u64>,
|
||||
field: Field,
|
||||
ff_reader: Option<FastFieldReader<u64>>,
|
||||
}
|
||||
|
||||
impl FastFieldTestCollector {
|
||||
pub fn for_field(field: Field) -> FastFieldTestCollector {
|
||||
FastFieldTestCollector {
|
||||
vals: Vec::new(),
|
||||
field,
|
||||
ff_reader: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn vals(self) -> Vec<u64> {
|
||||
self.vals
|
||||
}
|
||||
}
|
||||
|
||||
impl Collector for FastFieldTestCollector {
|
||||
fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
|
||||
self.ff_reader = Some(reader.fast_field_reader(self.field)?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn collect(&mut self, doc: DocId, _score: Score) {
|
||||
let val = self.ff_reader.as_ref().unwrap().get(doc);
|
||||
self.vals.push(val);
|
||||
}
|
||||
fn requires_scoring(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Collects in order all of the fast field bytes for all of the
|
||||
/// docs in the `DocSet`
|
||||
///
|
||||
/// This collector is mainly useful for tests.
|
||||
pub struct BytesFastFieldTestCollector {
|
||||
vals: Vec<u8>,
|
||||
field: Field,
|
||||
ff_reader: Option<BytesFastFieldReader>,
|
||||
}
|
||||
|
||||
impl BytesFastFieldTestCollector {
|
||||
pub fn for_field(field: Field) -> BytesFastFieldTestCollector {
|
||||
BytesFastFieldTestCollector {
|
||||
vals: Vec::new(),
|
||||
field,
|
||||
ff_reader: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn vals(self) -> Vec<u8> {
|
||||
self.vals
|
||||
}
|
||||
}
|
||||
|
||||
impl Collector for BytesFastFieldTestCollector {
|
||||
fn set_segment(&mut self, _segment_local_id: u32, segment: &SegmentReader) -> Result<()> {
|
||||
self.ff_reader = Some(segment.bytes_fast_field_reader(self.field)?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn collect(&mut self, doc: u32, _score: f32) {
|
||||
let val = self.ff_reader.as_ref().unwrap().get_val(doc);
|
||||
self.vals.extend(val);
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(all(test, feature = "unstable"))]
|
||||
mod bench {
|
||||
use collector::{Collector, CountCollector};
|
||||
use test::Bencher;
|
||||
|
||||
#[bench]
|
||||
fn build_collector(b: &mut Bencher) {
|
||||
b.iter(|| {
|
||||
let mut count_collector = CountCollector::default();
|
||||
let docs: Vec<u32> = (0..1_000_000).collect();
|
||||
for doc in docs {
|
||||
count_collector.collect(doc, 1f32);
|
||||
}
|
||||
count_collector.count()
|
||||
});
|
||||
}
|
||||
}
|
||||
pub mod tests;
|
||||
|
||||
@@ -1,9 +1,97 @@
|
||||
use super::Collector;
|
||||
use super::SegmentCollector;
|
||||
use collector::Fruit;
|
||||
use downcast::Downcast;
|
||||
use std::marker::PhantomData;
|
||||
use DocId;
|
||||
use Result;
|
||||
use Score;
|
||||
use SegmentLocalId;
|
||||
use SegmentReader;
|
||||
use TantivyError;
|
||||
|
||||
pub struct MultiFruit {
|
||||
sub_fruits: Vec<Option<Box<Fruit>>>,
|
||||
}
|
||||
|
||||
pub struct CollectorWrapper<TCollector: Collector>(TCollector);
|
||||
|
||||
impl<TCollector: Collector> Collector for CollectorWrapper<TCollector> {
|
||||
type Fruit = Box<Fruit>;
|
||||
type Child = Box<BoxableSegmentCollector>;
|
||||
|
||||
fn for_segment(
|
||||
&self,
|
||||
segment_local_id: u32,
|
||||
reader: &SegmentReader,
|
||||
) -> Result<Box<BoxableSegmentCollector>> {
|
||||
let child = self.0.for_segment(segment_local_id, reader)?;
|
||||
Ok(Box::new(SegmentCollectorWrapper(child)))
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
self.0.requires_scoring()
|
||||
}
|
||||
|
||||
fn merge_fruits(&self, children: Vec<<Self as Collector>::Fruit>) -> Result<Box<Fruit>> {
|
||||
let typed_fruit: Vec<TCollector::Fruit> = children
|
||||
.into_iter()
|
||||
.map(|untyped_fruit| {
|
||||
Downcast::<TCollector::Fruit>::downcast(untyped_fruit)
|
||||
.map(|boxed_but_typed| *boxed_but_typed)
|
||||
.map_err(|e| {
|
||||
let err_msg = format!("Failed to cast child collector fruit. {:?}", e);
|
||||
TantivyError::InvalidArgument(err_msg)
|
||||
})
|
||||
})
|
||||
.collect::<Result<_>>()?;
|
||||
let merged_fruit = self.0.merge_fruits(typed_fruit)?;
|
||||
Ok(Box::new(merged_fruit))
|
||||
}
|
||||
}
|
||||
|
||||
impl SegmentCollector for Box<BoxableSegmentCollector> {
|
||||
type Fruit = Box<Fruit>;
|
||||
|
||||
fn collect(&mut self, doc: u32, score: f32) {
|
||||
self.as_mut().collect(doc, score);
|
||||
}
|
||||
|
||||
fn harvest(self) -> Box<Fruit> {
|
||||
BoxableSegmentCollector::harvest_from_box(self)
|
||||
}
|
||||
}
|
||||
|
||||
pub trait BoxableSegmentCollector {
|
||||
fn collect(&mut self, doc: u32, score: f32);
|
||||
fn harvest_from_box(self: Box<Self>) -> Box<Fruit>;
|
||||
}
|
||||
|
||||
pub struct SegmentCollectorWrapper<TSegmentCollector: SegmentCollector>(TSegmentCollector);
|
||||
|
||||
impl<TSegmentCollector: SegmentCollector> BoxableSegmentCollector
|
||||
for SegmentCollectorWrapper<TSegmentCollector>
|
||||
{
|
||||
fn collect(&mut self, doc: u32, score: f32) {
|
||||
self.0.collect(doc, score);
|
||||
}
|
||||
|
||||
fn harvest_from_box(self: Box<Self>) -> Box<Fruit> {
|
||||
Box::new(self.0.harvest())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FruitHandle<TFruit: Fruit> {
|
||||
pos: usize,
|
||||
_phantom: PhantomData<TFruit>,
|
||||
}
|
||||
|
||||
impl<TFruit: Fruit> FruitHandle<TFruit> {
|
||||
pub fn extract(self, fruits: &mut MultiFruit) -> TFruit {
|
||||
let boxed_fruit = fruits.sub_fruits[self.pos].take().expect("fruit was already extracted or never set");
*Downcast::<TFruit>::downcast(boxed_fruit).expect("failed to downcast the fruit to its concrete type")
|
||||
}
|
||||
}
|
||||
|
||||
/// `MultiCollector` makes it possible to collect on more than one collector.
/// It should only be used for use cases where the Collector types are unknown
|
||||
@@ -13,14 +101,14 @@ use SegmentReader;
|
||||
/// ```rust
|
||||
/// #[macro_use]
|
||||
/// extern crate tantivy;
|
||||
/// use tantivy::schema::{SchemaBuilder, TEXT};
|
||||
/// use tantivy::schema::{Schema, TEXT};
|
||||
/// use tantivy::{Index, Result};
|
||||
/// use tantivy::collector::{CountCollector, TopCollector, MultiCollector};
|
||||
/// use tantivy::collector::{Count, TopDocs, MultiCollector};
|
||||
/// use tantivy::query::QueryParser;
|
||||
///
|
||||
/// # fn main() { example().unwrap(); }
|
||||
/// fn example() -> Result<()> {
|
||||
/// let mut schema_builder = SchemaBuilder::new();
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
/// let title = schema_builder.add_text_field("title", TEXT);
|
||||
/// let schema = schema_builder.build();
|
||||
/// let index = Index::create_in_ram(schema);
|
||||
@@ -44,55 +132,115 @@ use SegmentReader;
|
||||
/// index.load_searchers()?;
|
||||
/// let searcher = index.searcher();
|
||||
///
|
||||
/// {
|
||||
/// let mut top_collector = TopCollector::with_limit(2);
|
||||
/// let mut count_collector = CountCollector::default();
|
||||
/// {
|
||||
/// let mut collectors =
|
||||
/// MultiCollector::from(vec![&mut top_collector, &mut count_collector]);
|
||||
/// let query_parser = QueryParser::for_index(&index, vec![title]);
|
||||
/// let query = query_parser.parse_query("diary")?;
|
||||
/// searcher.search(&*query, &mut collectors).unwrap();
|
||||
/// }
|
||||
/// assert_eq!(count_collector.count(), 2);
|
||||
/// assert!(top_collector.at_capacity());
|
||||
/// }
|
||||
/// let mut collectors = MultiCollector::new();
|
||||
/// let top_docs_handle = collectors.add_collector(TopDocs::with_limit(2));
|
||||
/// let count_handle = collectors.add_collector(Count);
|
||||
/// let query_parser = QueryParser::for_index(&index, vec![title]);
|
||||
/// let query = query_parser.parse_query("diary")?;
|
||||
/// let mut multi_fruit = searcher.search(&query, &collectors)?;
|
||||
///
|
||||
/// let count = count_handle.extract(&mut multi_fruit);
|
||||
/// let top_docs = top_docs_handle.extract(&mut multi_fruit);
|
||||
///
|
||||
/// # assert_eq!(count, 2);
|
||||
/// # assert_eq!(top_docs.len(), 2);
|
||||
///
|
||||
/// Ok(())
|
||||
/// }
|
||||
/// ```
|
||||
#[allow(clippy::type_complexity)]
|
||||
#[derive(Default)]
|
||||
pub struct MultiCollector<'a> {
|
||||
collectors: Vec<&'a mut Collector>,
|
||||
collector_wrappers:
|
||||
Vec<Box<Collector<Child = Box<BoxableSegmentCollector>, Fruit = Box<Fruit>> + 'a>>,
|
||||
}
|
||||
|
||||
impl<'a> MultiCollector<'a> {
|
||||
/// Constructor
|
||||
pub fn from(collectors: Vec<&'a mut Collector>) -> MultiCollector {
|
||||
MultiCollector { collectors }
|
||||
/// Create a new `MultiCollector`
|
||||
pub fn new() -> Self {
|
||||
Default::default()
|
||||
}
|
||||
|
||||
/// Add a new collector to our `MultiCollector`.
|
||||
pub fn add_collector<'b: 'a, TCollector: Collector + 'b>(
|
||||
&mut self,
|
||||
collector: TCollector,
|
||||
) -> FruitHandle<TCollector::Fruit> {
|
||||
let pos = self.collector_wrappers.len();
|
||||
self.collector_wrappers
|
||||
.push(Box::new(CollectorWrapper(collector)));
|
||||
FruitHandle {
|
||||
pos,
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Collector for MultiCollector<'a> {
|
||||
fn set_segment(
|
||||
&mut self,
|
||||
type Fruit = MultiFruit;
|
||||
type Child = MultiCollectorChild;
|
||||
|
||||
fn for_segment(
|
||||
&self,
|
||||
segment_local_id: SegmentLocalId,
|
||||
segment: &SegmentReader,
|
||||
) -> Result<()> {
|
||||
for collector in &mut self.collectors {
|
||||
collector.set_segment(segment_local_id, segment)?;
|
||||
}
|
||||
Ok(())
|
||||
) -> Result<MultiCollectorChild> {
|
||||
let children = self
|
||||
.collector_wrappers
|
||||
.iter()
|
||||
.map(|collector_wrapper| collector_wrapper.for_segment(segment_local_id, segment))
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
Ok(MultiCollectorChild { children })
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
self.collector_wrappers.iter().any(|c| c.requires_scoring())
|
||||
}
|
||||
|
||||
fn merge_fruits(&self, segments_multifruits: Vec<MultiFruit>) -> Result<MultiFruit> {
|
||||
let mut segment_fruits_list: Vec<Vec<Box<Fruit>>> = (0..self.collector_wrappers.len())
|
||||
.map(|_| Vec::with_capacity(segments_multifruits.len()))
|
||||
.collect::<Vec<_>>();
|
||||
for segment_multifruit in segments_multifruits {
|
||||
for (idx, segment_fruit_opt) in segment_multifruit.sub_fruits.into_iter().enumerate() {
|
||||
if let Some(segment_fruit) = segment_fruit_opt {
|
||||
segment_fruits_list[idx].push(segment_fruit);
|
||||
}
|
||||
}
|
||||
}
|
||||
let sub_fruits = self
|
||||
.collector_wrappers
|
||||
.iter()
|
||||
.zip(segment_fruits_list)
|
||||
.map(|(child_collector, segment_fruits)| {
|
||||
Ok(Some(child_collector.merge_fruits(segment_fruits)?))
|
||||
})
|
||||
.collect::<Result<_>>()?;
|
||||
Ok(MultiFruit { sub_fruits })
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MultiCollectorChild {
|
||||
children: Vec<Box<BoxableSegmentCollector>>,
|
||||
}
|
||||
|
||||
impl SegmentCollector for MultiCollectorChild {
|
||||
type Fruit = MultiFruit;
|
||||
|
||||
fn collect(&mut self, doc: DocId, score: Score) {
|
||||
for collector in &mut self.collectors {
|
||||
collector.collect(doc, score);
|
||||
for child in &mut self.children {
|
||||
child.collect(doc, score);
|
||||
}
|
||||
}
|
||||
fn requires_scoring(&self) -> bool {
|
||||
self.collectors
|
||||
.iter()
|
||||
.any(|collector| collector.requires_scoring())
|
||||
|
||||
fn harvest(self) -> MultiFruit {
|
||||
MultiFruit {
|
||||
sub_fruits: self
|
||||
.children
|
||||
.into_iter()
|
||||
.map(|child| Some(child.harvest()))
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -100,20 +248,42 @@ impl<'a> Collector for MultiCollector<'a> {
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use collector::{Collector, CountCollector, TopCollector};
|
||||
use collector::{Count, TopDocs};
|
||||
use query::TermQuery;
|
||||
use schema::IndexRecordOption;
|
||||
use schema::{Schema, TEXT};
|
||||
use Index;
|
||||
use Term;
|
||||
|
||||
#[test]
|
||||
fn test_multi_collector() {
|
||||
let mut top_collector = TopCollector::with_limit(2);
|
||||
let mut count_collector = CountCollector::default();
|
||||
let mut schema_builder = Schema::builder();
|
||||
let text = schema_builder.add_text_field("text", TEXT);
|
||||
let schema = schema_builder.build();
|
||||
|
||||
let index = Index::create_in_ram(schema);
|
||||
{
|
||||
let mut collectors =
|
||||
MultiCollector::from(vec![&mut top_collector, &mut count_collector]);
|
||||
collectors.collect(1, 0.2);
|
||||
collectors.collect(2, 0.1);
|
||||
collectors.collect(3, 0.5);
|
||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
||||
index_writer.add_document(doc!(text=>"abc"));
|
||||
index_writer.add_document(doc!(text=>"abc abc abc"));
|
||||
index_writer.add_document(doc!(text=>"abc abc"));
|
||||
index_writer.commit().unwrap();
|
||||
index_writer.add_document(doc!(text=>""));
|
||||
index_writer.add_document(doc!(text=>"abc abc abc abc"));
|
||||
index_writer.add_document(doc!(text=>"abc"));
|
||||
index_writer.commit().unwrap();
|
||||
}
|
||||
assert_eq!(count_collector.count(), 3);
|
||||
assert!(top_collector.at_capacity());
|
||||
index.load_searchers().unwrap();
|
||||
let searcher = index.searcher();
|
||||
let term = Term::from_field_text(text, "abc");
|
||||
let query = TermQuery::new(term, IndexRecordOption::Basic);
|
||||
|
||||
let mut collectors = MultiCollector::new();
|
||||
let topdocs_handler = collectors.add_collector(TopDocs::with_limit(2));
|
||||
let count_handler = collectors.add_collector(Count);
|
||||
let mut multifruits = searcher.search(&query, &mut collectors).unwrap();
|
||||
|
||||
assert_eq!(count_handler.extract(&mut multifruits), 5);
|
||||
assert_eq!(topdocs_handler.extract(&mut multifruits).len(), 2);
|
||||
}
|
||||
}
|
||||
|
||||
201
src/collector/tests.rs
Normal file
@@ -0,0 +1,201 @@
|
||||
use super::*;
|
||||
use core::SegmentReader;
|
||||
use fastfield::BytesFastFieldReader;
|
||||
use fastfield::FastFieldReader;
|
||||
use schema::Field;
|
||||
use DocAddress;
|
||||
use DocId;
|
||||
use Score;
|
||||
use SegmentLocalId;
|
||||
|
||||
/// Stores all of the doc ids.
|
||||
/// This collector is only used for tests.
|
||||
/// It is unusable in practice, as it does not store
/// the segment ordinals
|
||||
pub struct TestCollector;
|
||||
|
||||
pub struct TestSegmentCollector {
|
||||
segment_id: SegmentLocalId,
|
||||
fruit: TestFruit,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct TestFruit {
|
||||
docs: Vec<DocAddress>,
|
||||
scores: Vec<Score>,
|
||||
}
|
||||
|
||||
impl TestFruit {
|
||||
/// Return the list of matching documents exhaustively.
|
||||
pub fn docs(&self) -> &[DocAddress] {
|
||||
&self.docs[..]
|
||||
}
|
||||
|
||||
pub fn scores(&self) -> &[Score] {
|
||||
&self.scores[..]
|
||||
}
|
||||
}
|
||||
|
||||
impl Collector for TestCollector {
|
||||
type Fruit = TestFruit;
|
||||
type Child = TestSegmentCollector;
|
||||
|
||||
fn for_segment(
|
||||
&self,
|
||||
segment_id: SegmentLocalId,
|
||||
_reader: &SegmentReader,
|
||||
) -> Result<TestSegmentCollector> {
|
||||
Ok(TestSegmentCollector {
|
||||
segment_id,
|
||||
fruit: TestFruit::default(),
|
||||
})
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn merge_fruits(&self, mut children: Vec<TestFruit>) -> Result<TestFruit> {
|
||||
children.sort_by_key(|fruit| {
|
||||
if fruit.docs().is_empty() {
|
||||
0
|
||||
} else {
|
||||
fruit.docs()[0].segment_ord()
|
||||
}
|
||||
});
|
||||
let mut docs = vec![];
|
||||
let mut scores = vec![];
|
||||
for child in children {
|
||||
docs.extend(child.docs());
|
||||
scores.extend(child.scores);
|
||||
}
|
||||
Ok(TestFruit { docs, scores })
|
||||
}
|
||||
}
|
||||
|
||||
impl SegmentCollector for TestSegmentCollector {
|
||||
type Fruit = TestFruit;
|
||||
|
||||
fn collect(&mut self, doc: DocId, score: Score) {
|
||||
self.fruit.docs.push(DocAddress(self.segment_id, doc));
|
||||
self.fruit.scores.push(score);
|
||||
}
|
||||
|
||||
fn harvest(self) -> <Self as SegmentCollector>::Fruit {
|
||||
self.fruit
|
||||
}
|
||||
}
|
||||
|
||||
/// Collects in order all of the fast fields for all of the
|
||||
/// docs in the `DocSet`
|
||||
///
|
||||
/// This collector is mainly useful for tests.
|
||||
pub struct FastFieldTestCollector {
|
||||
field: Field,
|
||||
}
|
||||
|
||||
pub struct FastFieldSegmentCollector {
|
||||
vals: Vec<u64>,
|
||||
reader: FastFieldReader<u64>,
|
||||
}
|
||||
|
||||
impl FastFieldTestCollector {
|
||||
pub fn for_field(field: Field) -> FastFieldTestCollector {
|
||||
FastFieldTestCollector { field }
|
||||
}
|
||||
}
|
||||
|
||||
impl Collector for FastFieldTestCollector {
|
||||
type Fruit = Vec<u64>;
|
||||
type Child = FastFieldSegmentCollector;
|
||||
|
||||
fn for_segment(
|
||||
&self,
|
||||
_: SegmentLocalId,
|
||||
reader: &SegmentReader,
|
||||
) -> Result<FastFieldSegmentCollector> {
|
||||
Ok(FastFieldSegmentCollector {
|
||||
vals: Vec::new(),
|
||||
reader: reader.fast_field_reader(self.field)?,
|
||||
})
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn merge_fruits(&self, children: Vec<Vec<u64>>) -> Result<Vec<u64>> {
|
||||
Ok(children.into_iter().flat_map(|v| v.into_iter()).collect())
|
||||
}
|
||||
}
|
||||
|
||||
impl SegmentCollector for FastFieldSegmentCollector {
|
||||
type Fruit = Vec<u64>;
|
||||
|
||||
fn collect(&mut self, doc: DocId, _score: Score) {
|
||||
let val = self.reader.get(doc);
|
||||
self.vals.push(val);
|
||||
}
|
||||
|
||||
fn harvest(self) -> Vec<u64> {
|
||||
self.vals
|
||||
}
|
||||
}
|
||||
|
||||
/// Collects in order all of the fast field bytes for all of the
|
||||
/// docs in the `DocSet`
|
||||
///
|
||||
/// This collector is mainly useful for tests.
|
||||
pub struct BytesFastFieldTestCollector {
|
||||
field: Field,
|
||||
}
|
||||
|
||||
pub struct BytesFastFieldSegmentCollector {
|
||||
vals: Vec<u8>,
|
||||
reader: BytesFastFieldReader,
|
||||
}
|
||||
|
||||
impl BytesFastFieldTestCollector {
|
||||
pub fn for_field(field: Field) -> BytesFastFieldTestCollector {
|
||||
BytesFastFieldTestCollector { field }
|
||||
}
|
||||
}
|
||||
|
||||
impl Collector for BytesFastFieldTestCollector {
|
||||
type Fruit = Vec<u8>;
|
||||
type Child = BytesFastFieldSegmentCollector;
|
||||
|
||||
fn for_segment(
|
||||
&self,
|
||||
_segment_local_id: u32,
|
||||
segment: &SegmentReader,
|
||||
) -> Result<BytesFastFieldSegmentCollector> {
|
||||
Ok(BytesFastFieldSegmentCollector {
|
||||
vals: Vec::new(),
|
||||
reader: segment.bytes_fast_field_reader(self.field)?,
|
||||
})
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn merge_fruits(&self, children: Vec<Vec<u8>>) -> Result<Vec<u8>> {
|
||||
Ok(children.into_iter().flat_map(|c| c.into_iter()).collect())
|
||||
}
|
||||
}
|
||||
|
||||
impl SegmentCollector for BytesFastFieldSegmentCollector {
|
||||
type Fruit = Vec<u8>;
|
||||
|
||||
fn collect(&mut self, doc: u32, _score: f32) {
|
||||
let data = self.reader.get_val(doc);
|
||||
self.vals.extend(data);
|
||||
}
|
||||
|
||||
fn harvest(self) -> <Self as SegmentCollector>::Fruit {
|
||||
self.vals
|
||||
}
|
||||
}
|
||||
@@ -1,244 +1,224 @@
|
||||
use super::Collector;
|
||||
use serde::export::PhantomData;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::BinaryHeap;
|
||||
use DocAddress;
|
||||
use DocId;
|
||||
use Result;
|
||||
use Score;
|
||||
use SegmentLocalId;
|
||||
use SegmentReader;
|
||||
|
||||
// Rust heap is a max-heap and we need a min heap.
|
||||
#[derive(Clone, Copy)]
|
||||
struct GlobalScoredDoc {
|
||||
score: Score,
|
||||
doc_address: DocAddress,
|
||||
/// Contains a feature (field, score, etc.) of a document along with the document address.
|
||||
///
|
||||
/// It has a custom implementation of `PartialOrd` that reverses the order. This is because the
|
||||
/// default Rust heap is a max heap, whereas a min heap is needed.
|
||||
///
|
||||
/// WARNING: equality is not what you would expect here.
|
||||
/// Two elements are equal if their feature is equal, regardless of whether `doc`
|
||||
/// is equal. This should be perfectly fine for this usage, but let's make sure this
|
||||
/// struct is never public.
|
||||
struct ComparableDoc<T, D> {
|
||||
feature: T,
|
||||
doc: D,
|
||||
}
|
||||
|
||||
impl PartialOrd for GlobalScoredDoc {
|
||||
fn partial_cmp(&self, other: &GlobalScoredDoc) -> Option<Ordering> {
|
||||
impl<T: PartialOrd, D> PartialOrd for ComparableDoc<T, D> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for GlobalScoredDoc {
|
||||
impl<T: PartialOrd, D> Ord for ComparableDoc<T, D> {
|
||||
#[inline]
|
||||
fn cmp(&self, other: &GlobalScoredDoc) -> Ordering {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
other
|
||||
.score
|
||||
.partial_cmp(&self.score)
|
||||
.unwrap_or_else(|| other.doc_address.cmp(&self.doc_address))
|
||||
.feature
|
||||
.partial_cmp(&self.feature)
|
||||
.unwrap_or_else(|| Ordering::Equal)
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for GlobalScoredDoc {
|
||||
fn eq(&self, other: &GlobalScoredDoc) -> bool {
|
||||
impl<T: PartialOrd, D> PartialEq for ComparableDoc<T, D> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.cmp(other) == Ordering::Equal
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for GlobalScoredDoc {}
|
||||
impl<T: PartialOrd, D> Eq for ComparableDoc<T, D> {}
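// Illustrative sketch, not part of this diff: the reversed comparison above turns
// Rust's max-heap `BinaryHeap` into a min-heap, so `peek()` exposes the weakest
// entry and a top-K collector can evict it whenever a better candidate arrives.
// The same effect, shown with `std::cmp::Reverse` over plain integer scores
// (this test is illustrative only):
#[test]
fn test_reversed_heap_sketch() {
    use std::cmp::Reverse;
    use std::collections::BinaryHeap;

    let mut heap = BinaryHeap::new();
    for score in vec![8u32, 2, 5] {
        heap.push(Reverse(score));
    }
    // The smallest score sits on top, ready to be compared and replaced.
    assert_eq!(heap.peek(), Some(&Reverse(2u32)));
}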
|
||||
|
||||
/// The Top Collector keeps track of the K documents
|
||||
/// with the best scores.
|
||||
///
|
||||
/// The implementation is based on a `BinaryHeap`.
|
||||
/// The theoretical complexity for collecting the top `K` out of `n` documents
|
||||
/// is `O(n log K)`.
|
||||
///
|
||||
/// ```rust
|
||||
/// #[macro_use]
|
||||
/// extern crate tantivy;
|
||||
/// use tantivy::schema::{SchemaBuilder, TEXT};
|
||||
/// use tantivy::{Index, Result, DocId, Score};
|
||||
/// use tantivy::collector::TopCollector;
|
||||
/// use tantivy::query::QueryParser;
|
||||
///
|
||||
/// # fn main() { example().unwrap(); }
|
||||
/// fn example() -> Result<()> {
|
||||
/// let mut schema_builder = SchemaBuilder::new();
|
||||
/// let title = schema_builder.add_text_field("title", TEXT);
|
||||
/// let schema = schema_builder.build();
|
||||
/// let index = Index::create_in_ram(schema);
|
||||
/// {
|
||||
/// let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Name of the Wind",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of Muadib",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "A Dairy Cow",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of a Young Girl",
|
||||
/// ));
|
||||
/// index_writer.commit().unwrap();
|
||||
/// }
|
||||
///
|
||||
/// index.load_searchers()?;
|
||||
/// let searcher = index.searcher();
|
||||
///
|
||||
/// {
|
||||
/// let mut top_collector = TopCollector::with_limit(2);
|
||||
/// let query_parser = QueryParser::for_index(&index, vec![title]);
|
||||
/// let query = query_parser.parse_query("diary")?;
|
||||
/// searcher.search(&*query, &mut top_collector).unwrap();
|
||||
///
|
||||
/// let score_docs: Vec<(Score, DocId)> = top_collector
|
||||
/// .score_docs()
|
||||
/// .into_iter()
|
||||
/// .map(|(score, doc_address)| (score, doc_address.doc()))
|
||||
/// .collect();
|
||||
///
|
||||
/// assert_eq!(score_docs, vec![(0.7261542, 1), (0.6099695, 3)]);
|
||||
/// }
|
||||
///
|
||||
/// Ok(())
|
||||
/// }
|
||||
/// ```
|
||||
pub struct TopCollector {
|
||||
pub(crate) struct TopCollector<T> {
|
||||
limit: usize,
|
||||
heap: BinaryHeap<GlobalScoredDoc>,
|
||||
segment_id: u32,
|
||||
_marker: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl TopCollector {
|
||||
impl<T> TopCollector<T>
|
||||
where
|
||||
T: PartialOrd + Clone,
|
||||
{
|
||||
/// Creates a top collector, with a number of documents equal to "limit".
|
||||
///
|
||||
/// # Panics
|
||||
/// The method panics if limit is 0
|
||||
pub fn with_limit(limit: usize) -> TopCollector {
|
||||
pub fn with_limit(limit: usize) -> TopCollector<T> {
|
||||
if limit < 1 {
|
||||
panic!("Limit must be strictly greater than 0.");
|
||||
}
|
||||
TopCollector {
|
||||
limit,
|
||||
heap: BinaryHeap::with_capacity(limit),
|
||||
segment_id: 0,
|
||||
_marker: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns K best documents sorted in decreasing order.
|
||||
///
|
||||
/// Calling this method triggers the sort.
|
||||
/// The result of the sort is not cached.
|
||||
pub fn docs(&self) -> Vec<DocAddress> {
|
||||
self.score_docs()
|
||||
.into_iter()
|
||||
.map(|score_doc| score_doc.1)
|
||||
.collect()
|
||||
pub fn limit(&self) -> usize {
|
||||
self.limit
|
||||
}
|
||||
|
||||
/// Returns K best ScoredDocument sorted in decreasing order.
|
||||
///
|
||||
/// Calling this method triggers the sort.
|
||||
/// The result of the sort is not cached.
|
||||
pub fn score_docs(&self) -> Vec<(Score, DocAddress)> {
|
||||
let mut scored_docs: Vec<GlobalScoredDoc> = self.heap.iter().cloned().collect();
|
||||
scored_docs.sort();
|
||||
scored_docs
|
||||
pub fn merge_fruits(
|
||||
&self,
|
||||
children: Vec<Vec<(T, DocAddress)>>,
|
||||
) -> Result<Vec<(T, DocAddress)>> {
|
||||
if self.limit == 0 {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
let mut top_collector = BinaryHeap::new();
|
||||
for child_fruit in children {
|
||||
for (feature, doc) in child_fruit {
|
||||
if top_collector.len() < self.limit {
|
||||
top_collector.push(ComparableDoc { feature, doc });
|
||||
} else if let Some(mut head) = top_collector.peek_mut() {
|
||||
if head.feature < feature {
|
||||
*head = ComparableDoc { feature, doc };
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(top_collector
|
||||
.into_sorted_vec()
|
||||
.into_iter()
|
||||
.map(|GlobalScoredDoc { score, doc_address }| (score, doc_address))
|
||||
.map(|cdoc| (cdoc.feature, cdoc.doc))
|
||||
.collect())
|
||||
}
|
||||
|
||||
pub(crate) fn for_segment(
|
||||
&self,
|
||||
segment_id: SegmentLocalId,
|
||||
_: &SegmentReader,
|
||||
) -> Result<TopSegmentCollector<T>> {
|
||||
Ok(TopSegmentCollector::new(segment_id, self.limit))
|
||||
}
|
||||
}
|
||||
|
||||
/// The Top Collector keeps track of the K documents
|
||||
/// sorted by type `T`.
|
||||
///
|
||||
/// The implementation is based on a `BinaryHeap`.
|
||||
/// The theoretical complexity for collecting the top `K` out of `n` documents
|
||||
/// is `O(n log K)`.
|
||||
pub(crate) struct TopSegmentCollector<T> {
|
||||
limit: usize,
|
||||
heap: BinaryHeap<ComparableDoc<T, DocId>>,
|
||||
segment_id: u32,
|
||||
}
|
||||
|
||||
impl<T: PartialOrd> TopSegmentCollector<T> {
|
||||
fn new(segment_id: SegmentLocalId, limit: usize) -> TopSegmentCollector<T> {
|
||||
TopSegmentCollector {
|
||||
limit,
|
||||
heap: BinaryHeap::with_capacity(limit),
|
||||
segment_id,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: PartialOrd + Clone> TopSegmentCollector<T> {
|
||||
pub fn harvest(self) -> Vec<(T, DocAddress)> {
|
||||
let segment_id = self.segment_id;
|
||||
self.heap
|
||||
.into_sorted_vec()
|
||||
.into_iter()
|
||||
.map(|comparable_doc| {
|
||||
(
|
||||
comparable_doc.feature,
|
||||
DocAddress(segment_id, comparable_doc.doc),
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Return true iff at least K documents have gone through
|
||||
/// the collector.
|
||||
#[inline]
|
||||
pub fn at_capacity(&self) -> bool {
|
||||
#[inline(always)]
|
||||
pub(crate) fn at_capacity(&self) -> bool {
|
||||
self.heap.len() >= self.limit
|
||||
}
|
||||
}
|
||||
|
||||
impl Collector for TopCollector {
|
||||
fn set_segment(&mut self, segment_id: SegmentLocalId, _: &SegmentReader) -> Result<()> {
|
||||
self.segment_id = segment_id;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn collect(&mut self, doc: DocId, score: Score) {
|
||||
/// Collects a document scored by the given feature
|
||||
///
|
||||
/// It collects documents until it has reached the max capacity. Once it reaches capacity, it
|
||||
/// will compare the lowest scoring item with the given one and keep whichever is greater.
|
||||
#[inline(always)]
|
||||
pub fn collect(&mut self, doc: DocId, feature: T) {
|
||||
if self.at_capacity() {
|
||||
// It's ok to unwrap as long as a limit of 0 is forbidden.
|
||||
let limit_doc: GlobalScoredDoc = *self.heap
|
||||
.peek()
|
||||
.expect("Top collector with size 0 is forbidden");
|
||||
if limit_doc.score < score {
|
||||
let mut mut_head = self.heap
|
||||
.peek_mut()
|
||||
.expect("Top collector with size 0 is forbidden");
|
||||
mut_head.score = score;
|
||||
mut_head.doc_address = DocAddress(self.segment_id, doc);
|
||||
if let Some(limit_feature) = self.heap.peek().map(|head| head.feature.clone()) {
|
||||
if limit_feature < feature {
|
||||
if let Some(mut head) = self.heap.peek_mut() {
|
||||
head.feature = feature;
|
||||
head.doc = doc;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let wrapped_doc = GlobalScoredDoc {
|
||||
score,
|
||||
doc_address: DocAddress(self.segment_id, doc),
|
||||
};
|
||||
self.heap.push(wrapped_doc);
|
||||
// we have not reached capacity yet, so we can just push the
|
||||
// element.
|
||||
self.heap.push(ComparableDoc { feature, doc });
|
||||
}
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use collector::Collector;
|
||||
use DocId;
|
||||
use super::{TopCollector, TopSegmentCollector};
|
||||
use DocAddress;
|
||||
use Score;
|
||||
|
||||
#[test]
|
||||
fn test_top_collector_not_at_capacity() {
|
||||
let mut top_collector = TopCollector::with_limit(4);
|
||||
let mut top_collector = TopSegmentCollector::new(0, 4);
|
||||
top_collector.collect(1, 0.8);
|
||||
top_collector.collect(3, 0.2);
|
||||
top_collector.collect(5, 0.3);
|
||||
assert!(!top_collector.at_capacity());
|
||||
let score_docs: Vec<(Score, DocId)> = top_collector
|
||||
.score_docs()
|
||||
.into_iter()
|
||||
.map(|(score, doc_address)| (score, doc_address.doc()))
|
||||
.collect();
|
||||
assert_eq!(score_docs, vec![(0.8, 1), (0.3, 5), (0.2, 3)]);
|
||||
assert_eq!(
|
||||
top_collector.harvest(),
|
||||
vec![
|
||||
(0.8, DocAddress(0, 1)),
|
||||
(0.3, DocAddress(0, 5)),
|
||||
(0.2, DocAddress(0, 3))
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_top_collector_at_capacity() {
|
||||
let mut top_collector = TopCollector::with_limit(4);
|
||||
let mut top_collector = TopSegmentCollector::new(0, 4);
|
||||
top_collector.collect(1, 0.8);
|
||||
top_collector.collect(3, 0.2);
|
||||
top_collector.collect(5, 0.3);
|
||||
top_collector.collect(7, 0.9);
|
||||
top_collector.collect(9, -0.2);
|
||||
assert!(top_collector.at_capacity());
|
||||
{
|
||||
let score_docs: Vec<(Score, DocId)> = top_collector
|
||||
.score_docs()
|
||||
.into_iter()
|
||||
.map(|(score, doc_address)| (score, doc_address.doc()))
|
||||
.collect();
|
||||
assert_eq!(score_docs, vec![(0.9, 7), (0.8, 1), (0.3, 5), (0.2, 3)]);
|
||||
}
|
||||
{
|
||||
let docs: Vec<DocId> = top_collector
|
||||
.docs()
|
||||
.into_iter()
|
||||
.map(|doc_address| doc_address.doc())
|
||||
.collect();
|
||||
assert_eq!(docs, vec![7, 1, 5, 3]);
|
||||
}
|
||||
assert_eq!(
|
||||
top_collector.harvest(),
|
||||
vec![
|
||||
(0.9, DocAddress(0, 7)),
|
||||
(0.8, DocAddress(0, 1)),
|
||||
(0.3, DocAddress(0, 5)),
|
||||
(0.2, DocAddress(0, 3))
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_top_0() {
|
||||
TopCollector::with_limit(0);
|
||||
let _collector: TopCollector<Score> = TopCollector::with_limit(0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
250
src/collector/top_field_collector.rs
Normal file
@@ -0,0 +1,250 @@
|
||||
use super::Collector;
|
||||
use collector::top_collector::TopCollector;
|
||||
use collector::top_collector::TopSegmentCollector;
|
||||
use collector::SegmentCollector;
|
||||
use fastfield::FastFieldReader;
|
||||
use fastfield::FastValue;
|
||||
use schema::Field;
|
||||
use DocAddress;
|
||||
use Result;
|
||||
use SegmentLocalId;
|
||||
use SegmentReader;
|
||||
|
||||
/// The Top Field Collector keeps track of the K documents
|
||||
/// sorted by a fast field in the index
|
||||
///
|
||||
/// The implementation is based on a `BinaryHeap`.
|
||||
/// The theoretical complexity for collecting the top `K` out of `n` documents
|
||||
/// is `O(n log K)`.
|
||||
///
|
||||
/// ```rust
|
||||
/// #[macro_use]
|
||||
/// extern crate tantivy;
|
||||
/// # use tantivy::schema::{Schema, Field, FAST, TEXT};
|
||||
/// # use tantivy::{Index, Result, DocAddress};
|
||||
/// # use tantivy::query::{Query, QueryParser};
|
||||
/// use tantivy::collector::TopDocs;
|
||||
///
|
||||
/// # fn main() {
|
||||
/// # let mut schema_builder = Schema::builder();
|
||||
/// # let title = schema_builder.add_text_field("title", TEXT);
|
||||
/// # let rating = schema_builder.add_u64_field("rating", FAST);
|
||||
/// # let schema = schema_builder.build();
|
||||
/// # let index = Index::create_in_ram(schema);
|
||||
/// # let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
||||
/// # index_writer.add_document(doc!(
|
||||
/// # title => "The Name of the Wind",
|
||||
/// # rating => 92u64,
|
||||
/// # ));
|
||||
/// # index_writer.add_document(doc!(title => "The Diary of Muadib", rating => 97u64));
|
||||
/// # index_writer.add_document(doc!(title => "A Dairy Cow", rating => 63u64));
|
||||
/// # index_writer.add_document(doc!(title => "The Diary of a Young Girl", rating => 80u64));
|
||||
/// # index_writer.commit().unwrap();
|
||||
/// # index.load_searchers().unwrap();
|
||||
/// # let query = QueryParser::for_index(&index, vec![title]).parse_query("diary").unwrap();
|
||||
/// # let top_docs = docs_sorted_by_rating(&index, &query, rating).unwrap();
|
||||
/// # assert_eq!(top_docs,
|
||||
/// # vec![(97u64, DocAddress(0u32, 1)),
|
||||
/// # (80u64, DocAddress(0u32, 3))]);
|
||||
/// # }
|
||||
/// #
|
||||
/// /// Searches the documents matching the given query, and
/// /// collects the top 10 documents, ordered by the `field`
/// /// given as an argument.
|
||||
/// ///
|
||||
/// /// `field` is required to be a FAST field.
|
||||
/// fn docs_sorted_by_rating(index: &Index, query: &Query, sort_by_field: Field)
|
||||
/// -> Result<Vec<(u64, DocAddress)>> {
|
||||
///
|
||||
/// // This is where we build our collector!
|
||||
/// let top_docs_by_rating = TopDocs::with_limit(2).order_by_field(sort_by_field);
|
||||
///
|
||||
/// // ... and here are our documents. Note this is a simple vec.
/// // The `u64` in the pair is the value of our fast field for each document.
|
||||
/// index.searcher()
|
||||
/// .search(query, &top_docs_by_rating)
|
||||
/// }
|
||||
/// ```
|
||||
pub struct TopDocsByField<T> {
|
||||
collector: TopCollector<T>,
|
||||
field: Field,
|
||||
}
|
||||
|
||||
impl<T: FastValue + PartialOrd + Clone> TopDocsByField<T> {
|
||||
/// Creates a top field collector, with a number of documents equal to "limit".
|
||||
///
|
||||
/// The given field name must be a fast field, otherwise the collector will return an error while
|
||||
/// collecting results.
|
||||
///
|
||||
/// # Panics
|
||||
/// The method panics if limit is 0
|
||||
pub(crate) fn new(field: Field, limit: usize) -> TopDocsByField<T> {
|
||||
TopDocsByField {
|
||||
collector: TopCollector::with_limit(limit),
|
||||
field,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: FastValue + PartialOrd + Send + Sync + 'static> Collector for TopDocsByField<T> {
|
||||
type Fruit = Vec<(T, DocAddress)>;
|
||||
|
||||
type Child = TopFieldSegmentCollector<T>;
|
||||
|
||||
fn for_segment(
|
||||
&self,
|
||||
segment_local_id: SegmentLocalId,
|
||||
reader: &SegmentReader,
|
||||
) -> Result<TopFieldSegmentCollector<T>> {
|
||||
let collector = self.collector.for_segment(segment_local_id, reader)?;
|
||||
let reader = reader.fast_field_reader(self.field)?;
|
||||
Ok(TopFieldSegmentCollector { collector, reader })
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn merge_fruits(
|
||||
&self,
|
||||
segment_fruits: Vec<Vec<(T, DocAddress)>>,
|
||||
) -> Result<Vec<(T, DocAddress)>> {
|
||||
self.collector.merge_fruits(segment_fruits)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TopFieldSegmentCollector<T: FastValue + PartialOrd> {
|
||||
collector: TopSegmentCollector<T>,
|
||||
reader: FastFieldReader<T>,
|
||||
}
|
||||
|
||||
impl<T: FastValue + PartialOrd + Send + Sync + 'static> SegmentCollector
|
||||
for TopFieldSegmentCollector<T>
|
||||
{
|
||||
type Fruit = Vec<(T, DocAddress)>;
|
||||
|
||||
fn collect(&mut self, doc: u32, _score: f32) {
|
||||
let field_value = self.reader.get(doc);
|
||||
self.collector.collect(doc, field_value);
|
||||
}
|
||||
|
||||
fn harvest(self) -> Vec<(T, DocAddress)> {
|
||||
self.collector.harvest()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::TopDocsByField;
|
||||
use collector::Collector;
|
||||
use collector::TopDocs;
|
||||
use query::Query;
|
||||
use query::QueryParser;
|
||||
use schema::Field;
|
||||
use schema::IntOptions;
|
||||
use schema::{Schema, FAST, TEXT};
|
||||
use DocAddress;
|
||||
use Index;
|
||||
use IndexWriter;
|
||||
use TantivyError;
|
||||
|
||||
const TITLE: &str = "title";
|
||||
const SIZE: &str = "size";
|
||||
|
||||
#[test]
|
||||
fn test_top_collector_not_at_capacity() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let title = schema_builder.add_text_field(TITLE, TEXT);
|
||||
let size = schema_builder.add_u64_field(SIZE, FAST);
|
||||
let schema = schema_builder.build();
|
||||
let (index, query) = index("beer", title, schema, |index_writer| {
|
||||
index_writer.add_document(doc!(
|
||||
title => "bottle of beer",
|
||||
size => 12u64,
|
||||
));
|
||||
index_writer.add_document(doc!(
|
||||
title => "growler of beer",
|
||||
size => 64u64,
|
||||
));
|
||||
index_writer.add_document(doc!(
|
||||
title => "pint of beer",
|
||||
size => 16u64,
|
||||
));
|
||||
});
|
||||
let searcher = index.searcher();
|
||||
|
||||
let top_collector = TopDocs::with_limit(4).order_by_field(size);
|
||||
let top_docs: Vec<(u64, DocAddress)> = searcher.search(&query, &top_collector).unwrap();
|
||||
assert_eq!(
|
||||
top_docs,
|
||||
vec![
|
||||
(64, DocAddress(0, 1)),
|
||||
(16, DocAddress(0, 2)),
|
||||
(12, DocAddress(0, 0))
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_field_does_not_exist() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let title = schema_builder.add_text_field(TITLE, TEXT);
|
||||
let size = schema_builder.add_u64_field(SIZE, FAST);
|
||||
let schema = schema_builder.build();
|
||||
let (index, _) = index("beer", title, schema, |index_writer| {
|
||||
index_writer.add_document(doc!(
|
||||
title => "bottle of beer",
|
||||
size => 12u64,
|
||||
));
|
||||
});
|
||||
let searcher = index.searcher();
|
||||
let top_collector: TopDocsByField<u64> = TopDocs::with_limit(4).order_by_field(Field(2));
|
||||
let segment_reader = searcher.segment_reader(0u32);
|
||||
top_collector
|
||||
.for_segment(0, segment_reader)
|
||||
.expect("should panic");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_field_not_fast_field() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let title = schema_builder.add_text_field(TITLE, TEXT);
|
||||
let size = schema_builder.add_u64_field(SIZE, IntOptions::default());
|
||||
let schema = schema_builder.build();
|
||||
let (index, _) = index("beer", title, schema, |index_writer| {
|
||||
index_writer.add_document(doc!(
|
||||
title => "bottle of beer",
|
||||
size => 12u64,
|
||||
));
|
||||
});
|
||||
let searcher = index.searcher();
|
||||
let segment = searcher.segment_reader(0);
|
||||
let top_collector: TopDocsByField<u64> = TopDocs::with_limit(4).order_by_field(size);
|
||||
assert_matches!(
|
||||
top_collector
|
||||
.for_segment(0, segment)
|
||||
.map(|_| ())
|
||||
.unwrap_err(),
|
||||
TantivyError::FastFieldError(_)
|
||||
);
|
||||
}
|
||||
|
||||
fn index(
|
||||
query: &str,
|
||||
query_field: Field,
|
||||
schema: Schema,
|
||||
mut doc_adder: impl FnMut(&mut IndexWriter) -> (),
|
||||
) -> (Index, Box<Query>) {
|
||||
let index = Index::create_in_ram(schema);
|
||||
|
||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
||||
doc_adder(&mut index_writer);
|
||||
index_writer.commit().unwrap();
|
||||
index.load_searchers().unwrap();
|
||||
|
||||
let query_parser = QueryParser::for_index(&index, vec![query_field]);
|
||||
let query = query_parser.parse_query(query).unwrap();
|
||||
(index, query)
|
||||
}
|
||||
}
|
||||
200
src/collector/top_score_collector.rs
Normal file
@@ -0,0 +1,200 @@
|
||||
use super::Collector;
|
||||
use collector::top_collector::TopCollector;
|
||||
use collector::top_collector::TopSegmentCollector;
|
||||
use collector::SegmentCollector;
|
||||
use collector::TopDocsByField;
|
||||
use fastfield::FastValue;
|
||||
use schema::Field;
|
||||
use DocAddress;
|
||||
use DocId;
|
||||
use Result;
|
||||
use Score;
|
||||
use SegmentLocalId;
|
||||
use SegmentReader;
|
||||
|
||||
/// The Top Score Collector keeps track of the K documents
|
||||
/// sorted by their score.
|
||||
///
|
||||
/// The implementation is based on a `BinaryHeap`.
|
||||
/// The theoretical complexity for collecting the top `K` out of `n` documents
|
||||
/// is `O(n log K)`.
|
||||
///
|
||||
/// ```rust
|
||||
/// #[macro_use]
|
||||
/// extern crate tantivy;
|
||||
/// use tantivy::DocAddress;
|
||||
/// use tantivy::schema::{Schema, TEXT};
|
||||
/// use tantivy::{Index, Result};
|
||||
/// use tantivy::collector::TopDocs;
|
||||
/// use tantivy::query::QueryParser;
|
||||
///
|
||||
/// # fn main() { example().unwrap(); }
|
||||
/// fn example() -> Result<()> {
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
/// let title = schema_builder.add_text_field("title", TEXT);
|
||||
/// let schema = schema_builder.build();
|
||||
/// let index = Index::create_in_ram(schema);
|
||||
/// {
|
||||
/// let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Name of the Wind",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of Muadib",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "A Dairy Cow",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of a Young Girl",
|
||||
/// ));
|
||||
/// index_writer.commit().unwrap();
|
||||
/// }
|
||||
///
|
||||
/// index.load_searchers()?;
|
||||
/// let searcher = index.searcher();
|
||||
///
|
||||
/// let query_parser = QueryParser::for_index(&index, vec![title]);
|
||||
/// let query = query_parser.parse_query("diary")?;
|
||||
/// let top_docs = searcher.search(&query, &TopDocs::with_limit(2))?;
|
||||
///
|
||||
/// assert_eq!(&top_docs[0], &(0.7261542, DocAddress(0, 1)));
|
||||
/// assert_eq!(&top_docs[1], &(0.6099695, DocAddress(0, 3)));
|
||||
///
|
||||
/// Ok(())
|
||||
/// }
|
||||
/// ```
|
||||
pub struct TopDocs(TopCollector<Score>);
|
||||
|
||||
impl TopDocs {
|
||||
/// Creates a top score collector, with a number of documents equal to "limit".
|
||||
///
|
||||
/// # Panics
|
||||
/// The method panics if limit is 0
|
||||
pub fn with_limit(limit: usize) -> TopDocs {
|
||||
TopDocs(TopCollector::with_limit(limit))
|
||||
}
|
||||
|
||||
/// Set top-K to rank documents by a given fast field.
|
||||
///
|
||||
/// (By default, `TopDocs` collects the top-K documents sorted by
|
||||
/// the similarity score.)
|
||||
pub fn order_by_field<T: PartialOrd + FastValue + Clone>(
|
||||
self,
|
||||
field: Field,
|
||||
) -> TopDocsByField<T> {
|
||||
TopDocsByField::new(field, self.0.limit())
|
||||
}
|
||||
}
|
||||
|
||||
impl Collector for TopDocs {
|
||||
type Fruit = Vec<(Score, DocAddress)>;
|
||||
|
||||
type Child = TopScoreSegmentCollector;
|
||||
|
||||
fn for_segment(
|
||||
&self,
|
||||
segment_local_id: SegmentLocalId,
|
||||
reader: &SegmentReader,
|
||||
) -> Result<Self::Child> {
|
||||
let collector = self.0.for_segment(segment_local_id, reader)?;
|
||||
Ok(TopScoreSegmentCollector(collector))
|
||||
}
|
||||
|
||||
fn requires_scoring(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn merge_fruits(&self, child_fruits: Vec<Vec<(Score, DocAddress)>>) -> Result<Self::Fruit> {
|
||||
self.0.merge_fruits(child_fruits)
|
||||
}
|
||||
}
|
||||
|
||||
/// Segment Collector associated to `TopDocs`.
|
||||
pub struct TopScoreSegmentCollector(TopSegmentCollector<Score>);
|
||||
|
||||
impl SegmentCollector for TopScoreSegmentCollector {
|
||||
type Fruit = Vec<(Score, DocAddress)>;
|
||||
|
||||
fn collect(&mut self, doc: DocId, score: Score) {
|
||||
self.0.collect(doc, score)
|
||||
}
|
||||
|
||||
fn harvest(self) -> Vec<(Score, DocAddress)> {
|
||||
self.0.harvest()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::TopDocs;
|
||||
use query::QueryParser;
|
||||
use schema::Schema;
|
||||
use schema::TEXT;
|
||||
use DocAddress;
|
||||
use Index;
|
||||
use Score;
|
||||
|
||||
fn make_index() -> Index {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
{
|
||||
// writing the segment
|
||||
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
|
||||
index_writer.add_document(doc!(text_field=>"Hello happy tax payer."));
|
||||
index_writer.add_document(doc!(text_field=>"Droopy says hello happy tax payer"));
|
||||
index_writer.add_document(doc!(text_field=>"I like Droopy"));
|
||||
assert!(index_writer.commit().is_ok());
|
||||
}
|
||||
index.load_searchers().unwrap();
|
||||
index
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_top_collector_not_at_capacity() {
|
||||
let index = make_index();
|
||||
let field = index.schema().get_field("text").unwrap();
|
||||
let query_parser = QueryParser::for_index(&index, vec![field]);
|
||||
let text_query = query_parser.parse_query("droopy tax").unwrap();
|
||||
let score_docs: Vec<(Score, DocAddress)> = index
|
||||
.searcher()
|
||||
.search(&text_query, &TopDocs::with_limit(4))
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
score_docs,
|
||||
vec![
|
||||
(0.81221175, DocAddress(0u32, 1)),
|
||||
(0.5376842, DocAddress(0u32, 2)),
|
||||
(0.48527452, DocAddress(0, 0))
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_top_collector_at_capacity() {
|
||||
let index = make_index();
|
||||
let field = index.schema().get_field("text").unwrap();
|
||||
let query_parser = QueryParser::for_index(&index, vec![field]);
|
||||
let text_query = query_parser.parse_query("droopy tax").unwrap();
|
||||
let score_docs: Vec<(Score, DocAddress)> = index
|
||||
.searcher()
|
||||
.search(&text_query, &TopDocs::with_limit(2))
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
score_docs,
|
||||
vec![
|
||||
(0.81221175, DocAddress(0u32, 1)),
|
||||
(0.5376842, DocAddress(0u32, 2)),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_top_0() {
|
||||
TopDocs::with_limit(0);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,9 +1,6 @@
|
||||
use common::serialize::BinarySerializable;
|
||||
use byteorder::{ByteOrder, LittleEndian, WriteBytesExt};
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use std::mem;
|
||||
use std::ops::Deref;
|
||||
use std::ptr;
|
||||
|
||||
pub(crate) struct BitPacker {
|
||||
mini_buffer: u64,
|
||||
@@ -18,7 +15,7 @@ impl BitPacker {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn write<TWrite: Write>(
|
||||
pub fn write<TWrite: io::Write>(
|
||||
&mut self,
|
||||
val: u64,
|
||||
num_bits: u8,
|
||||
@@ -28,14 +25,14 @@ impl BitPacker {
|
||||
let num_bits = num_bits as usize;
|
||||
if self.mini_buffer_written + num_bits > 64 {
|
||||
self.mini_buffer |= val_u64.wrapping_shl(self.mini_buffer_written as u32);
|
||||
self.mini_buffer.serialize(output)?;
|
||||
output.write_u64::<LittleEndian>(self.mini_buffer)?;
|
||||
self.mini_buffer = val_u64.wrapping_shr((64 - self.mini_buffer_written) as u32);
|
||||
self.mini_buffer_written = self.mini_buffer_written + num_bits - 64;
|
||||
} else {
|
||||
self.mini_buffer |= val_u64 << self.mini_buffer_written;
|
||||
self.mini_buffer_written += num_bits;
|
||||
if self.mini_buffer_written == 64 {
|
||||
self.mini_buffer.serialize(output)?;
|
||||
output.write_u64::<LittleEndian>(self.mini_buffer)?;
|
||||
self.mini_buffer_written = 0;
|
||||
self.mini_buffer = 0u64;
|
||||
}
|
||||
@@ -43,17 +40,18 @@ impl BitPacker {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn flush<TWrite: Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
|
||||
pub fn flush<TWrite: io::Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
|
||||
if self.mini_buffer_written > 0 {
|
||||
let num_bytes = (self.mini_buffer_written + 7) / 8;
|
||||
let arr: [u8; 8] = unsafe { mem::transmute::<u64, [u8; 8]>(self.mini_buffer.to_le()) };
|
||||
let mut arr: [u8; 8] = [0u8; 8];
|
||||
LittleEndian::write_u64(&mut arr, self.mini_buffer);
|
||||
output.write_all(&arr[..num_bytes])?;
|
||||
self.mini_buffer_written = 0;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn close<TWrite: Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
|
||||
pub fn close<TWrite: io::Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
|
||||
self.flush(output)?;
|
||||
// Padding the write file to simplify reads.
|
||||
output.write_all(&[0u8; 7])?;
|
||||
@@ -102,8 +100,7 @@ where
|
||||
addr + 8 <= data.len(),
|
||||
"The fast field field should have been padded with 7 bytes."
|
||||
);
|
||||
let val_unshifted_unmasked: u64 =
|
||||
u64::from_le(unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) });
|
||||
let val_unshifted_unmasked: u64 = LittleEndian::read_u64(&data[addr..]);
|
||||
let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
|
||||
val_shifted & mask
|
||||
}
|
||||
@@ -125,8 +122,7 @@ where
|
||||
for output_val in output.iter_mut() {
|
||||
let addr = addr_in_bits >> 3;
|
||||
let bit_shift = addr_in_bits & 7;
|
||||
let val_unshifted_unmasked: u64 =
|
||||
unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) };
|
||||
let val_unshifted_unmasked: u64 = LittleEndian::read_u64(&data[addr..]);
|
||||
let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
|
||||
*output_val = val_shifted & mask;
|
||||
addr_in_bits += num_bits;
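// Illustrative sketch, not part of this diff: a standalone version of the read
// path above. A value is recovered by loading 8 little-endian bytes at the
// containing byte address, shifting by the in-byte offset and masking down to
// `num_bits`; like the original, it relies on the 7 bytes of padding written by
// `close`. `read_packed` and the test are illustrative only.
fn read_packed(data: &[u8], idx: usize, num_bits: usize) -> u64 {
    let addr_in_bits = idx * num_bits;
    let addr = addr_in_bits / 8;
    let bit_shift = addr_in_bits % 8;
    let mut word = [0u8; 8];
    word.copy_from_slice(&data[addr..addr + 8]);
    let mask = if num_bits == 64 { u64::MAX } else { (1u64 << num_bits) - 1 };
    (u64::from_le_bytes(word) >> bit_shift) & mask
}

#[test]
fn test_read_packed_sketch() {
    // Two 5-bit values (9 and 27) packed into the low 10 bits, plus padding.
    let word: u64 = 9 | (27 << 5);
    let mut packed = word.to_le_bytes().to_vec();
    packed.extend_from_slice(&[0u8; 7]);
    assert_eq!(read_packed(&packed, 0, 5), 9);
    assert_eq!(read_packed(&packed, 1, 5), 27);
}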
|
||||
|
||||
@@ -34,17 +34,17 @@ impl TinySet {
|
||||
}
|
||||
|
||||
/// Returns the complement of the set in `[0, 64[`.
|
||||
fn complement(&self) -> TinySet {
|
||||
fn complement(self) -> TinySet {
|
||||
TinySet(!self.0)
|
||||
}
|
||||
|
||||
/// Returns true iff the `TinySet` contains the element `el`.
|
||||
pub fn contains(&self, el: u32) -> bool {
|
||||
pub fn contains(self, el: u32) -> bool {
|
||||
!self.intersect(TinySet::singleton(el)).is_empty()
|
||||
}
|
||||
|
||||
/// Returns the intersection of `self` and `other`
|
||||
pub fn intersect(&self, other: TinySet) -> TinySet {
|
||||
pub fn intersect(self, other: TinySet) -> TinySet {
|
||||
TinySet(self.0 & other.0)
|
||||
}
|
||||
|
||||
@@ -77,7 +77,7 @@ impl TinySet {
|
||||
|
||||
/// Returns true iff the `TinySet` is empty.
|
||||
#[inline(always)]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
pub fn is_empty(self) -> bool {
|
||||
self.0 == 0u64
|
||||
}
|
||||
|
||||
@@ -114,7 +114,7 @@ impl TinySet {
|
||||
self.0 = 0u64;
|
||||
}
|
||||
|
||||
pub fn len(&self) -> u32 {
|
||||
pub fn len(self) -> u32 {
|
||||
self.0.count_ones()
|
||||
}
|
||||
}
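// Illustrative sketch, not part of this diff: `TinySet` is a bitset over [0, 64)
// backed by a single u64, so `contains` boils down to ANDing against a one-bit
// mask (the role played by `TinySet::singleton`, defined elsewhere in this file).
// `tiny_contains` and the test are illustrative only.
fn tiny_contains(set: u64, el: u32) -> bool {
    let singleton = 1u64 << el;
    set & singleton != 0
}

#[test]
fn test_tiny_contains_sketch() {
    let set = (1u64 << 3) | (1u64 << 10);
    assert!(tiny_contains(set, 3));
    assert!(tiny_contains(set, 10));
    assert!(!tiny_contains(set, 5));
}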
|
||||
@@ -266,14 +266,14 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_bitset_large() {
|
||||
let arr = generate_nonunique_unsorted(1_000_000, 50_000);
|
||||
let arr = generate_nonunique_unsorted(100_000, 5_000);
|
||||
let mut btreeset: BTreeSet<u32> = BTreeSet::new();
|
||||
let mut bitset = BitSet::with_max_value(1_000_000);
|
||||
let mut bitset = BitSet::with_max_value(100_000);
|
||||
for el in arr {
|
||||
btreeset.insert(el);
|
||||
bitset.insert(el);
|
||||
}
|
||||
for i in 0..1_000_000 {
|
||||
for i in 0..100_000 {
|
||||
assert_eq!(btreeset.contains(&i), bitset.contains(i));
|
||||
}
|
||||
assert_eq!(btreeset.len(), bitset.len());
|
||||
@@ -342,7 +342,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_bitset_clear() {
|
||||
let mut bitset = BitSet::with_max_value(1_000);
|
||||
let els = tests::sample(1_000, 0.01f32);
|
||||
let els = tests::sample(1_000, 0.01f64);
|
||||
for &el in &els {
|
||||
bitset.insert(el);
|
||||
}
|
||||
|
||||
@@ -4,6 +4,8 @@ use common::VInt;
|
||||
use directory::ReadOnlySource;
|
||||
use directory::WritePtr;
|
||||
use schema::Field;
|
||||
use space_usage::FieldUsage;
|
||||
use space_usage::PerFieldSpaceUsage;
|
||||
use std::collections::HashMap;
|
||||
use std::io::Write;
|
||||
use std::io::{self, Read};
|
||||
@@ -64,7 +66,7 @@ impl<W: Write> CompositeWrite<W> {
|
||||
&mut self.write
|
||||
}
|
||||
|
||||
/// Close the composite file.
|
||||
/// Close the composite file
|
||||
///
|
||||
/// An index of the different field offsets
|
||||
/// will be written as a footer.
|
||||
@@ -72,7 +74,8 @@ impl<W: Write> CompositeWrite<W> {
|
||||
let footer_offset = self.write.written_bytes();
|
||||
VInt(self.offsets.len() as u64).serialize(&mut self.write)?;
|
||||
|
||||
let mut offset_fields: Vec<_> = self.offsets
|
||||
let mut offset_fields: Vec<_> = self
|
||||
.offsets
|
||||
.iter()
|
||||
.map(|(file_addr, offset)| (*offset, *file_addr))
|
||||
.collect();
|
||||
@@ -112,7 +115,6 @@ impl CompositeFile {
|
||||
let end = data.len();
|
||||
let footer_len_data = data.slice_from(end - 4);
|
||||
let footer_len = u32::deserialize(&mut footer_len_data.as_slice())? as usize;
|
||||
|
||||
let footer_start = end - 4 - footer_len;
|
||||
let footer_data = data.slice(footer_start, footer_start + footer_len);
|
||||
let mut footer_buffer = footer_data.as_slice();
|
||||
@@ -166,6 +168,17 @@ impl CompositeFile {
|
||||
.get(&FileAddr { field, idx })
|
||||
.map(|&(from, to)| self.data.slice(from, to))
|
||||
}
|
||||
|
||||
pub fn space_usage(&self) -> PerFieldSpaceUsage {
|
||||
let mut fields = HashMap::new();
|
||||
for (&field_addr, &(start, end)) in self.offsets_index.iter() {
|
||||
fields
|
||||
.entry(field_addr.field)
|
||||
.or_insert_with(|| FieldUsage::empty(field_addr.field))
|
||||
.add_field_idx(field_addr.idx, end - start);
|
||||
}
|
||||
PerFieldSpaceUsage::new(fields)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -7,6 +7,8 @@ use std::io::Write;
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
pub struct VInt(pub u64);
|
||||
|
||||
const STOP_BIT: u8 = 128;
|
||||
|
||||
impl VInt {
|
||||
pub fn val(&self) -> u64 {
|
||||
self.0
|
||||
@@ -15,24 +17,34 @@ impl VInt {
|
||||
pub fn deserialize_u64<R: Read>(reader: &mut R) -> io::Result<u64> {
|
||||
VInt::deserialize(reader).map(|vint| vint.0)
|
||||
}
|
||||
|
||||
pub fn serialize_into_vec(&self, output: &mut Vec<u8>) {
|
||||
let mut buffer = [0u8; 10];
|
||||
let num_bytes = self.serialize_into(&mut buffer);
|
||||
output.extend(&buffer[0..num_bytes]);
|
||||
}
|
||||
|
||||
fn serialize_into(&self, buffer: &mut [u8; 10]) -> usize {
|
||||
let mut remaining = self.0;
|
||||
for (i, b) in buffer.iter_mut().enumerate() {
|
||||
let next_byte: u8 = (remaining % 128u64) as u8;
|
||||
remaining /= 128u64;
|
||||
if remaining == 0u64 {
|
||||
*b = next_byte | STOP_BIT;
|
||||
return i + 1;
|
||||
} else {
|
||||
*b = next_byte;
|
||||
}
|
||||
}
|
||||
unreachable!();
|
||||
}
|
||||
}
|
||||
|
||||
impl BinarySerializable for VInt {
|
||||
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
|
||||
let mut remaining = self.0;
|
||||
let mut buffer = [0u8; 10];
|
||||
let mut i = 0;
|
||||
loop {
|
||||
let next_byte: u8 = (remaining % 128u64) as u8;
|
||||
remaining /= 128u64;
|
||||
if remaining == 0u64 {
|
||||
buffer[i] = next_byte | 128u8;
|
||||
return writer.write_all(&buffer[0..i + 1]);
|
||||
} else {
|
||||
buffer[i] = next_byte;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
let num_bytes = self.serialize_into(&mut buffer);
|
||||
writer.write_all(&buffer[0..num_bytes])
|
||||
}
|
||||
|
||||
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
|
||||
@@ -42,20 +54,58 @@ impl BinarySerializable for VInt {
|
||||
loop {
|
||||
match bytes.next() {
|
||||
Some(Ok(b)) => {
|
||||
result += u64::from(b % 128u8) << shift;
|
||||
if b & 128u8 != 0u8 {
|
||||
break;
|
||||
result |= u64::from(b % 128u8) << shift;
|
||||
if b >= STOP_BIT {
|
||||
return Ok(VInt(result));
|
||||
}
|
||||
shift += 7;
|
||||
}
|
||||
_ => {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
"Reach end of buffer",
|
||||
"Reach end of buffer while reading VInt",
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(VInt(result))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use super::VInt;
|
||||
use common::BinarySerializable;
|
||||
|
||||
fn aux_test_vint(val: u64) {
|
||||
let mut v = [14u8; 10];
|
||||
let num_bytes = VInt(val).serialize_into(&mut v);
|
||||
for i in num_bytes..10 {
|
||||
assert_eq!(v[i], 14u8);
|
||||
}
|
||||
assert!(num_bytes > 0);
|
||||
if num_bytes < 10 {
|
||||
assert!(1u64 << (7 * num_bytes) > val);
|
||||
}
|
||||
if num_bytes > 1 {
|
||||
assert!(1u64 << (7 * (num_bytes - 1)) <= val);
|
||||
}
|
||||
let serdeser_val = VInt::deserialize(&mut &v[..]).unwrap();
|
||||
assert_eq!(val, serdeser_val.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_vint() {
|
||||
aux_test_vint(0);
|
||||
aux_test_vint(1);
|
||||
aux_test_vint(5);
|
||||
aux_test_vint(u64::max_value());
|
||||
for i in 1..9 {
|
||||
let power_of_128 = 1u64 << (7 * i);
|
||||
aux_test_vint(power_of_128 - 1u64);
|
||||
aux_test_vint(power_of_128);
|
||||
aux_test_vint(power_of_128 + 1u64);
|
||||
}
|
||||
aux_test_vint(10);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,160 +0,0 @@
|
||||
use compression::compressed_block_size;
|
||||
use compression::BlockDecoder;
|
||||
use compression::COMPRESSION_BLOCK_SIZE;
|
||||
use directory::ReadOnlySource;
|
||||
use owned_read::OwnedRead;
|
||||
|
||||
/// Reads a stream of compressed ints.
|
||||
///
|
||||
/// Tantivy uses `CompressedIntStream` to read
|
||||
/// the position file.
|
||||
/// The `.skip(...)` makes it possible to avoid
|
||||
/// decompressing blocks that are not required.
|
||||
pub struct CompressedIntStream {
|
||||
buffer: OwnedRead,
|
||||
|
||||
block_decoder: BlockDecoder,
|
||||
cached_addr: usize, // address of the currently decoded block
|
||||
cached_next_addr: usize, // address following the currently decoded block
|
||||
|
||||
addr: usize, // address of the block associated to the current position
|
||||
inner_offset: usize,
|
||||
}
|
||||
|
||||
impl CompressedIntStream {
|
||||
/// Opens a compressed int stream.
|
||||
pub(crate) fn wrap(source: ReadOnlySource) -> CompressedIntStream {
|
||||
CompressedIntStream {
|
||||
buffer: OwnedRead::new(source),
|
||||
block_decoder: BlockDecoder::new(),
|
||||
cached_addr: usize::max_value(),
|
||||
cached_next_addr: usize::max_value(),
|
||||
|
||||
addr: 0,
|
||||
inner_offset: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Loads the block at the given address and return the address of the
|
||||
/// following block
|
||||
pub fn read_block(&mut self, addr: usize) -> usize {
|
||||
if self.cached_addr == addr {
|
||||
// we are already on this block.
|
||||
// no need to read.
|
||||
self.cached_next_addr
|
||||
} else {
|
||||
let next_addr = addr + self.block_decoder
|
||||
.uncompress_block_unsorted(self.buffer.slice_from(addr));
|
||||
self.cached_addr = addr;
|
||||
self.cached_next_addr = next_addr;
|
||||
next_addr
|
||||
}
|
||||
}
|
||||
|
||||
/// Fills a buffer with the next `output.len()` integers.
|
||||
/// This does not consume / advance the stream.
|
||||
pub fn read(&mut self, output: &mut [u32]) {
|
||||
let mut cursor = self.addr;
|
||||
let mut inner_offset = self.inner_offset;
|
||||
let mut num_els: usize = output.len();
|
||||
let mut start = 0;
|
||||
loop {
|
||||
cursor = self.read_block(cursor);
|
||||
let block = &self.block_decoder.output_array()[inner_offset..];
|
||||
let block_len = block.len();
|
||||
if num_els >= block_len {
|
||||
output[start..start + block_len].clone_from_slice(&block);
|
||||
start += block_len;
|
||||
num_els -= block_len;
|
||||
inner_offset = 0;
|
||||
} else {
|
||||
output[start..].clone_from_slice(&block[..num_els]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Skip the next `skip_len` integer.
|
||||
///
|
||||
/// If a full block is skipped, calling
|
||||
/// `.skip(...)` will avoid decompressing it.
|
||||
///
|
||||
/// May panic if the end of the stream is reached.
|
||||
pub fn skip(&mut self, mut skip_len: usize) {
|
||||
loop {
|
||||
let available = COMPRESSION_BLOCK_SIZE - self.inner_offset;
|
||||
if available >= skip_len {
|
||||
self.inner_offset += skip_len;
|
||||
break;
|
||||
} else {
|
||||
skip_len -= available;
|
||||
// entirely skip decompressing some blocks.
|
||||
let num_bits: u8 = self.buffer.get(self.addr);
|
||||
let block_len = compressed_block_size(num_bits);
|
||||
self.addr += block_len;
|
||||
self.inner_offset = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
|
||||
use super::CompressedIntStream;
|
||||
use compression::compressed_block_size;
|
||||
use compression::BlockEncoder;
|
||||
use compression::COMPRESSION_BLOCK_SIZE;
|
||||
use directory::ReadOnlySource;
|
||||
|
||||
fn create_stream_buffer() -> ReadOnlySource {
|
||||
let mut buffer: Vec<u8> = vec![];
|
||||
let mut encoder = BlockEncoder::new();
|
||||
let vals: Vec<u32> = (0u32..1152u32).collect();
|
||||
for chunk in vals.chunks(COMPRESSION_BLOCK_SIZE) {
|
||||
let compressed_block = encoder.compress_block_unsorted(chunk);
|
||||
let num_bits = compressed_block[0];
|
||||
assert_eq!(compressed_block_size(num_bits), compressed_block.len());
|
||||
buffer.extend_from_slice(compressed_block);
|
||||
}
|
||||
if cfg!(simd) {
|
||||
buffer.extend_from_slice(&[0u8; 7]);
|
||||
}
|
||||
ReadOnlySource::from(buffer)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compressed_int_stream() {
|
||||
let buffer = create_stream_buffer();
|
||||
let mut stream = CompressedIntStream::wrap(buffer);
|
||||
let mut block: [u32; COMPRESSION_BLOCK_SIZE] = [0u32; COMPRESSION_BLOCK_SIZE];
|
||||
|
||||
stream.read(&mut block[0..2]);
|
||||
assert_eq!(block[0], 0);
|
||||
assert_eq!(block[1], 1);
|
||||
|
||||
// reading does not consume the stream
|
||||
stream.read(&mut block[0..2]);
|
||||
assert_eq!(block[0], 0);
|
||||
assert_eq!(block[1], 1);
|
||||
stream.skip(2);
|
||||
|
||||
stream.skip(5);
|
||||
stream.read(&mut block[0..3]);
|
||||
stream.skip(3);
|
||||
|
||||
assert_eq!(block[0], 7);
|
||||
assert_eq!(block[1], 8);
|
||||
assert_eq!(block[2], 9);
|
||||
stream.skip(500);
|
||||
stream.read(&mut block[0..3]);
|
||||
stream.skip(3);
|
||||
|
||||
assert_eq!(block[0], 510);
|
||||
assert_eq!(block[1], 511);
|
||||
assert_eq!(block[2], 512);
|
||||
stream.skip(511);
|
||||
stream.read(&mut block[..1]);
|
||||
assert_eq!(block[0], 1024);
|
||||
}
|
||||
}
|
||||
src/core/executor.rs (new file, 137 lines)
@@ -0,0 +1,137 @@
|
||||
use crossbeam::channel;
|
||||
use scoped_pool::{Pool, ThreadConfig};
|
||||
use Result;
|
||||
|
||||
/// Search executor, whether search requests are single-threaded or multithreaded.
|
||||
///
|
||||
/// We don't expose Rayon thread pool directly here for several reasons.
|
||||
///
|
||||
/// First, dependency hell. It is not a good idea to expose the
|
||||
/// API of a dependency, knowing it might conflict with a different version
|
||||
/// used by the client. Second, we may stop using rayon in the future.
|
||||
pub enum Executor {
|
||||
SingleThread,
|
||||
ThreadPool(Pool),
|
||||
}
|
||||
|
||||
impl Executor {
|
||||
/// Creates an Executor that performs all tasks in the caller's thread.
|
||||
pub fn single_thread() -> Executor {
|
||||
Executor::SingleThread
|
||||
}
|
||||
|
||||
// Creates an Executor that dispatches the tasks in a thread pool.
|
||||
pub fn multi_thread(num_threads: usize, prefix: &'static str) -> Executor {
|
||||
let thread_config = ThreadConfig::new().prefix(prefix);
|
||||
let pool = Pool::with_thread_config(num_threads, thread_config);
|
||||
Executor::ThreadPool(pool)
|
||||
}
|
||||
|
||||
// Perform a map in the thread pool.
|
||||
//
|
||||
// Regardless of the executor (`SingleThread` or `ThreadPool`), panics in the task
|
||||
// will propagate to the caller.
|
||||
pub fn map<
|
||||
A: Send,
|
||||
R: Send,
|
||||
AIterator: Iterator<Item = A>,
|
||||
F: Sized + Sync + Fn(A) -> Result<R>,
|
||||
>(
|
||||
&self,
|
||||
f: F,
|
||||
args: AIterator,
|
||||
) -> Result<Vec<R>> {
|
||||
match self {
|
||||
Executor::SingleThread => args.map(f).collect::<Result<_>>(),
|
||||
Executor::ThreadPool(pool) => {
|
||||
let args_with_indices: Vec<(usize, A)> = args.enumerate().collect();
|
||||
let num_fruits = args_with_indices.len();
|
||||
let fruit_receiver = {
|
||||
let (fruit_sender, fruit_receiver) = channel::unbounded();
|
||||
pool.scoped(|scope| {
|
||||
for arg_with_idx in args_with_indices {
|
||||
scope.execute(|| {
|
||||
let (idx, arg) = arg_with_idx;
|
||||
let fruit = f(arg);
|
||||
if let Err(err) = fruit_sender.send((idx, fruit)) {
|
||||
error!("Failed to send search task. It probably means all search threads have panicked. {:?}", err);
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
fruit_receiver
|
||||
// This ends the scope of fruit_sender.
|
||||
// This is important as it makes it possible for the fruit_receiver iteration to
|
||||
// terminate.
|
||||
};
|
||||
// This is lame, but it does not use unsafe code.
|
||||
let mut results_with_position = Vec::with_capacity(num_fruits);
|
||||
for (pos, fruit_res) in fruit_receiver {
|
||||
let fruit = fruit_res?;
|
||||
results_with_position.push((pos, fruit));
|
||||
}
|
||||
results_with_position.sort_by_key(|(pos, _)| *pos);
|
||||
assert_eq!(results_with_position.len(), num_fruits);
|
||||
Ok(results_with_position
|
||||
.into_iter()
|
||||
.map(|(_, fruit)| fruit)
|
||||
.collect::<Vec<_>>())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use super::Executor;
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "panic should propagate")]
|
||||
fn test_panic_propagates_single_thread() {
|
||||
let _result: Vec<usize> = Executor::single_thread()
|
||||
.map(
|
||||
|_| {
|
||||
panic!("panic should propagate");
|
||||
},
|
||||
vec![0].into_iter(),
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic] //< unfortunately the panic message is not propagated
|
||||
fn test_panic_propagates_multi_thread() {
|
||||
let _result: Vec<usize> = Executor::multi_thread(1, "search-test")
|
||||
.map(
|
||||
|_| {
|
||||
panic!("panic should propagate");
|
||||
},
|
||||
vec![0].into_iter(),
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_map_singlethread() {
|
||||
let result: Vec<usize> = Executor::single_thread()
|
||||
.map(|i| Ok(i * 2), 0..1_000)
|
||||
.unwrap();
|
||||
assert_eq!(result.len(), 1_000);
|
||||
for i in 0..1_000 {
|
||||
assert_eq!(result[i], i * 2);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_map_multithread() {
|
||||
let result: Vec<usize> = Executor::multi_thread(3, "search-test")
|
||||
.map(|i| Ok(i * 2), 0..10)
|
||||
.unwrap();
|
||||
assert_eq!(result.len(), 10);
|
||||
for i in 0..10 {
|
||||
assert_eq!(result[i], i * 2);
|
||||
}
|
||||
}
|
||||
@@ -1,18 +1,11 @@
|
||||
use core::SegmentId;
|
||||
use error::{ErrorKind, ResultExt};
|
||||
use schema::Schema;
|
||||
use serde_json;
|
||||
use std::borrow::BorrowMut;
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
use Result;
|
||||
|
||||
use super::pool::LeasedItem;
|
||||
use super::pool::Pool;
|
||||
use super::segment::create_segment;
|
||||
use super::segment::Segment;
|
||||
use core::searcher::Searcher;
|
||||
use core::Executor;
|
||||
use core::IndexMeta;
|
||||
use core::SegmentId;
|
||||
use core::SegmentMeta;
|
||||
use core::SegmentReader;
|
||||
use core::META_FILEPATH;
|
||||
@@ -20,32 +13,79 @@ use directory::ManagedDirectory;
|
||||
#[cfg(feature = "mmap")]
|
||||
use directory::MmapDirectory;
|
||||
use directory::{Directory, RAMDirectory};
|
||||
use error::DataCorruption;
|
||||
use error::TantivyError;
|
||||
use indexer::index_writer::open_index_writer;
|
||||
use indexer::index_writer::HEAP_SIZE_MIN;
|
||||
use indexer::segment_updater::save_new_metas;
|
||||
use indexer::DirectoryLock;
|
||||
use indexer::LockType;
|
||||
use num_cpus;
|
||||
use schema::Field;
|
||||
use schema::FieldType;
|
||||
use schema::Schema;
|
||||
use serde_json;
|
||||
use std::borrow::BorrowMut;
|
||||
use std::fmt;
|
||||
use std::path::Path;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
use tokenizer::BoxedTokenizer;
|
||||
use tokenizer::TokenizerManager;
|
||||
use IndexWriter;
|
||||
|
||||
const NUM_SEARCHERS: usize = 12;
|
||||
use Result;
|
||||
|
||||
fn load_metas(directory: &Directory) -> Result<IndexMeta> {
|
||||
let meta_data = directory.atomic_read(&META_FILEPATH)?;
|
||||
let meta_string = String::from_utf8_lossy(&meta_data);
|
||||
serde_json::from_str(&meta_string).chain_err(|| ErrorKind::CorruptedFile(META_FILEPATH.clone()))
|
||||
serde_json::from_str(&meta_string)
|
||||
.map_err(|e| {
|
||||
DataCorruption::new(
|
||||
META_FILEPATH.clone(),
|
||||
format!("Meta file cannot be deserialized. {:?}.", e),
|
||||
)
|
||||
})
|
||||
.map_err(From::from)
|
||||
}
|
||||
|
||||
/// Search Index
|
||||
pub struct Index {
|
||||
directory: ManagedDirectory,
|
||||
schema: Schema,
|
||||
num_searchers: Arc<AtomicUsize>,
|
||||
searcher_pool: Arc<Pool<Searcher>>,
|
||||
executor: Arc<Executor>,
|
||||
tokenizers: TokenizerManager,
|
||||
}
|
||||
|
||||
impl Index {
|
||||
/// Examines the directory to see if it contains an index
|
||||
pub fn exists<Dir: Directory>(dir: &Dir) -> bool {
|
||||
dir.exists(&META_FILEPATH)
|
||||
}
|
||||
|
||||
/// Accessor to the search executor.
|
||||
///
|
||||
/// This pool is used by default when calling `searcher.search(...)`
|
||||
/// to perform search on the individual segments.
|
||||
///
|
||||
/// By default the executor is single thread, and simply runs in the calling thread.
|
||||
pub fn search_executor(&self) -> &Executor {
|
||||
self.executor.as_ref()
|
||||
}
|
||||
|
||||
/// Replace the default single thread search executor pool
|
||||
/// by a thread pool with a given number of threads.
|
||||
pub fn set_multithread_executor(&mut self, num_threads: usize) {
|
||||
self.executor = Arc::new(Executor::multi_thread(num_threads, "thrd-tantivy-search-"));
|
||||
}
|
||||
|
||||
/// Replace the default single thread search executor pool
|
||||
/// by a thread pool with a given number of threads.
|
||||
pub fn set_default_multithread_executor(&mut self) {
|
||||
let default_num_threads = num_cpus::get();
|
||||
self.set_multithread_executor(default_num_threads);
|
||||
}
|
||||
|
||||
/// Creates a new index using the `RAMDirectory`.
|
||||
///
|
||||
/// The index will be allocated in anonymous memory.
|
||||
@@ -62,9 +102,30 @@ impl Index {
|
||||
#[cfg(feature = "mmap")]
|
||||
pub fn create_in_dir<P: AsRef<Path>>(directory_path: P, schema: Schema) -> Result<Index> {
|
||||
let mmap_directory = MmapDirectory::open(directory_path)?;
|
||||
if Index::exists(&mmap_directory) {
|
||||
return Err(TantivyError::IndexAlreadyExists);
|
||||
}
|
||||
|
||||
Index::create(mmap_directory, schema)
|
||||
}
|
||||
|
||||
/// Opens or creates a new index in the provided directory
|
||||
#[cfg(feature = "mmap")]
|
||||
pub fn open_or_create<Dir: Directory>(dir: Dir, schema: Schema) -> Result<Index> {
|
||||
if Index::exists(&dir) {
|
||||
let index = Index::open(dir)?;
|
||||
if index.schema() == schema {
|
||||
Ok(index)
|
||||
} else {
|
||||
Err(TantivyError::SchemaError(
|
||||
"An index exists but the schema does not match.".to_string(),
|
||||
))
|
||||
}
|
||||
} else {
|
||||
Index::create(dir, schema)
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new index in a temp directory.
|
||||
///
|
||||
/// The index will use the `MMapDirectory` in a newly created directory.
|
||||
@@ -81,13 +142,15 @@ impl Index {
|
||||
|
||||
/// Creates a new index given an implementation of the trait `Directory`
|
||||
pub fn create<Dir: Directory>(dir: Dir, schema: Schema) -> Result<Index> {
|
||||
let directory = ManagedDirectory::new(dir)?;
|
||||
let directory = ManagedDirectory::wrap(dir)?;
|
||||
Index::from_directory(directory, schema)
|
||||
}
|
||||
|
||||
/// Create a new index from a directory.
|
||||
///
|
||||
/// This will overwrite existing meta.json
|
||||
fn from_directory(mut directory: ManagedDirectory, schema: Schema) -> Result<Index> {
|
||||
save_new_metas(schema.clone(), 0, directory.borrow_mut())?;
|
||||
save_new_metas(schema.clone(), directory.borrow_mut())?;
|
||||
let metas = IndexMeta::with_schema(schema);
|
||||
Index::create_from_metas(directory, &metas)
|
||||
}
|
||||
@@ -95,11 +158,14 @@ impl Index {
|
||||
/// Creates a new index given a directory and an `IndexMeta`.
|
||||
fn create_from_metas(directory: ManagedDirectory, metas: &IndexMeta) -> Result<Index> {
|
||||
let schema = metas.schema.clone();
|
||||
let n_cpus = num_cpus::get();
|
||||
let index = Index {
|
||||
directory,
|
||||
schema,
|
||||
num_searchers: Arc::new(AtomicUsize::new(n_cpus)),
|
||||
searcher_pool: Arc::new(Pool::new()),
|
||||
tokenizers: TokenizerManager::default(),
|
||||
executor: Arc::new(Executor::single_thread()),
|
||||
};
|
||||
index.load_searchers()?;
|
||||
Ok(index)
|
||||
@@ -110,6 +176,27 @@ impl Index {
|
||||
&self.tokenizers
|
||||
}
|
||||
|
||||
/// Helper to access the tokenizer associated to a specific field.
|
||||
pub fn tokenizer_for_field(&self, field: Field) -> Result<Box<BoxedTokenizer>> {
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
let field_type = field_entry.field_type();
|
||||
let tokenizer_manager: &TokenizerManager = self.tokenizers();
|
||||
let tokenizer_name_opt: Option<Box<BoxedTokenizer>> = match field_type {
|
||||
FieldType::Str(text_options) => text_options
|
||||
.get_indexing_options()
|
||||
.map(|text_indexing_options| text_indexing_options.tokenizer().to_string())
|
||||
.and_then(|tokenizer_name| tokenizer_manager.get(&tokenizer_name)),
|
||||
_ => None,
|
||||
};
|
||||
match tokenizer_name_opt {
|
||||
Some(tokenizer) => Ok(tokenizer),
|
||||
None => Err(TantivyError::SchemaError(format!(
|
||||
"{:?} is not a text field.",
|
||||
field_entry.name()
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
/// Opens a new directory from an index path.
|
||||
#[cfg(feature = "mmap")]
|
||||
pub fn open_in_dir<P: AsRef<Path>>(directory_path: P) -> Result<Index> {
|
||||
@@ -119,7 +206,7 @@ impl Index {
|
||||
|
||||
/// Open the index using the provided directory
|
||||
pub fn open<D: Directory>(directory: D) -> Result<Index> {
|
||||
let directory = ManagedDirectory::new(directory)?;
|
||||
let directory = ManagedDirectory::wrap(directory)?;
|
||||
let metas = load_metas(&directory)?;
|
||||
Index::create_from_metas(directory, &metas)
|
||||
}
|
||||
@@ -153,7 +240,7 @@ impl Index {
|
||||
num_threads: usize,
|
||||
overall_heap_size_in_bytes: usize,
|
||||
) -> Result<IndexWriter> {
|
||||
let directory_lock = DirectoryLock::lock(self.directory().box_clone())?;
|
||||
let directory_lock = LockType::IndexWriterLock.acquire_lock(&self.directory)?;
|
||||
let heap_size_in_bytes_per_thread = overall_heap_size_in_bytes / num_threads;
|
||||
open_index_writer(
|
||||
self,
|
||||
@@ -191,7 +278,8 @@ impl Index {
|
||||
|
||||
/// Returns the list of segments that are searchable
|
||||
pub fn searchable_segments(&self) -> Result<Vec<Segment>> {
|
||||
Ok(self.searchable_segment_metas()?
|
||||
Ok(self
|
||||
.searchable_segment_metas()?
|
||||
.into_iter()
|
||||
.map(|segment_meta| self.segment(segment_meta))
|
||||
.collect())
|
||||
@@ -226,27 +314,41 @@ impl Index {
|
||||
|
||||
/// Returns the list of segment ids that are searchable.
|
||||
pub fn searchable_segment_ids(&self) -> Result<Vec<SegmentId>> {
|
||||
Ok(self.searchable_segment_metas()?
|
||||
Ok(self
|
||||
.searchable_segment_metas()?
|
||||
.iter()
|
||||
.map(|segment_meta| segment_meta.id())
|
||||
.collect())
|
||||
}
|
||||
|
||||
/// Creates a new generation of searchers after
|
||||
|
||||
/// a change of the set of searchable indexes.
|
||||
/// Sets the number of searchers to use
|
||||
///
|
||||
/// This needs to be called when a new segment has been
|
||||
/// published or after a merge.
|
||||
/// Only works after the next call to `load_searchers`
|
||||
pub fn set_num_searchers(&mut self, num_searchers: usize) {
|
||||
self.num_searchers.store(num_searchers, Ordering::Release);
|
||||
}
|
||||
|
||||
/// Update searchers so that they reflect the state of the last
|
||||
/// `.commit()`.
|
||||
///
|
||||
/// If indexing happens in the same process as searching,
|
||||
/// you most likely want to call `.load_searchers()` right after each
|
||||
/// successful call to `.commit()`.
|
||||
///
|
||||
/// If indexing and searching happen in different processes, the way to
/// get the freshest `index` at all times is to watch `meta.json` and
/// call `load_searchers` whenever a change happens.
|
||||
pub fn load_searchers(&self) -> Result<()> {
|
||||
let _meta_lock = LockType::MetaLock.acquire_lock(self.directory())?;
|
||||
let searchable_segments = self.searchable_segments()?;
|
||||
let segment_readers: Vec<SegmentReader> = searchable_segments
|
||||
.iter()
|
||||
.map(SegmentReader::open)
|
||||
.collect::<Result<_>>()?;
|
||||
let schema = self.schema();
|
||||
let searchers = (0..NUM_SEARCHERS)
|
||||
.map(|_| Searcher::new(schema.clone(), segment_readers.clone()))
|
||||
let num_searchers: usize = self.num_searchers.load(Ordering::Acquire);
|
||||
let searchers = (0..num_searchers)
|
||||
.map(|_| Searcher::new(schema.clone(), self.clone(), segment_readers.clone()))
|
||||
.collect();
|
||||
self.searcher_pool.publish_new_generation(searchers);
|
||||
Ok(())
|
||||
@@ -256,7 +358,7 @@ impl Index {
|
||||
///
|
||||
/// This method should be called every single time a search
|
||||
/// query is performed.
|
||||
/// The searchers are taken from a pool of `NUM_SEARCHERS` searchers.
|
||||
/// The searchers are taken from a pool of `num_searchers` searchers.
|
||||
/// If no searcher is available
|
||||
/// this may block.
|
||||
///
|
||||
@@ -278,8 +380,82 @@ impl Clone for Index {
|
||||
Index {
|
||||
directory: self.directory.clone(),
|
||||
schema: self.schema.clone(),
|
||||
num_searchers: Arc::clone(&self.num_searchers),
|
||||
searcher_pool: Arc::clone(&self.searcher_pool),
|
||||
tokenizers: self.tokenizers.clone(),
|
||||
executor: self.executor.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use directory::RAMDirectory;
|
||||
use schema::{Schema, INT_INDEXED, TEXT};
|
||||
use Index;
|
||||
|
||||
#[test]
|
||||
fn test_indexer_for_field() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let num_likes_field = schema_builder.add_u64_field("num_likes", INT_INDEXED);
|
||||
let body_field = schema_builder.add_text_field("body", TEXT);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
assert!(index.tokenizer_for_field(body_field).is_ok());
|
||||
assert_eq!(
|
||||
format!("{:?}", index.tokenizer_for_field(num_likes_field).err()),
|
||||
"Some(SchemaError(\"\\\"num_likes\\\" is not a text field.\"))"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_index_exists() {
|
||||
let directory = RAMDirectory::create();
|
||||
assert!(!Index::exists(&directory));
|
||||
assert!(Index::create(directory.clone(), throw_away_schema()).is_ok());
|
||||
assert!(Index::exists(&directory));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn open_or_create_should_create() {
|
||||
let directory = RAMDirectory::create();
|
||||
assert!(!Index::exists(&directory));
|
||||
assert!(Index::open_or_create(directory.clone(), throw_away_schema()).is_ok());
|
||||
assert!(Index::exists(&directory));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn open_or_create_should_open() {
|
||||
let directory = RAMDirectory::create();
|
||||
assert!(Index::create(directory.clone(), throw_away_schema()).is_ok());
|
||||
assert!(Index::exists(&directory));
|
||||
assert!(Index::open_or_create(directory, throw_away_schema()).is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn create_should_wipeoff_existing() {
|
||||
let directory = RAMDirectory::create();
|
||||
assert!(Index::create(directory.clone(), throw_away_schema()).is_ok());
|
||||
assert!(Index::exists(&directory));
|
||||
assert!(Index::create(directory.clone(), Schema::builder().build()).is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn open_or_create_exists_but_schema_does_not_match() {
|
||||
let directory = RAMDirectory::create();
|
||||
assert!(Index::create(directory.clone(), throw_away_schema()).is_ok());
|
||||
assert!(Index::exists(&directory));
|
||||
assert!(Index::open_or_create(directory.clone(), throw_away_schema()).is_ok());
|
||||
let err = Index::open_or_create(directory, Schema::builder().build());
|
||||
assert_eq!(
|
||||
format!("{:?}", err.unwrap_err()),
|
||||
"SchemaError(\"An index exists but the schema does not match.\")"
|
||||
);
|
||||
}
|
||||
|
||||
fn throw_away_schema() -> Schema {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let _ = schema_builder.add_u64_field("num_likes", INT_INDEXED);
|
||||
schema_builder.build()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -46,19 +46,19 @@ impl fmt::Debug for IndexMeta {
|
||||
mod tests {
|
||||
|
||||
use super::IndexMeta;
|
||||
use schema::{SchemaBuilder, TEXT};
|
||||
use schema::{Schema, TEXT};
|
||||
use serde_json;
|
||||
|
||||
#[test]
|
||||
fn test_serialize_metas() {
|
||||
let schema = {
|
||||
let mut schema_builder = SchemaBuilder::new();
|
||||
let mut schema_builder = Schema::builder();
|
||||
schema_builder.add_text_field("text", TEXT);
|
||||
schema_builder.build()
|
||||
};
|
||||
let index_metas = IndexMeta {
|
||||
segments: Vec::new(),
|
||||
schema: schema,
|
||||
schema,
|
||||
opstamp: 0u64,
|
||||
payload: None,
|
||||
};
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
use common::BinarySerializable;
|
||||
use compression::CompressedIntStream;
|
||||
use directory::ReadOnlySource;
|
||||
use postings::FreqReadingOption;
|
||||
use owned_read::OwnedRead;
|
||||
use positions::PositionReader;
|
||||
use postings::TermInfo;
|
||||
use postings::{BlockSegmentPostings, SegmentPostings};
|
||||
use schema::FieldType;
|
||||
use schema::IndexRecordOption;
|
||||
use schema::Term;
|
||||
use termdict::TermDictionary;
|
||||
use owned_read::OwnedRead;
|
||||
|
||||
/// The inverted index reader is in charge of accessing
|
||||
/// the inverted index associated to a specific field.
|
||||
@@ -27,15 +26,18 @@ pub struct InvertedIndexReader {
|
||||
termdict: TermDictionary,
|
||||
postings_source: ReadOnlySource,
|
||||
positions_source: ReadOnlySource,
|
||||
positions_idx_source: ReadOnlySource,
|
||||
record_option: IndexRecordOption,
|
||||
total_num_tokens: u64,
|
||||
}
|
||||
|
||||
impl InvertedIndexReader {
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(clippy::needless_pass_by_value))] // for symmetry
|
||||
pub(crate) fn new(
|
||||
termdict: TermDictionary,
|
||||
postings_source: ReadOnlySource,
|
||||
positions_source: ReadOnlySource,
|
||||
positions_idx_source: ReadOnlySource,
|
||||
record_option: IndexRecordOption,
|
||||
) -> InvertedIndexReader {
|
||||
let total_num_tokens_data = postings_source.slice(0, 8);
|
||||
@@ -45,6 +47,7 @@ impl InvertedIndexReader {
|
||||
termdict,
|
||||
postings_source: postings_source.slice_from(8),
|
||||
positions_source,
|
||||
positions_idx_source,
|
||||
record_option,
|
||||
total_num_tokens,
|
||||
}
|
||||
@@ -52,14 +55,15 @@ impl InvertedIndexReader {
|
||||
|
||||
/// Creates an empty `InvertedIndexReader` object, which
|
||||
/// contains no terms at all.
|
||||
pub fn empty(field_type: FieldType) -> InvertedIndexReader {
|
||||
pub fn empty(field_type: &FieldType) -> InvertedIndexReader {
|
||||
let record_option = field_type
|
||||
.get_index_record_option()
|
||||
.unwrap_or(IndexRecordOption::Basic);
|
||||
InvertedIndexReader {
|
||||
termdict: TermDictionary::empty(field_type),
|
||||
termdict: TermDictionary::empty(&field_type),
|
||||
postings_source: ReadOnlySource::empty(),
|
||||
positions_source: ReadOnlySource::empty(),
|
||||
positions_idx_source: ReadOnlySource::empty(),
|
||||
record_option,
|
||||
total_num_tokens: 0u64,
|
||||
}
|
||||
@@ -94,7 +98,20 @@ impl InvertedIndexReader {
|
||||
let end_source = self.postings_source.len();
|
||||
let postings_slice = self.postings_source.slice(offset, end_source);
|
||||
let postings_reader = OwnedRead::new(postings_slice);
|
||||
block_postings.reset(term_info.doc_freq as usize, postings_reader);
|
||||
block_postings.reset(term_info.doc_freq, postings_reader);
|
||||
}
|
||||
|
||||
/// Returns a block postings given a `Term`.
|
||||
/// This method is for an advanced usage only.
|
||||
///
|
||||
/// Most users should prefer using `read_postings` instead.
|
||||
pub fn read_block_postings(
|
||||
&self,
|
||||
term: &Term,
|
||||
option: IndexRecordOption,
|
||||
) -> Option<BlockSegmentPostings> {
|
||||
self.get_term_info(term)
|
||||
.map(move |term_info| self.read_block_postings_from_terminfo(&term_info, option))
|
||||
}
|
||||
|
||||
/// Returns a block postings given a `term_info`.
|
||||
@@ -108,15 +125,11 @@ impl InvertedIndexReader {
|
||||
) -> BlockSegmentPostings {
|
||||
let offset = term_info.postings_offset as usize;
|
||||
let postings_data = self.postings_source.slice_from(offset);
|
||||
let freq_reading_option = match (self.record_option, requested_option) {
|
||||
(IndexRecordOption::Basic, _) => FreqReadingOption::NoFreq,
|
||||
(_, IndexRecordOption::Basic) => FreqReadingOption::SkipFreq,
|
||||
(_, _) => FreqReadingOption::ReadFreq,
|
||||
};
|
||||
BlockSegmentPostings::from_data(
|
||||
term_info.doc_freq as usize,
|
||||
term_info.doc_freq,
|
||||
OwnedRead::new(postings_data),
|
||||
freq_reading_option,
|
||||
self.record_option,
|
||||
requested_option,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -132,11 +145,11 @@ impl InvertedIndexReader {
|
||||
let block_postings = self.read_block_postings_from_terminfo(term_info, option);
|
||||
let position_stream = {
|
||||
if option.has_positions() {
|
||||
let position_offset = term_info.positions_offset;
|
||||
let positions_source = self.positions_source.slice_from(position_offset as usize);
|
||||
let mut stream = CompressedIntStream::wrap(positions_source);
|
||||
stream.skip(term_info.positions_inner_offset as usize);
|
||||
Some(stream)
|
||||
let position_reader = self.positions_source.clone();
|
||||
let skip_reader = self.positions_idx_source.clone();
|
||||
let position_reader =
|
||||
PositionReader::new(position_reader, skip_reader, term_info.positions_idx);
|
||||
Some(position_reader)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
@@ -161,8 +174,8 @@ impl InvertedIndexReader {
|
||||
/// `TextIndexingOptions` that does not index position will return a `SegmentPostings`
|
||||
/// with `DocId`s and frequencies.
|
||||
pub fn read_postings(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings> {
|
||||
let term_info = get!(self.get_term_info(term));
|
||||
Some(self.read_postings_from_terminfo(&term_info, option))
|
||||
self.get_term_info(term)
|
||||
.map(move |term_info| self.read_postings_from_terminfo(&term_info, option))
|
||||
}
|
||||
|
||||
pub(crate) fn read_postings_no_deletes(
|
||||
@@ -170,8 +183,8 @@ impl InvertedIndexReader {
|
||||
term: &Term,
|
||||
option: IndexRecordOption,
|
||||
) -> Option<SegmentPostings> {
|
||||
let term_info = get!(self.get_term_info(term));
|
||||
Some(self.read_postings_from_terminfo(&term_info, option))
|
||||
self.get_term_info(term)
|
||||
.map(|term_info| self.read_postings_from_terminfo(&term_info, option))
|
||||
}
|
||||
|
||||
/// Returns the number of documents containing the term.
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
mod executor;
|
||||
pub mod index;
|
||||
mod index_meta;
|
||||
mod inverted_index_reader;
|
||||
@@ -9,6 +10,7 @@ mod segment_id;
|
||||
mod segment_meta;
|
||||
mod segment_reader;
|
||||
|
||||
pub use self::executor::Executor;
|
||||
pub use self::index::Index;
|
||||
pub use self::index_meta::IndexMeta;
|
||||
pub use self::inverted_index_reader::InvertedIndexReader;
|
||||
@@ -33,10 +35,4 @@ lazy_static! {
|
||||
/// Removing this file is safe, but will prevent the garbage collection of all of the file that
|
||||
/// are currently in the directory
|
||||
pub static ref MANAGED_FILEPATH: PathBuf = PathBuf::from(".managed.json");
|
||||
|
||||
/// Only one process should be able to write tantivy's index at a time.
|
||||
/// This file, when present, is in charge of preventing other processes from opening an IndexWriter.
|
||||
///
|
||||
/// If the process is killed and this file remains, it is safe to remove it manually.
|
||||
pub static ref LOCKFILE_FILEPATH: PathBuf = PathBuf::from(".tantivy-indexer.lock");
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crossbeam::sync::MsQueue;
|
||||
use crossbeam::queue::MsQueue;
|
||||
use std::mem;
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
@@ -87,7 +87,8 @@ impl<T> Deref for LeasedItem<T> {
|
||||
type Target = T;
|
||||
|
||||
fn deref(&self) -> &T {
|
||||
&self.gen_item
|
||||
&self
|
||||
.gen_item
|
||||
.as_ref()
|
||||
.expect("Unwrapping a leased item should never fail")
|
||||
.item // unwrap is safe here
|
||||
@@ -96,7 +97,8 @@ impl<T> Deref for LeasedItem<T> {
|
||||
|
||||
impl<T> DerefMut for LeasedItem<T> {
|
||||
fn deref_mut(&mut self) -> &mut T {
|
||||
&mut self.gen_item
|
||||
&mut self
|
||||
.gen_item
|
||||
.as_mut()
|
||||
.expect("Unwrapping a mut leased item should never fail")
|
||||
.item // unwrap is safe here
|
||||
|
||||
@@ -1,16 +1,43 @@
|
||||
use collector::Collector;
|
||||
use collector::SegmentCollector;
|
||||
use core::Executor;
|
||||
use core::InvertedIndexReader;
|
||||
use core::SegmentReader;
|
||||
use query::Query;
|
||||
use query::Scorer;
|
||||
use query::Weight;
|
||||
use schema::Document;
|
||||
use schema::Schema;
|
||||
use schema::{Field, Term};
|
||||
use space_usage::SearcherSpaceUsage;
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
use store::StoreReader;
|
||||
use termdict::TermMerger;
|
||||
use DocAddress;
|
||||
use Index;
|
||||
use Result;
|
||||
|
||||
fn collect_segment<C: Collector>(
|
||||
collector: &C,
|
||||
weight: &Weight,
|
||||
segment_ord: u32,
|
||||
segment_reader: &SegmentReader,
|
||||
) -> Result<C::Fruit> {
|
||||
let mut scorer = weight.scorer(segment_reader)?;
|
||||
let mut segment_collector = collector.for_segment(segment_ord as u32, segment_reader)?;
|
||||
if let Some(delete_bitset) = segment_reader.delete_bitset() {
|
||||
scorer.for_each(&mut |doc, score| {
|
||||
if !delete_bitset.is_deleted(doc) {
|
||||
segment_collector.collect(doc, score);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
scorer.for_each(&mut |doc, score| segment_collector.collect(doc, score));
|
||||
}
|
||||
Ok(segment_collector.harvest())
|
||||
}
|
||||
|
||||
/// Holds a list of `SegmentReader`s ready for search.
|
||||
///
|
||||
/// It guarantees that the `Segment` will not be removed before
|
||||
@@ -18,25 +45,43 @@ use Result;
|
||||
///
|
||||
pub struct Searcher {
|
||||
schema: Schema,
|
||||
index: Index,
|
||||
segment_readers: Vec<SegmentReader>,
|
||||
store_readers: Vec<StoreReader>,
|
||||
}
|
||||
|
||||
impl Searcher {
|
||||
/// Creates a new `Searcher`
|
||||
pub(crate) fn new(schema: Schema, segment_readers: Vec<SegmentReader>) -> Searcher {
|
||||
pub(crate) fn new(
|
||||
schema: Schema,
|
||||
index: Index,
|
||||
segment_readers: Vec<SegmentReader>,
|
||||
) -> Searcher {
|
||||
let store_readers = segment_readers
|
||||
.iter()
|
||||
.map(|segment_reader| segment_reader.get_store_reader())
|
||||
.collect();
|
||||
Searcher {
|
||||
schema,
|
||||
index,
|
||||
segment_readers,
|
||||
store_readers,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the `Index` associated to the `Searcher`
|
||||
pub fn index(&self) -> &Index {
|
||||
&self.index
|
||||
}
|
||||
|
||||
/// Fetches a document from tantivy's store given a `DocAddress`.
|
||||
///
|
||||
/// The searcher uses the segment ordinal to route the
/// request to the right `Segment`.
|
||||
pub fn doc(&self, doc_address: &DocAddress) -> Result<Document> {
|
||||
let DocAddress(segment_local_id, doc_id) = *doc_address;
|
||||
let segment_reader = &self.segment_readers[segment_local_id as usize];
|
||||
segment_reader.doc(doc_id)
|
||||
pub fn doc(&self, doc_address: DocAddress) -> Result<Document> {
|
||||
let DocAddress(segment_local_id, doc_id) = doc_address;
|
||||
let store_reader = &self.store_readers[segment_local_id as usize];
|
||||
store_reader.get(doc_id)
|
||||
}
|
||||
|
||||
/// Access the schema associated to the index of this searcher.
|
||||
@@ -48,7 +93,7 @@ impl Searcher {
|
||||
pub fn num_docs(&self) -> u64 {
|
||||
self.segment_readers
|
||||
.iter()
|
||||
.map(|segment_reader| segment_reader.num_docs() as u64)
|
||||
.map(|segment_reader| u64::from(segment_reader.num_docs()))
|
||||
.sum::<u64>()
|
||||
}
|
||||
|
||||
@@ -57,7 +102,9 @@ impl Searcher {
|
||||
pub fn doc_freq(&self, term: &Term) -> u64 {
|
||||
self.segment_readers
|
||||
.iter()
|
||||
.map(|segment_reader| segment_reader.inverted_index(term.field()).doc_freq(term) as u64)
|
||||
.map(|segment_reader| {
|
||||
u64::from(segment_reader.inverted_index(term.field()).doc_freq(term))
|
||||
})
|
||||
.sum::<u64>()
|
||||
}
|
||||
|
||||
@@ -71,19 +118,78 @@ impl Searcher {
|
||||
&self.segment_readers[segment_ord as usize]
|
||||
}
|
||||
|
||||
/// Runs a query on the segment readers wrapped by the searcher
|
||||
pub fn search<C: Collector>(&self, query: &Query, collector: &mut C) -> Result<()> {
|
||||
query.search(self, collector)
|
||||
/// Runs a query on the segment readers wrapped by the searcher.
|
||||
///
|
||||
/// Search works as follows:
|
||||
///
|
||||
/// First the weight object associated to the query is created.
|
||||
///
|
||||
/// Then, the query loops over the segments and, for each segment:
/// - sets up the collector and informs it that the segment being processed has changed.
|
||||
/// - creates a SegmentCollector for collecting documents associated to the segment
|
||||
/// - creates a `Scorer` object associated for this segment
|
||||
/// - iterate through the matched documents and push them to the segment collector.
|
||||
///
|
||||
/// Finally, the Collector merges each of the child collectors into itself for result usability
|
||||
/// by the caller.
|
||||
pub fn search<C: Collector>(&self, query: &Query, collector: &C) -> Result<C::Fruit> {
|
||||
let executor = self.index.search_executor();
|
||||
self.search_with_executor(query, collector, executor)
|
||||
}
|
||||
|
||||
/// Same as [`search(...)`](#method.search) but multithreaded.
|
||||
///
|
||||
/// The current implementation is rather naive:
/// multithreading is done by splitting the search into as many tasks
/// as there are segments.
|
||||
///
|
||||
/// It is powerless at making search faster if your index consists of
/// one large segment.
|
||||
///
|
||||
/// Also, keep in mind that multithreading a single query on several
/// threads will not improve your throughput. It can actually
/// hurt it. It will, however, decrease the average response time.
|
||||
pub fn search_with_executor<C: Collector>(
|
||||
&self,
|
||||
query: &Query,
|
||||
collector: &C,
|
||||
executor: &Executor,
|
||||
) -> Result<C::Fruit> {
|
||||
let scoring_enabled = collector.requires_scoring();
|
||||
let weight = query.weight(self, scoring_enabled)?;
|
||||
let segment_readers = self.segment_readers();
|
||||
let fruits = executor.map(
|
||||
|(segment_ord, segment_reader)| {
|
||||
collect_segment(
|
||||
collector,
|
||||
weight.as_ref(),
|
||||
segment_ord as u32,
|
||||
segment_reader,
|
||||
)
|
||||
},
|
||||
segment_readers.iter().enumerate(),
|
||||
)?;
|
||||
collector.merge_fruits(fruits)
|
||||
}
|
||||
|
||||
/// Return the field searcher associated to a `Field`.
|
||||
pub fn field(&self, field: Field) -> FieldSearcher {
|
||||
let inv_index_readers = self.segment_readers
|
||||
let inv_index_readers = self
|
||||
.segment_readers
|
||||
.iter()
|
||||
.map(|segment_reader| segment_reader.inverted_index(field))
|
||||
.collect::<Vec<_>>();
|
||||
FieldSearcher::new(inv_index_readers)
|
||||
}
|
||||
|
||||
/// Summarize total space usage of this searcher.
|
||||
pub fn space_usage(&self) -> SearcherSpaceUsage {
|
||||
let mut space_usage = SearcherSpaceUsage::new();
|
||||
for segment_reader in self.segment_readers.iter() {
|
||||
space_usage.add_segment(segment_reader.space_usage());
|
||||
}
|
||||
space_usage
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FieldSearcher {
|
||||
@@ -98,7 +204,8 @@ impl FieldSearcher {
|
||||
/// Returns a Stream over all of the sorted unique terms of
|
||||
/// for the given field.
|
||||
pub fn terms(&self) -> TermMerger {
|
||||
let term_streamers: Vec<_> = self.inv_index_readers
|
||||
let term_streamers: Vec<_> = self
|
||||
.inv_index_readers
|
||||
.iter()
|
||||
.map(|inverted_index| inverted_index.terms().stream())
|
||||
.collect();
|
||||
@@ -108,7 +215,8 @@ impl FieldSearcher {
|
||||
|
||||
impl fmt::Debug for Searcher {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let segment_ids = self.segment_readers
|
||||
let segment_ids = self
|
||||
.segment_readers
|
||||
.iter()
|
||||
.map(|segment_reader| segment_reader.segment_id())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
@@ -10,6 +10,8 @@ pub enum SegmentComponent {
|
||||
POSTINGS,
|
||||
/// Positions of terms in each document.
|
||||
POSITIONS,
|
||||
/// Index to seek within the position file
|
||||
POSITIONSSKIP,
|
||||
/// Column-oriented random-access storage of fields.
|
||||
FASTFIELDS,
|
||||
/// Stores the sum of the length (in terms) of each field for each document.
|
||||
@@ -29,9 +31,10 @@ pub enum SegmentComponent {
|
||||
impl SegmentComponent {
|
||||
/// Iterates through the components.
|
||||
pub fn iterator() -> slice::Iter<'static, SegmentComponent> {
|
||||
static SEGMENT_COMPONENTS: [SegmentComponent; 7] = [
|
||||
static SEGMENT_COMPONENTS: [SegmentComponent; 8] = [
|
||||
SegmentComponent::POSTINGS,
|
||||
SegmentComponent::POSITIONS,
|
||||
SegmentComponent::POSITIONSSKIP,
|
||||
SegmentComponent::FASTFIELDS,
|
||||
SegmentComponent::FIELDNORMS,
|
||||
SegmentComponent::TERMS,
|
||||
|
||||
@@ -52,12 +52,12 @@ impl SegmentId {
|
||||
/// Picking the first 8 chars is ok to identify
|
||||
/// segments in a display message.
|
||||
pub fn short_uuid_string(&self) -> String {
|
||||
(&self.0.simple().to_string()[..8]).to_string()
|
||||
(&self.0.to_simple_ref().to_string()[..8]).to_string()
|
||||
}
|
||||
|
||||
/// Returns a segment uuid string.
|
||||
pub fn uuid_string(&self) -> String {
|
||||
self.0.simple().to_string()
|
||||
self.0.to_simple_ref().to_string()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -50,7 +50,7 @@ impl<'a> serde::Deserialize<'a> for SegmentMeta {
|
||||
{
|
||||
let inner = InnerSegmentMeta::deserialize(deserializer)?;
|
||||
let tracked = INVENTORY.track(inner);
|
||||
Ok(SegmentMeta { tracked: tracked })
|
||||
Ok(SegmentMeta { tracked })
|
||||
}
|
||||
}
|
||||
|
||||
@@ -110,8 +110,9 @@ impl SegmentMeta {
|
||||
pub fn relative_path(&self, component: SegmentComponent) -> PathBuf {
|
||||
let mut path = self.id().uuid_string();
|
||||
path.push_str(&*match component {
|
||||
SegmentComponent::POSITIONS => ".pos".to_string(),
|
||||
SegmentComponent::POSTINGS => ".idx".to_string(),
|
||||
SegmentComponent::POSITIONS => ".pos".to_string(),
|
||||
SegmentComponent::POSITIONSSKIP => ".posidx".to_string(),
|
||||
SegmentComponent::TERMS => ".term".to_string(),
|
||||
SegmentComponent::STORE => ".store".to_string(),
|
||||
SegmentComponent::FASTFIELDS => ".fast".to_string(),
|
||||
|
||||
@@ -4,8 +4,8 @@ use core::InvertedIndexReader;
|
||||
use core::Segment;
|
||||
use core::SegmentComponent;
|
||||
use core::SegmentId;
|
||||
use core::SegmentMeta;
|
||||
use error::ErrorKind;
|
||||
use directory::ReadOnlySource;
|
||||
use error::TantivyError;
|
||||
use fastfield::DeleteBitSet;
|
||||
use fastfield::FacetReader;
|
||||
use fastfield::FastFieldReader;
|
||||
@@ -13,10 +13,10 @@ use fastfield::{self, FastFieldNotAvailableError};
|
||||
use fastfield::{BytesFastFieldReader, FastValue, MultiValueIntFastFieldReader};
|
||||
use fieldnorm::FieldNormReader;
|
||||
use schema::Cardinality;
|
||||
use schema::Document;
|
||||
use schema::Field;
|
||||
use schema::FieldType;
|
||||
use schema::Schema;
|
||||
use space_usage::SegmentSpaceUsage;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
@@ -44,15 +44,17 @@ pub struct SegmentReader {
|
||||
inv_idx_reader_cache: Arc<RwLock<HashMap<Field, Arc<InvertedIndexReader>>>>,
|
||||
|
||||
segment_id: SegmentId,
|
||||
segment_meta: SegmentMeta,
|
||||
max_doc: DocId,
|
||||
num_docs: DocId,
|
||||
|
||||
termdict_composite: CompositeFile,
|
||||
postings_composite: CompositeFile,
|
||||
positions_composite: CompositeFile,
|
||||
positions_idx_composite: CompositeFile,
|
||||
fast_fields_composite: CompositeFile,
|
||||
fieldnorms_composite: CompositeFile,
|
||||
|
||||
store_reader: StoreReader,
|
||||
store_source: ReadOnlySource,
|
||||
delete_bitset_opt: Option<DeleteBitSet>,
|
||||
schema: Schema,
|
||||
}
|
||||
@@ -63,7 +65,7 @@ impl SegmentReader {
|
||||
/// Today, `tantivy` does not handle deletes, so it happens
|
||||
/// to also be the number of documents in the index.
|
||||
pub fn max_doc(&self) -> DocId {
|
||||
self.segment_meta.max_doc()
|
||||
self.max_doc
|
||||
}
|
||||
|
||||
/// Returns the number of documents.
|
||||
@@ -72,7 +74,7 @@ impl SegmentReader {
|
||||
/// Today, `tantivy` does not handle deletes so max doc and
|
||||
/// num_docs are the same.
|
||||
pub fn num_docs(&self) -> DocId {
|
||||
self.segment_meta.num_docs()
|
||||
self.num_docs
|
||||
}
|
||||
|
||||
/// Returns the schema of the index this segment belongs to.
|
||||
@@ -152,15 +154,17 @@ impl SegmentReader {
|
||||
/// Accessor to the `BytesFastFieldReader` associated to a given `Field`.
|
||||
pub fn bytes_fast_field_reader(&self, field: Field) -> fastfield::Result<BytesFastFieldReader> {
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
match field_entry.field_type() {
|
||||
&FieldType::Bytes => {}
|
||||
match *field_entry.field_type() {
|
||||
FieldType::Bytes => {}
|
||||
_ => return Err(FastFieldNotAvailableError::new(field_entry)),
|
||||
}
|
||||
let idx_reader = self.fast_fields_composite
|
||||
let idx_reader = self
|
||||
.fast_fields_composite
|
||||
.open_read_with_idx(field, 0)
|
||||
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
|
||||
.map(FastFieldReader::open)?;
|
||||
let values = self.fast_fields_composite
|
||||
let values = self
|
||||
.fast_fields_composite
|
||||
.open_read_with_idx(field, 1)
|
||||
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))?;
|
||||
Ok(BytesFastFieldReader::open(idx_reader, values))
|
||||
@@ -170,22 +174,22 @@ impl SegmentReader {
|
||||
pub fn facet_reader(&self, field: Field) -> Result<FacetReader> {
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
if field_entry.field_type() != &FieldType::HierarchicalFacet {
|
||||
return Err(ErrorKind::InvalidArgument(format!(
|
||||
return Err(TantivyError::InvalidArgument(format!(
|
||||
"The field {:?} is not a \
|
||||
hierarchical facet.",
|
||||
field_entry
|
||||
)).into());
|
||||
)));
|
||||
}
|
||||
let term_ords_reader = self.multi_fast_field_reader(field)?;
|
||||
let termdict_source = self.termdict_composite.open_read(field).ok_or_else(|| {
|
||||
ErrorKind::InvalidArgument(format!(
|
||||
TantivyError::InvalidArgument(format!(
|
||||
"The field \"{}\" is a hierarchical \
|
||||
but this segment does not seem to have the field term \
|
||||
dictionary.",
|
||||
field_entry.name()
|
||||
))
|
||||
})?;
|
||||
let termdict = TermDictionary::from_source(termdict_source);
|
||||
let termdict = TermDictionary::from_source(&termdict_source);
|
||||
let facet_reader = FacetReader::new(term_ords_reader, termdict);
|
||||
Ok(facet_reader)
|
||||
}
|
||||
@@ -193,8 +197,7 @@ impl SegmentReader {
|
||||
/// Accessor to the segment's `Field norms`'s reader.
|
||||
///
|
||||
/// Field norms are the length (in tokens) of the fields.
|
||||
/// It is used in the computation of the [TfIdf]
|
||||
/// (https://fulmicoton.gitbooks.io/tantivy-doc/content/tfidf.html).
|
||||
/// It is used in the computation of the [TfIdf](https://fulmicoton.gitbooks.io/tantivy-doc/content/tfidf.html).
|
||||
///
|
||||
/// They are simply stored as a fast field, serialized in
|
||||
/// the `.fieldnorm` file of the segment.
|
||||
@@ -212,8 +215,8 @@ impl SegmentReader {
|
||||
}
|
||||
|
||||
/// Accessor to the segment's `StoreReader`.
|
||||
pub fn get_store_reader(&self) -> &StoreReader {
|
||||
&self.store_reader
|
||||
pub fn get_store_reader(&self) -> StoreReader {
|
||||
StoreReader::from_source(self.store_source.clone())
|
||||
}
|
||||
|
||||
/// Open a new segment for reading.
|
||||
@@ -222,7 +225,8 @@ impl SegmentReader {
|
||||
let termdict_composite = CompositeFile::open(&termdict_source)?;
|
||||
|
||||
let store_source = segment.open_read(SegmentComponent::STORE)?;
|
||||
let store_reader = StoreReader::from_source(store_source);
|
||||
|
||||
fail_point!("SegmentReader::open#middle");
|
||||
|
||||
let postings_source = segment.open_read(SegmentComponent::POSTINGS)?;
|
||||
let postings_composite = CompositeFile::open(&postings_source)?;
|
||||
@@ -235,6 +239,14 @@ impl SegmentReader {
|
||||
}
|
||||
};
|
||||
|
||||
let positions_idx_composite = {
|
||||
if let Ok(source) = segment.open_read(SegmentComponent::POSITIONSSKIP) {
|
||||
CompositeFile::open(&source)?
|
||||
} else {
|
||||
CompositeFile::empty()
|
||||
}
|
||||
};
|
||||
|
||||
let fast_fields_data = segment.open_read(SegmentComponent::FASTFIELDS)?;
|
||||
let fast_fields_composite = CompositeFile::open(&fast_fields_data)?;
|
||||
|
||||
@@ -251,15 +263,17 @@ impl SegmentReader {
|
||||
let schema = segment.schema();
|
||||
Ok(SegmentReader {
|
||||
inv_idx_reader_cache: Arc::new(RwLock::new(HashMap::new())),
|
||||
segment_meta: segment.meta().clone(),
|
||||
max_doc: segment.meta().max_doc(),
|
||||
num_docs: segment.meta().num_docs(),
|
||||
termdict_composite,
|
||||
postings_composite,
|
||||
fast_fields_composite,
|
||||
fieldnorms_composite,
|
||||
segment_id: segment.id(),
|
||||
store_reader,
|
||||
store_source,
|
||||
delete_bitset_opt,
|
||||
positions_composite,
|
||||
positions_idx_composite,
|
||||
schema,
|
||||
})
|
||||
}
|
||||
@@ -272,7 +286,8 @@ impl SegmentReader {
|
||||
/// term dictionary associated to a specific field,
|
||||
/// and opening the posting list associated to any term.
|
||||
pub fn inverted_index(&self, field: Field) -> Arc<InvertedIndexReader> {
|
||||
if let Some(inv_idx_reader) = self.inv_idx_reader_cache
|
||||
if let Some(inv_idx_reader) = self
|
||||
.inv_idx_reader_cache
|
||||
.read()
|
||||
.expect("Lock poisoned. This should never happen")
|
||||
.get(&field)
|
||||
@@ -296,23 +311,31 @@ impl SegmentReader {
|
||||
// As a result, no data is associated to the inverted index.
|
||||
//
|
||||
// Returns an empty inverted index.
|
||||
return Arc::new(InvertedIndexReader::empty(field_type.clone()));
|
||||
return Arc::new(InvertedIndexReader::empty(field_type));
|
||||
}
|
||||
|
||||
let postings_source = postings_source_opt.unwrap();
|
||||
|
||||
let termdict_source = self.termdict_composite
|
||||
let termdict_source = self
|
||||
.termdict_composite
|
||||
.open_read(field)
|
||||
.expect("Failed to open field term dictionary in composite file. Is the field indexed");
|
||||
|
||||
let positions_source = self.positions_composite
|
||||
let positions_source = self
|
||||
.positions_composite
|
||||
.open_read(field)
|
||||
.expect("Index corrupted. Failed to open field positions in composite file.");
|
||||
|
||||
let positions_idx_source = self
|
||||
.positions_idx_composite
|
||||
.open_read(field)
|
||||
.expect("Index corrupted. Failed to open field positions in composite file.");
|
||||
|
||||
let inv_idx_reader = Arc::new(InvertedIndexReader::new(
|
||||
TermDictionary::from_source(termdict_source),
|
||||
TermDictionary::from_source(&termdict_source),
|
||||
postings_source,
|
||||
positions_source,
|
||||
positions_idx_source,
|
||||
record_option,
|
||||
));
|
||||
|
||||
@@ -326,14 +349,6 @@ impl SegmentReader {
|
||||
inv_idx_reader
|
||||
}
|
||||
|
||||
/// Returns the document (or to be accurate, its stored field)
|
||||
/// bearing the given doc id.
|
||||
/// This method is slow and should seldom be called from
|
||||
/// within a collector.
|
||||
pub fn doc(&self, doc_id: DocId) -> Result<Document> {
|
||||
self.store_reader.get(doc_id)
|
||||
}
|
||||
|
||||
/// Returns the segment id
|
||||
pub fn segment_id(&self) -> SegmentId {
|
||||
self.segment_id
|
||||
@@ -357,6 +372,24 @@ impl SegmentReader {
|
||||
pub fn doc_ids_alive(&self) -> SegmentReaderAliveDocsIterator {
|
||||
SegmentReaderAliveDocsIterator::new(&self)
|
||||
}
|
||||
|
||||
/// Summarize total space usage of this segment.
|
||||
pub fn space_usage(&self) -> SegmentSpaceUsage {
|
||||
SegmentSpaceUsage::new(
|
||||
self.num_docs(),
|
||||
self.termdict_composite.space_usage(),
|
||||
self.postings_composite.space_usage(),
|
||||
self.positions_composite.space_usage(),
|
||||
self.positions_idx_composite.space_usage(),
|
||||
self.fast_fields_composite.space_usage(),
|
||||
self.fieldnorms_composite.space_usage(),
|
||||
self.get_store_reader().space_usage(),
|
||||
self.delete_bitset_opt
|
||||
.as_ref()
|
||||
.map(|x| x.space_usage())
|
||||
.unwrap_or(0),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for SegmentReader {
|
||||
@@ -376,7 +409,7 @@ pub struct SegmentReaderAliveDocsIterator<'a> {
|
||||
impl<'a> SegmentReaderAliveDocsIterator<'a> {
|
||||
pub fn new(reader: &'a SegmentReader) -> SegmentReaderAliveDocsIterator<'a> {
|
||||
SegmentReaderAliveDocsIterator {
|
||||
reader: reader,
|
||||
reader,
|
||||
max_doc: reader.max_doc(),
|
||||
current: 0,
|
||||
}
|
||||
@@ -414,12 +447,12 @@ impl<'a> Iterator for SegmentReaderAliveDocsIterator<'a> {
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use core::Index;
|
||||
use schema::{SchemaBuilder, Term, STORED, TEXT};
|
||||
use schema::{Schema, Term, STORED, TEXT};
|
||||
use DocId;
|
||||
|
||||
#[test]
|
||||
fn test_alive_docs_iterator() {
|
||||
let mut schema_builder = SchemaBuilder::new();
|
||||
let mut schema_builder = Schema::builder();
|
||||
schema_builder.add_text_field("name", TEXT | STORED);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema.clone());
|
||||
|
||||
@@ -17,7 +17,7 @@ use std::result;
/// - The [`RAMDirectory`](struct.RAMDirectory.html), which
/// should be used mostly for tests.
///
pub trait Directory: fmt::Debug + Send + Sync + 'static {
pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
    /// Opens a virtual file for read.
    ///
    /// Once a virtual file is open, its data may not

@@ -73,7 +73,19 @@ pub trait Directory: fmt::Debug + Send + Sync + 'static {
    ///
    /// The file may or may not previously exist.
    fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()>;
}

/// DirectoryClone
pub trait DirectoryClone {
    /// Clones the directory and boxes the clone
    fn box_clone(&self) -> Box<Directory>;
}

impl<T> DirectoryClone for T
where
    T: 'static + Directory + Clone,
{
    fn box_clone(&self) -> Box<Directory> {
        Box::new(self.clone())
    }
}
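
The `DirectoryClone` trait above is the usual object-safe clone workaround: `Clone` itself cannot be required on a boxed trait object, so a `box_clone` method plus a blanket impl fills the gap. A minimal, self-contained sketch of the same pattern (the `Storage` names are made up for illustration, not tantivy's API):

```rust
use std::fmt;

trait Storage: StorageClone + fmt::Debug {
    fn read(&self, key: &str) -> Option<Vec<u8>>;
}

trait StorageClone {
    fn box_clone(&self) -> Box<dyn Storage>;
}

// Blanket impl: every concrete `Storage` that is `Clone` gets `box_clone` for free.
impl<T> StorageClone for T
where
    T: 'static + Storage + Clone,
{
    fn box_clone(&self) -> Box<dyn Storage> {
        Box::new(self.clone())
    }
}

// With that in place, a boxed trait object can itself be cloned.
impl Clone for Box<dyn Storage> {
    fn clone(&self) -> Box<dyn Storage> {
        self.box_clone()
    }
}

#[derive(Clone, Debug)]
struct InMemory(Vec<u8>);

impl Storage for InMemory {
    fn read(&self, _key: &str) -> Option<Vec<u8>> {
        Some(self.0.clone())
    }
}

fn main() {
    let boxed: Box<dyn Storage> = Box::new(InMemory(vec![1, 2, 3]));
    let copy = boxed.clone();
    assert_eq!(copy.read("any"), Some(vec![1, 2, 3]));
}
```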
|
||||
|
||||
@@ -1,7 +1,8 @@
use core::MANAGED_FILEPATH;
use directory::error::{DeleteError, IOError, OpenReadError, OpenWriteError};
use directory::{ReadOnlySource, WritePtr};
use error::{ErrorKind, Result, ResultExt};
use error::DataCorruption;
use indexer::LockType;
use serde_json;
use std::collections::HashSet;
use std::io;

@@ -11,6 +12,18 @@ use std::result;
use std::sync::RwLockWriteGuard;
use std::sync::{Arc, RwLock};
use Directory;
use Result;

/// Returns true iff the file is "managed".
/// Non-managed files are not subject to garbage collection.
///
/// Filenames that start with a "." (typically locks)
/// are not managed.
fn is_managed(path: &Path) -> bool {
    path.to_str()
        .map(|p_str| !p_str.starts_with('.'))
        .unwrap_or(true)
}
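
A quick check of the rule documented above: dot-files (lock files) are excluded from management, and a path that is not valid UTF-8 is conservatively treated as managed. A standalone sketch reusing the same predicate:

```rust
use std::path::Path;

fn is_managed(path: &Path) -> bool {
    path.to_str()
        .map(|p_str| !p_str.starts_with('.'))
        .unwrap_or(true)
}

fn main() {
    assert!(is_managed(Path::new("meta.json")));
    assert!(is_managed(Path::new("segment.idx")));
    assert!(!is_managed(Path::new(".tantivy-meta.lock")));
}
```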
|
||||
|
||||
/// Wrapper of directories that keeps track of files created by Tantivy.
|
||||
///
|
||||
@@ -39,19 +52,24 @@ fn save_managed_paths(
|
||||
wlock: &RwLockWriteGuard<MetaInformation>,
|
||||
) -> io::Result<()> {
|
||||
let mut w = serde_json::to_vec(&wlock.managed_paths)?;
|
||||
write!(&mut w, "\n")?;
|
||||
writeln!(&mut w)?;
|
||||
directory.atomic_write(&MANAGED_FILEPATH, &w[..])?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl ManagedDirectory {
|
||||
/// Wraps a directory as managed directory.
|
||||
pub fn new<Dir: Directory>(directory: Dir) -> Result<ManagedDirectory> {
|
||||
pub fn wrap<Dir: Directory>(directory: Dir) -> Result<ManagedDirectory> {
|
||||
match directory.atomic_read(&MANAGED_FILEPATH) {
|
||||
Ok(data) => {
|
||||
let managed_files_json = String::from_utf8_lossy(&data);
|
||||
let managed_files: HashSet<PathBuf> = serde_json::from_str(&managed_files_json)
|
||||
.chain_err(|| ErrorKind::CorruptedFile(MANAGED_FILEPATH.clone()))?;
|
||||
.map_err(|e| {
|
||||
DataCorruption::new(
|
||||
MANAGED_FILEPATH.clone(),
|
||||
format!("Managed file cannot be deserialized: {:?}. ", e),
|
||||
)
|
||||
})?;
|
||||
Ok(ManagedDirectory {
|
||||
directory: Box::new(directory),
|
||||
meta_informations: Arc::new(RwLock::new(MetaInformation {
|
||||
@@ -81,25 +99,35 @@ impl ManagedDirectory {
|
||||
pub fn garbage_collect<L: FnOnce() -> HashSet<PathBuf>>(&mut self, get_living_files: L) {
|
||||
info!("Garbage collect");
|
||||
let mut files_to_delete = vec![];
|
||||
|
||||
// It is crucial to get the living files after acquiring the
|
||||
// read lock of meta informations. That way, we
|
||||
// avoid the following scenario.
|
||||
//
|
||||
// 1) we get the list of living files.
|
||||
// 2) someone creates a new file.
|
||||
// 3) we start garbage collection and remove this file
|
||||
// even though it is a living file.
|
||||
//
|
||||
// releasing the lock as .delete() will use it too.
|
||||
{
|
||||
// releasing the lock as .delete() will use it too.
|
||||
let meta_informations_rlock = self.meta_informations
|
||||
let meta_informations_rlock = self
|
||||
.meta_informations
|
||||
.read()
|
||||
.expect("Managed directory rlock poisoned in garbage collect.");
|
||||
|
||||
// It is crucial to get the living files after acquiring the
|
||||
// read lock of meta informations. That way, we
|
||||
// avoid the following scenario.
|
||||
//
|
||||
// 1) we get the list of living files.
|
||||
// 2) someone creates a new file.
|
||||
// 3) we start garbage collection and remove this file
|
||||
// even though it is a living file.
|
||||
let living_files = get_living_files();
|
||||
|
||||
for managed_path in &meta_informations_rlock.managed_paths {
|
||||
if !living_files.contains(managed_path) {
|
||||
files_to_delete.push(managed_path.clone());
|
||||
// The point of this second "file" lock is to enforce the following scenario
|
||||
// 1) process B tries to load a new set of searcher.
|
||||
// The list of segments is loaded
|
||||
// 2) writer change meta.json (for instance after a merge or a commit)
|
||||
// 3) gc kicks in.
|
||||
// 4) gc removes a file that was useful for process B, before process B opened it.
|
||||
if let Ok(_meta_lock) = LockType::MetaLock.acquire_lock(self) {
|
||||
let living_files = get_living_files();
|
||||
for managed_path in &meta_informations_rlock.managed_paths {
|
||||
if !living_files.contains(managed_path) {
|
||||
files_to_delete.push(managed_path.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
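
The ordering spelled out in the comments above matters: the meta lock is taken first, and only then is the set of living files computed, so a file registered between the snapshot and the deletion pass cannot be mistaken for garbage. A stripped-down sketch of that ordering, with hypothetical names and a plain `Mutex` standing in for tantivy's directory lock:

```rust
use std::collections::HashSet;
use std::sync::Mutex;

struct Gc {
    managed: Mutex<Vec<String>>,
}

impl Gc {
    fn garbage_collect<F: FnOnce() -> HashSet<String>>(&self, get_living_files: F) -> Vec<String> {
        // Take the lock first, *then* snapshot the living files while it is held.
        let managed = self.managed.lock().unwrap();
        let living_files = get_living_files();
        managed
            .iter()
            .filter(|path| !living_files.contains(*path))
            .cloned()
            .collect()
    }
}

fn main() {
    let gc = Gc {
        managed: Mutex::new(vec!["a.idx".to_string(), "b.idx".to_string()]),
    };
    // Only "a.idx" is still referenced, so "b.idx" is reported as garbage.
    let dead = gc.garbage_collect(|| ["a.idx".to_string()].into_iter().collect());
    assert_eq!(dead, vec!["b.idx".to_string()]);
}
```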
|
||||
@@ -133,7 +161,8 @@ impl ManagedDirectory {
|
||||
if !deleted_files.is_empty() {
|
||||
// update the list of managed files by removing
|
||||
// the file that were removed.
|
||||
let mut meta_informations_wlock = self.meta_informations
|
||||
let mut meta_informations_wlock = self
|
||||
.meta_informations
|
||||
.write()
|
||||
.expect("Managed directory wlock poisoned (2).");
|
||||
{
|
||||
@@ -155,8 +184,17 @@ impl ManagedDirectory {
|
||||
/// registering the filepath and creating the file
|
||||
/// will not lead to garbage files that will
|
||||
/// never get removed.
|
||||
///
|
||||
/// File starting by "." are reserved to locks.
|
||||
/// They are not managed and cannot be subjected
|
||||
/// to garbage collection.
|
||||
fn register_file_as_managed(&mut self, filepath: &Path) -> io::Result<()> {
|
||||
let mut meta_wlock = self.meta_informations
|
||||
// Files starting by "." (e.g. lock files) are not managed.
|
||||
if !is_managed(filepath) {
|
||||
return Ok(());
|
||||
}
|
||||
let mut meta_wlock = self
|
||||
.meta_informations
|
||||
.write()
|
||||
.expect("Managed file lock poisoned");
|
||||
let has_changed = meta_wlock.managed_paths.insert(filepath.to_owned());
|
||||
@@ -194,10 +232,6 @@ impl Directory for ManagedDirectory {
|
||||
fn exists(&self, path: &Path) -> bool {
|
||||
self.directory.exists(path)
|
||||
}
|
||||
|
||||
fn box_clone(&self) -> Box<Directory> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for ManagedDirectory {
|
||||
@@ -231,7 +265,7 @@ mod tests {
|
||||
let tempdir_path = PathBuf::from(tempdir.path());
|
||||
{
|
||||
let mmap_directory = MmapDirectory::open(&tempdir_path).unwrap();
|
||||
let mut managed_directory = ManagedDirectory::new(mmap_directory).unwrap();
|
||||
let mut managed_directory = ManagedDirectory::wrap(mmap_directory).unwrap();
|
||||
{
|
||||
let mut write_file = managed_directory.open_write(*TEST_PATH1).unwrap();
|
||||
write_file.flush().unwrap();
|
||||
@@ -257,7 +291,7 @@ mod tests {
|
||||
}
|
||||
{
|
||||
let mmap_directory = MmapDirectory::open(&tempdir_path).unwrap();
|
||||
let mut managed_directory = ManagedDirectory::new(mmap_directory).unwrap();
|
||||
let mut managed_directory = ManagedDirectory::wrap(mmap_directory).unwrap();
|
||||
{
|
||||
assert!(managed_directory.exists(*TEST_PATH1));
|
||||
assert!(!managed_directory.exists(*TEST_PATH2));
|
||||
@@ -281,7 +315,7 @@ mod tests {
|
||||
let living_files = HashSet::new();
|
||||
|
||||
let mmap_directory = MmapDirectory::open(&tempdir_path).unwrap();
|
||||
let mut managed_directory = ManagedDirectory::new(mmap_directory).unwrap();
|
||||
let mut managed_directory = ManagedDirectory::wrap(mmap_directory).unwrap();
|
||||
managed_directory
|
||||
.atomic_write(*TEST_PATH1, &vec![0u8, 1u8])
|
||||
.unwrap();
|
||||
|
||||
@@ -32,7 +32,8 @@ fn open_mmap(full_path: &Path) -> result::Result<Option<MmapReadOnly>, OpenReadE
|
||||
}
|
||||
})?;
|
||||
|
||||
let meta_data = file.metadata()
|
||||
let meta_data = file
|
||||
.metadata()
|
||||
.map_err(|e| IOError::with_path(full_path.to_owned(), e))?;
|
||||
if meta_data.len() == 0 {
|
||||
// if the file size is 0, it will not be possible
|
||||
@@ -309,7 +310,8 @@ impl Directory for MmapDirectory {
|
||||
// when the last reference is gone.
|
||||
mmap_cache.cache.remove(&full_path);
|
||||
match fs::remove_file(&full_path) {
|
||||
Ok(_) => self.sync_directory()
|
||||
Ok(_) => self
|
||||
.sync_directory()
|
||||
.map_err(|e| IOError::with_path(path.to_owned(), e).into()),
|
||||
Err(e) => {
|
||||
if e.kind() == io::ErrorKind::NotFound {
|
||||
@@ -352,10 +354,6 @@ impl Directory for MmapDirectory {
|
||||
meta_file.write(|f| f.write_all(data))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn box_clone(&self) -> Box<Directory> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -366,6 +364,11 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_open_non_existant_path() {
|
||||
assert!(MmapDirectory::open(PathBuf::from("./nowhere")).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_open_empty() {
|
||||
// empty file is actually an edge case because those
|
||||
|
||||
@@ -18,7 +18,7 @@ pub mod error;
|
||||
|
||||
use std::io::{BufWriter, Seek, Write};
|
||||
|
||||
pub use self::directory::Directory;
|
||||
pub use self::directory::{Directory, DirectoryClone};
|
||||
pub use self::ram_directory::RAMDirectory;
|
||||
pub use self::read_only_source::ReadOnlySource;
|
||||
|
||||
|
||||
@@ -170,10 +170,10 @@ impl Directory for RAMDirectory {
|
||||
let path_buf = PathBuf::from(path);
|
||||
let vec_writer = VecWriter::new(path_buf.clone(), self.fs.clone());
|
||||
|
||||
let exists = self.fs
|
||||
let exists = self
|
||||
.fs
|
||||
.write(path_buf.clone(), &Vec::new())
|
||||
.map_err(|err| IOError::with_path(path.to_owned(), err))?;
|
||||
|
||||
// force the creation of the file to mimic the MMap directory.
|
||||
if exists {
|
||||
Err(OpenWriteError::FileAlreadyExists(path_buf))
|
||||
@@ -196,6 +196,10 @@ impl Directory for RAMDirectory {
|
||||
}
|
||||
|
||||
fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> {
|
||||
fail_point!("RAMDirectory::atomic_write", |msg| Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
msg.unwrap_or("Undefined".to_string())
|
||||
)));
|
||||
let path_buf = PathBuf::from(path);
|
||||
let mut vec_writer = VecWriter::new(path_buf.clone(), self.fs.clone());
|
||||
self.fs.write(path_buf, &Vec::new())?;
|
||||
@@ -203,8 +207,4 @@ impl Directory for RAMDirectory {
|
||||
vec_writer.flush()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn box_clone(&self) -> Box<Directory> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ use fst::raw::MmapReadOnly;
|
||||
use stable_deref_trait::{CloneStableDeref, StableDeref};
|
||||
use std::ops::Deref;
|
||||
|
||||
|
||||
/// Read object that represents files in tantivy.
|
||||
///
|
||||
/// These read objects are only in charge to deliver
|
||||
|
||||
217 src/error.rs
@@ -4,135 +4,168 @@ use std::io;
|
||||
|
||||
use directory::error::{IOError, OpenDirectoryError, OpenReadError, OpenWriteError};
|
||||
use fastfield::FastFieldNotAvailableError;
|
||||
use indexer::LockType;
|
||||
use query;
|
||||
use schema;
|
||||
use serde_json;
|
||||
use std::fmt;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::PoisonError;
|
||||
|
||||
error_chain!(
|
||||
errors {
|
||||
/// Path does not exist.
|
||||
PathDoesNotExist(buf: PathBuf) {
|
||||
description("path does not exist")
|
||||
display("path does not exist: '{:?}'", buf)
|
||||
}
|
||||
/// File already exists, this is a problem when we try to write into a new file.
|
||||
FileAlreadyExists(buf: PathBuf) {
|
||||
description("file already exists")
|
||||
display("file already exists: '{:?}'", buf)
|
||||
}
|
||||
/// IO Error.
|
||||
IOError(err: IOError) {
|
||||
description("an IO error occurred")
|
||||
display("an IO error occurred: '{}'", err)
|
||||
}
|
||||
/// The data within is corrupted.
|
||||
///
|
||||
/// For instance, it contains invalid JSON.
|
||||
CorruptedFile(buf: PathBuf) {
|
||||
description("file contains corrupted data")
|
||||
display("file contains corrupted data: '{:?}'", buf)
|
||||
}
|
||||
/// A thread holding the locked panicked and poisoned the lock.
|
||||
Poisoned {
|
||||
description("a thread holding the locked panicked and poisoned the lock")
|
||||
}
|
||||
/// Invalid argument was passed by the user.
|
||||
InvalidArgument(arg: String) {
|
||||
description("an invalid argument was passed")
|
||||
display("an invalid argument was passed: '{}'", arg)
|
||||
}
|
||||
/// An Error happened in one of the thread.
|
||||
ErrorInThread(err: String) {
|
||||
description("an error occurred in a thread")
|
||||
display("an error occurred in a thread: '{}'", err)
|
||||
}
|
||||
/// An Error appeared related to the schema.
|
||||
SchemaError(message: String) {
|
||||
description("the schema is not matching expectations.")
|
||||
display("Schema error: '{}'", message)
|
||||
}
|
||||
/// Tried to access a fastfield reader for a field not configured accordingly.
|
||||
FastFieldError(err: FastFieldNotAvailableError) {
|
||||
description("fast field not available")
|
||||
display("fast field not available: '{:?}'", err)
|
||||
pub struct DataCorruption {
|
||||
filepath: Option<PathBuf>,
|
||||
comment: String,
|
||||
}
|
||||
|
||||
impl DataCorruption {
|
||||
pub fn new(filepath: PathBuf, comment: String) -> DataCorruption {
|
||||
DataCorruption {
|
||||
filepath: Some(filepath),
|
||||
comment,
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
impl From<FastFieldNotAvailableError> for Error {
|
||||
fn from(fastfield_error: FastFieldNotAvailableError) -> Error {
|
||||
ErrorKind::FastFieldError(fastfield_error).into()
|
||||
pub fn comment_only(comment: String) -> DataCorruption {
|
||||
DataCorruption {
|
||||
filepath: None,
|
||||
comment,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<IOError> for Error {
|
||||
fn from(io_error: IOError) -> Error {
|
||||
ErrorKind::IOError(io_error).into()
|
||||
impl fmt::Debug for DataCorruption {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
write!(f, "Data corruption: ")?;
|
||||
if let Some(ref filepath) = &self.filepath {
|
||||
write!(f, "(in file `{:?}`)", filepath)?;
|
||||
}
|
||||
write!(f, ": {}.", self.comment)?;
|
||||
Ok(())
|
||||
}
|
||||
}
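
The `Debug` impl above builds the message from an optional file path plus a free-form comment. A tiny standalone sketch of the same idea (the `Corruption` name is made up) to show what the rendered error looks like:

```rust
use std::fmt;
use std::path::PathBuf;

struct Corruption {
    filepath: Option<PathBuf>,
    comment: String,
}

impl fmt::Debug for Corruption {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Data corruption")?;
        if let Some(ref filepath) = self.filepath {
            write!(f, " (in file `{:?}`)", filepath)?;
        }
        write!(f, ": {}.", self.comment)
    }
}

fn main() {
    let err = Corruption {
        filepath: Some(PathBuf::from("managed.json")),
        comment: "Managed file cannot be deserialized".to_string(),
    };
    // Data corruption (in file `"managed.json"`): Managed file cannot be deserialized.
    println!("{:?}", err);
}
```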
|
||||
|
||||
impl From<io::Error> for Error {
|
||||
fn from(io_error: io::Error) -> Error {
|
||||
ErrorKind::IOError(io_error.into()).into()
|
||||
/// The library's failure based error enum
|
||||
#[derive(Debug, Fail)]
|
||||
pub enum TantivyError {
|
||||
/// Path does not exist.
|
||||
#[fail(display = "Path does not exist: '{:?}'", _0)]
|
||||
PathDoesNotExist(PathBuf),
|
||||
/// File already exists, this is a problem when we try to write into a new file.
|
||||
#[fail(display = "File already exists: '{:?}'", _0)]
|
||||
FileAlreadyExists(PathBuf),
|
||||
/// Index already exists in this directory
|
||||
#[fail(display = "Index already exists")]
|
||||
IndexAlreadyExists,
|
||||
/// Failed to acquire file lock
|
||||
#[fail(
|
||||
display = "Failed to acquire Lockfile: {:?}. Possible causes: another IndexWriter instance or panic during previous lock drop.",
|
||||
_0
|
||||
)]
|
||||
LockFailure(LockType),
|
||||
/// IO Error.
|
||||
#[fail(display = "An IO error occurred: '{}'", _0)]
|
||||
IOError(#[cause] IOError),
|
||||
/// Data corruption.
|
||||
#[fail(display = "{:?}", _0)]
|
||||
DataCorruption(DataCorruption),
|
||||
/// A thread holding the locked panicked and poisoned the lock.
|
||||
#[fail(display = "A thread holding the locked panicked and poisoned the lock")]
|
||||
Poisoned,
|
||||
/// Invalid argument was passed by the user.
|
||||
#[fail(display = "An invalid argument was passed: '{}'", _0)]
|
||||
InvalidArgument(String),
|
||||
/// An Error happened in one of the thread.
|
||||
#[fail(display = "An error occurred in a thread: '{}'", _0)]
|
||||
ErrorInThread(String),
|
||||
/// An Error appeared related to the schema.
|
||||
#[fail(display = "Schema error: '{}'", _0)]
|
||||
SchemaError(String),
|
||||
/// Tried to access a fastfield reader for a field not configured accordingly.
|
||||
#[fail(display = "Fast field not available: '{:?}'", _0)]
|
||||
FastFieldError(#[cause] FastFieldNotAvailableError),
|
||||
/// System error. (e.g.: We failed spawning a new thread)
|
||||
#[fail(display = "System error.'{}'", _0)]
|
||||
SystemError(String),
|
||||
}
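
A minimal sketch of the failure-style pattern the diff moves to, assuming the `failure` crate (0.1) with its derive feature; `SketchError` is a made-up name and only two variants are shown:

```rust
#[macro_use]
extern crate failure;

use std::io;

#[derive(Debug, Fail)]
pub enum SketchError {
    #[fail(display = "An IO error occurred: '{}'", _0)]
    Io(#[cause] io::Error),
    #[fail(display = "An invalid argument was passed: '{}'", _0)]
    InvalidArgument(String),
}

impl From<io::Error> for SketchError {
    fn from(err: io::Error) -> SketchError {
        SketchError::Io(err)
    }
}

fn main() {
    let err: SketchError = io::Error::new(io::ErrorKind::Other, "disk full").into();
    // Display is generated from the #[fail(display = ...)] attributes.
    println!("{}", err);
    let _ = SketchError::InvalidArgument("bad query".to_string());
}
```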
|
||||
|
||||
impl From<DataCorruption> for TantivyError {
|
||||
fn from(data_corruption: DataCorruption) -> TantivyError {
|
||||
TantivyError::DataCorruption(data_corruption)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<query::QueryParserError> for Error {
|
||||
fn from(parsing_error: query::QueryParserError) -> Error {
|
||||
ErrorKind::InvalidArgument(format!("Query is invalid. {:?}", parsing_error)).into()
|
||||
impl From<FastFieldNotAvailableError> for TantivyError {
|
||||
fn from(fastfield_error: FastFieldNotAvailableError) -> TantivyError {
|
||||
TantivyError::FastFieldError(fastfield_error)
|
||||
}
|
||||
}
|
||||
|
||||
impl<Guard> From<PoisonError<Guard>> for Error {
|
||||
fn from(_: PoisonError<Guard>) -> Error {
|
||||
ErrorKind::Poisoned.into()
|
||||
impl From<IOError> for TantivyError {
|
||||
fn from(io_error: IOError) -> TantivyError {
|
||||
TantivyError::IOError(io_error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<OpenReadError> for Error {
|
||||
fn from(error: OpenReadError) -> Error {
|
||||
impl From<io::Error> for TantivyError {
|
||||
fn from(io_error: io::Error) -> TantivyError {
|
||||
TantivyError::IOError(io_error.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<query::QueryParserError> for TantivyError {
|
||||
fn from(parsing_error: query::QueryParserError) -> TantivyError {
|
||||
TantivyError::InvalidArgument(format!("Query is invalid. {:?}", parsing_error))
|
||||
}
|
||||
}
|
||||
|
||||
impl<Guard> From<PoisonError<Guard>> for TantivyError {
|
||||
fn from(_: PoisonError<Guard>) -> TantivyError {
|
||||
TantivyError::Poisoned
|
||||
}
|
||||
}
|
||||
|
||||
impl From<OpenReadError> for TantivyError {
|
||||
fn from(error: OpenReadError) -> TantivyError {
|
||||
match error {
|
||||
OpenReadError::FileDoesNotExist(filepath) => {
|
||||
ErrorKind::PathDoesNotExist(filepath).into()
|
||||
OpenReadError::FileDoesNotExist(filepath) => TantivyError::PathDoesNotExist(filepath),
|
||||
OpenReadError::IOError(io_error) => TantivyError::IOError(io_error),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<schema::DocParsingError> for TantivyError {
|
||||
fn from(error: schema::DocParsingError) -> TantivyError {
|
||||
TantivyError::InvalidArgument(format!("Failed to parse document {:?}", error))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<OpenWriteError> for TantivyError {
|
||||
fn from(error: OpenWriteError) -> TantivyError {
|
||||
match error {
|
||||
OpenWriteError::FileAlreadyExists(filepath) => {
|
||||
TantivyError::FileAlreadyExists(filepath)
|
||||
}
|
||||
OpenReadError::IOError(io_error) => ErrorKind::IOError(io_error).into(),
|
||||
OpenWriteError::IOError(io_error) => TantivyError::IOError(io_error),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<schema::DocParsingError> for Error {
|
||||
fn from(error: schema::DocParsingError) -> Error {
|
||||
ErrorKind::InvalidArgument(format!("Failed to parse document {:?}", error)).into()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<OpenWriteError> for Error {
|
||||
fn from(error: OpenWriteError) -> Error {
|
||||
match error {
|
||||
OpenWriteError::FileAlreadyExists(filepath) => ErrorKind::FileAlreadyExists(filepath),
|
||||
OpenWriteError::IOError(io_error) => ErrorKind::IOError(io_error),
|
||||
}.into()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<OpenDirectoryError> for Error {
|
||||
fn from(error: OpenDirectoryError) -> Error {
|
||||
impl From<OpenDirectoryError> for TantivyError {
|
||||
fn from(error: OpenDirectoryError) -> TantivyError {
|
||||
match error {
|
||||
OpenDirectoryError::DoesNotExist(directory_path) => {
|
||||
ErrorKind::PathDoesNotExist(directory_path).into()
|
||||
TantivyError::PathDoesNotExist(directory_path)
|
||||
}
|
||||
OpenDirectoryError::NotADirectory(directory_path) => {
|
||||
TantivyError::InvalidArgument(format!("{:?} is not a directory", directory_path))
|
||||
}
|
||||
OpenDirectoryError::NotADirectory(directory_path) => ErrorKind::InvalidArgument(
|
||||
format!("{:?} is not a directory", directory_path),
|
||||
).into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<serde_json::Error> for Error {
|
||||
fn from(error: serde_json::Error) -> Error {
|
||||
impl From<serde_json::Error> for TantivyError {
|
||||
fn from(error: serde_json::Error) -> TantivyError {
|
||||
let io_err = io::Error::from(error);
|
||||
ErrorKind::IOError(io_err.into()).into()
|
||||
TantivyError::IOError(io_err.into())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,12 +6,12 @@ pub use self::writer::BytesFastFieldWriter;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use schema::SchemaBuilder;
|
||||
use schema::Schema;
|
||||
use Index;
|
||||
|
||||
#[test]
|
||||
fn test_bytes() {
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
let mut schema_builder = Schema::builder();
|
||||
let field = schema_builder.add_bytes_field("bytesfield");
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
|
||||
@@ -51,7 +51,7 @@ impl BytesFastFieldWriter {
|
||||
self.next_doc();
|
||||
for field_value in doc.field_values() {
|
||||
if field_value.field() == self.field {
|
||||
if let &Value::Bytes(ref bytes) = field_value.value() {
|
||||
if let Value::Bytes(ref bytes) = *field_value.value() {
|
||||
self.vals.extend_from_slice(bytes);
|
||||
} else {
|
||||
panic!(
|
||||
|
||||
@@ -2,6 +2,7 @@ use bit_set::BitSet;
|
||||
use common::HasLen;
|
||||
use directory::ReadOnlySource;
|
||||
use directory::WritePtr;
|
||||
use space_usage::ByteCount;
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use DocId;
|
||||
@@ -41,7 +42,8 @@ pub struct DeleteBitSet {
|
||||
impl DeleteBitSet {
|
||||
/// Opens a delete bitset given its data source.
|
||||
pub fn open(data: ReadOnlySource) -> DeleteBitSet {
|
||||
let num_deleted: usize = data.as_slice()
|
||||
let num_deleted: usize = data
|
||||
.as_slice()
|
||||
.iter()
|
||||
.map(|b| b.count_ones() as usize)
|
||||
.sum();
|
||||
@@ -62,6 +64,11 @@ impl DeleteBitSet {
|
||||
b & (1u8 << shift) != 0
|
||||
}
|
||||
}
|
||||
|
||||
/// Summarize total space usage of this bitset.
|
||||
pub fn space_usage(&self) -> ByteCount {
|
||||
self.data.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl HasLen for DeleteBitSet {
|
||||
|
||||
@@ -4,7 +4,8 @@ use std::result;
|
||||
/// `FastFieldNotAvailableError` is returned when the
|
||||
/// user requested for a fast field reader, and the field was not
|
||||
/// defined in the schema as a fast field.
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Fail)]
|
||||
#[fail(display = "field not available: '{:?}'", field_name)]
|
||||
pub struct FastFieldNotAvailableError {
|
||||
field_name: String,
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use super::MultiValueIntFastFieldReader;
|
||||
use schema::Facet;
|
||||
use std::str;
|
||||
use termdict::TermDictionary;
|
||||
use termdict::TermOrdinal;
|
||||
use DocId;
|
||||
@@ -20,6 +21,7 @@ use DocId;
|
||||
pub struct FacetReader {
|
||||
term_ords: MultiValueIntFastFieldReader<u64>,
|
||||
term_dict: TermDictionary,
|
||||
buffer: Vec<u8>,
|
||||
}
|
||||
|
||||
impl FacetReader {
|
||||
@@ -37,6 +39,7 @@ impl FacetReader {
|
||||
FacetReader {
|
||||
term_ords,
|
||||
term_dict,
|
||||
buffer: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
@@ -55,10 +58,18 @@ impl FacetReader {
|
||||
}
|
||||
|
||||
    /// Given a term ordinal returns the term associated to it.
    pub fn facet_from_ord(&self, facet_ord: TermOrdinal, output: &mut Facet) {
        let found_term = self.term_dict
            .ord_to_term(facet_ord as u64, output.inner_buffer_mut());
    pub fn facet_from_ord(
        &mut self,
        facet_ord: TermOrdinal,
        output: &mut Facet,
    ) -> Result<(), str::Utf8Error> {
        let found_term = self
            .term_dict
            .ord_to_term(facet_ord as u64, &mut self.buffer);
        assert!(found_term, "Term ordinal {} not found.", facet_ord);
        let facet_str = str::from_utf8(&self.buffer[..])?;
        output.set_facet_str(facet_str);
        Ok(())
    }
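
The new signature reuses an internal byte buffer and surfaces the UTF-8 check to the caller instead of writing into the `Facet` directly. A minimal sketch of that buffer-reuse idiom; `term_for_ord` is a hypothetical stand-in for `term_dict.ord_to_term`:

```rust
use std::str;

// Decode the bytes for an ordinal into a reusable buffer, then validate UTF-8
// before handing the string back to the caller.
fn term_for_ord(ord: u64, buffer: &mut Vec<u8>) -> Result<&str, str::Utf8Error> {
    buffer.clear();
    buffer.extend_from_slice(format!("/category/cat{}", ord).as_bytes());
    str::from_utf8(&buffer[..])
}

fn main() {
    let mut buffer = Vec::new();
    let facet = term_for_ord(2, &mut buffer).expect("facet terms are valid UTF-8");
    assert_eq!(facet, "/category/cat2");
}
```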
|
||||
|
||||
/// Return the list of facet ordinals associated to a document.
|
||||
|
||||
@@ -127,19 +127,19 @@ mod tests {
|
||||
use common::CompositeFile;
|
||||
use directory::{Directory, RAMDirectory, WritePtr};
|
||||
use fastfield::FastFieldReader;
|
||||
use rand::Rng;
|
||||
use rand::prelude::SliceRandom;
|
||||
use rand::rngs::StdRng;
|
||||
use rand::SeedableRng;
|
||||
use rand::XorShiftRng;
|
||||
use schema::Document;
|
||||
use schema::Field;
|
||||
use schema::Schema;
|
||||
use schema::FAST;
|
||||
use schema::{Schema, SchemaBuilder};
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
lazy_static! {
|
||||
pub static ref SCHEMA: Schema = {
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
let mut schema_builder = Schema::builder();
|
||||
schema_builder.add_u64_field("field", FAST);
|
||||
schema_builder.build()
|
||||
};
|
||||
@@ -298,7 +298,7 @@ mod tests {
|
||||
fn test_signed_intfastfield() {
|
||||
let path = Path::new("test");
|
||||
let mut directory: RAMDirectory = RAMDirectory::create();
|
||||
let mut schema_builder = SchemaBuilder::new();
|
||||
let mut schema_builder = Schema::builder();
|
||||
|
||||
let i64_field = schema_builder.add_i64_field("field", FAST);
|
||||
let schema = schema_builder.build();
|
||||
@@ -342,7 +342,7 @@ mod tests {
|
||||
fn test_signed_intfastfield_default_val() {
|
||||
let path = Path::new("test");
|
||||
let mut directory: RAMDirectory = RAMDirectory::create();
|
||||
let mut schema_builder = SchemaBuilder::new();
|
||||
let mut schema_builder = Schema::builder();
|
||||
let i64_field = schema_builder.add_i64_field("field", FAST);
|
||||
let schema = schema_builder.build();
|
||||
|
||||
@@ -367,11 +367,10 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
// Warning: this generates the same permutation at each call
|
||||
pub fn generate_permutation() -> Vec<u64> {
|
||||
let seed: &[u32; 4] = &[1, 2, 3, 4];
|
||||
let mut rng = XorShiftRng::from_seed(*seed);
|
||||
let mut permutation: Vec<u64> = (0u64..1_000_000u64).collect();
|
||||
rng.shuffle(&mut permutation);
|
||||
let mut permutation: Vec<u64> = (0u64..100_000u64).collect();
|
||||
permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
|
||||
permutation
|
||||
}
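
The permutation helper above reflects the rand 0.6-era API the diff migrates to: `shuffle` now lives on the `SliceRandom` extension trait and `StdRng` is seeded from a `[u8; 32]`. A standalone sketch of that usage:

```rust
extern crate rand;

use rand::prelude::SliceRandom;
use rand::rngs::StdRng;
use rand::SeedableRng;

fn main() {
    let mut permutation: Vec<u64> = (0u64..10u64).collect();
    // Same seed, same order on every run.
    permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
    println!("{:?}", permutation);
}
```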
|
||||
|
||||
|
||||
@@ -9,12 +9,12 @@ mod tests {
|
||||
|
||||
use schema::Cardinality;
|
||||
use schema::IntOptions;
|
||||
use schema::SchemaBuilder;
|
||||
use schema::Schema;
|
||||
use Index;
|
||||
|
||||
#[test]
|
||||
fn test_multivalued_u64() {
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
let mut schema_builder = Schema::builder();
|
||||
let field = schema_builder.add_u64_field(
|
||||
"multifield",
|
||||
IntOptions::default().set_fast(Cardinality::MultiValues),
|
||||
@@ -49,7 +49,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_multivalued_i64() {
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
let mut schema_builder = Schema::builder();
|
||||
let field = schema_builder.add_i64_field(
|
||||
"multifield",
|
||||
IntOptions::default().set_fast(Cardinality::MultiValues),
|
||||
|
||||
@@ -47,11 +47,11 @@ impl<Item: FastValue> MultiValueIntFastFieldReader<Item> {
|
||||
mod tests {
|
||||
|
||||
use core::Index;
|
||||
use schema::{Document, Facet, SchemaBuilder};
|
||||
use schema::{Document, Facet, Schema};
|
||||
|
||||
#[test]
|
||||
fn test_multifastfield_reader() {
|
||||
let mut schema_builder = SchemaBuilder::new();
|
||||
let mut schema_builder = Schema::builder();
|
||||
let facet_field = schema_builder.add_facet_field("facets");
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
@@ -82,27 +82,27 @@ mod tests {
|
||||
|
||||
let mut facet = Facet::root();
|
||||
{
|
||||
facet_reader.facet_from_ord(1, &mut facet);
|
||||
facet_reader.facet_from_ord(1, &mut facet).unwrap();
|
||||
assert_eq!(facet, Facet::from("/category"));
|
||||
}
|
||||
{
|
||||
facet_reader.facet_from_ord(2, &mut facet);
|
||||
facet_reader.facet_from_ord(2, &mut facet).unwrap();
|
||||
assert_eq!(facet, Facet::from("/category/cat1"));
|
||||
}
|
||||
{
|
||||
facet_reader.facet_from_ord(3, &mut facet);
|
||||
facet_reader.facet_from_ord(3, &mut facet).unwrap();
|
||||
assert_eq!(format!("{}", facet), "/category/cat2");
|
||||
assert_eq!(facet, Facet::from("/category/cat2"));
|
||||
}
|
||||
{
|
||||
facet_reader.facet_from_ord(4, &mut facet);
|
||||
facet_reader.facet_from_ord(4, &mut facet).unwrap();
|
||||
assert_eq!(facet, Facet::from("/category/cat3"));
|
||||
}
|
||||
|
||||
let mut vals = Vec::new();
|
||||
{
|
||||
facet_reader.facet_ords(0, &mut vals);
|
||||
assert_eq!(&vals[..], &[3, 2]);
|
||||
assert_eq!(&vals[..], &[2, 3]);
|
||||
}
|
||||
{
|
||||
facet_reader.facet_ords(1, &mut vals);
|
||||
|
||||
@@ -90,10 +90,10 @@ impl MultiValueIntFastFieldWriter {
|
||||
|
||||
/// Serializes fast field values by pushing them to the `FastFieldSerializer`.
|
||||
///
|
||||
/// HashMap makes it possible to remap them before serializing.
|
||||
/// Specifically, string terms are first stored in the writer as their
|
||||
/// position in the `IndexWriter`'s `HashMap`. This value is called
|
||||
/// an `UnorderedTermId`.
|
||||
/// If a mapping is given, the values are remapped *and sorted* before serialization.
|
||||
/// This is used when serializing `facets`. Specifically their terms are
|
||||
/// first stored in the writer as their position in the `IndexWriter`'s `HashMap`.
|
||||
/// This value is called an `UnorderedTermId`.
|
||||
///
|
||||
/// During the serialization of the segment, terms gets sorted and
|
||||
/// `tantivy` builds a mapping to convert this `UnorderedTermId` into
|
||||
@@ -125,9 +125,29 @@ impl MultiValueIntFastFieldWriter {
|
||||
mapping.len() as u64,
|
||||
1,
|
||||
)?;
|
||||
for val in &self.vals {
|
||||
let remapped_val = *mapping.get(val).expect("Missing term ordinal");
|
||||
value_serializer.add_val(remapped_val)?;
|
||||
|
||||
let last_interval = (
|
||||
self.doc_index.last().cloned().unwrap(),
|
||||
self.vals.len() as u64,
|
||||
);
|
||||
|
||||
let mut doc_vals: Vec<u64> = Vec::with_capacity(100);
|
||||
for (start, stop) in self
|
||||
.doc_index
|
||||
.windows(2)
|
||||
.map(|interval| (interval[0], interval[1]))
|
||||
.chain(Some(last_interval).into_iter())
|
||||
.map(|(start, stop)| (start as usize, stop as usize))
|
||||
{
|
||||
doc_vals.clear();
|
||||
let remapped_vals = self.vals[start..stop]
|
||||
.iter()
|
||||
.map(|val| *mapping.get(val).expect("Missing term ordinal"));
|
||||
doc_vals.extend(remapped_vals);
|
||||
doc_vals.sort();
|
||||
for &val in &doc_vals {
|
||||
value_serializer.add_val(val)?;
|
||||
}
|
||||
}
|
||||
}
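
The new serialization path above walks `doc_index` with `windows(2)` to recover each document's slice of the flat `vals` array, remaps every `UnorderedTermId` through the ordinal mapping, and sorts the values per document before writing them. A small self-contained sketch of that interval walk, using plain vectors instead of tantivy's writer types:

```rust
use std::collections::HashMap;

fn main() {
    // Flat multi-value layout: all values back to back, plus the start offset of
    // each document; doc i owns vals[doc_index[i]..doc_index[i + 1]].
    let vals: Vec<u64> = vec![7, 3, 3, 9];
    let doc_index: Vec<u64> = vec![0, 2]; // doc 0 -> [7, 3], doc 1 -> [3, 9]
    let mapping: HashMap<u64, u64> = vec![(7, 1), (3, 0), (9, 2)].into_iter().collect();

    let last_interval = (*doc_index.last().unwrap(), vals.len() as u64);
    let mut per_doc: Vec<Vec<u64>> = Vec::new();
    for (start, stop) in doc_index
        .windows(2)
        .map(|w| (w[0], w[1]))
        .chain(Some(last_interval))
        .map(|(start, stop)| (start as usize, stop as usize))
    {
        // Remap each unordered id to its ordered term ordinal, then sort per doc.
        let mut doc_vals: Vec<u64> = vals[start..stop].iter().map(|v| mapping[v]).collect();
        doc_vals.sort();
        per_doc.push(doc_vals);
    }
    assert_eq!(per_doc, vec![vec![0, 1], vec![0, 2]]);
}
```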
|
||||
None => {
|
||||
|
||||
@@ -7,11 +7,10 @@ use directory::ReadOnlySource;
|
||||
use directory::{Directory, RAMDirectory, WritePtr};
|
||||
use fastfield::{FastFieldSerializer, FastFieldsWriter};
|
||||
use owning_ref::OwningRef;
|
||||
use schema::SchemaBuilder;
|
||||
use schema::Schema;
|
||||
use schema::FAST;
|
||||
use std::collections::HashMap;
|
||||
use std::marker::PhantomData;
|
||||
use std::mem;
|
||||
use std::path::Path;
|
||||
use DocId;
|
||||
|
||||
@@ -80,7 +79,8 @@ impl<Item: FastValue> FastFieldReader<Item> {
|
||||
// TODO change start to `u64`.
|
||||
// For multifastfield, start is an index in a second fastfield, not a `DocId`
|
||||
pub fn get_range(&self, start: u32, output: &mut [Item]) {
|
||||
let output_u64: &mut [u64] = unsafe { mem::transmute(output) }; // ok: Item is either `u64` or `i64`
|
||||
// ok: Item is either `u64` or `i64`
|
||||
let output_u64: &mut [u64] = unsafe { &mut *(output as *mut [Item] as *mut [u64]) };
|
||||
self.bit_unpacker.get_range(start, output_u64);
|
||||
for out in output_u64.iter_mut() {
|
||||
*out = Item::from_u64(*out + self.min_value_u64).as_u64();
|
||||
@@ -108,7 +108,7 @@ impl<Item: FastValue> FastFieldReader<Item> {
|
||||
|
||||
impl<Item: FastValue> From<Vec<Item>> for FastFieldReader<Item> {
|
||||
fn from(vals: Vec<Item>) -> FastFieldReader<Item> {
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
let mut schema_builder = Schema::builder();
|
||||
let field = schema_builder.add_u64_field("field", FAST);
|
||||
let schema = schema_builder.build();
|
||||
let path = Path::new("__dummy__");
|
||||
|
||||
@@ -10,27 +10,28 @@ pub fn fieldnorm_to_id(fieldnorm: u32) -> u8 {
|
||||
.unwrap_or_else(|idx| idx - 1) as u8
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(clippy::unreadable_literal))]
|
||||
pub const FIELD_NORMS_TABLE: [u32; 256] = [
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
|
||||
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 44, 46, 48, 50, 52, 54, 56, 60,
|
||||
64, 68, 72, 76, 80, 84, 88, 96, 104, 112, 120, 128, 136, 144, 152, 168, 184, 200, 216, 232,
|
||||
248, 264, 280, 312, 344, 376, 408, 440, 472, 504, 536, 600, 664, 728, 792, 856, 920, 984, 1048,
|
||||
1176, 1304, 1432, 1560, 1688, 1816, 1944, 2072, 2328, 2584, 2840, 3096, 3352, 3608, 3864, 4120,
|
||||
4632, 5144, 5656, 6168, 6680, 7192, 7704, 8216, 9240, 10264, 11288, 12312, 13336, 14360, 15384,
|
||||
16408, 18456, 20504, 22552, 24600, 26648, 28696, 30744, 32792, 36888, 40984, 45080, 49176,
|
||||
53272, 57368, 61464, 65560, 73752, 81944, 90136, 98328, 106520, 114712, 122904, 131096, 147480,
|
||||
163864, 180248, 196632, 213016, 229400, 245784, 262168, 294936, 327704, 360472, 393240, 426008,
|
||||
458776, 491544, 524312, 589848, 655384, 720920, 786456, 851992, 917528, 983064, 1048600,
|
||||
1179672, 1310744, 1441816, 1572888, 1703960, 1835032, 1966104, 2097176, 2359320, 2621464,
|
||||
2883608, 3145752, 3407896, 3670040, 3932184, 4194328, 4718616, 5242904, 5767192, 6291480,
|
||||
6815768, 7340056, 7864344, 8388632, 9437208, 10485784, 11534360, 12582936, 13631512, 14680088,
|
||||
15728664, 16777240, 18874392, 20971544, 23068696, 25165848, 27263000, 29360152, 31457304,
|
||||
33554456, 37748760, 41943064, 46137368, 50331672, 54525976, 58720280, 62914584, 67108888,
|
||||
75497496, 83886104, 92274712, 100663320, 109051928, 117440536, 125829144, 134217752, 150994968,
|
||||
167772184, 184549400, 201326616, 218103832, 234881048, 251658264, 268435480, 301989912,
|
||||
335544344, 369098776, 402653208, 436207640, 469762072, 503316504, 536870936, 603979800,
|
||||
671088664, 738197528, 805306392, 872415256, 939524120, 1006632984, 1073741848, 1207959576,
|
||||
1342177304, 1476395032, 1610612760, 1744830488, 1879048216, 2013265944,
|
||||
248, 264, 280, 312, 344, 376, 408, 440, 472, 504, 536, 600, 664, 728, 792, 856, 920, 984,
|
||||
1_048, 1176, 1304, 1432, 1560, 1688, 1816, 1944, 2072, 2328, 2584, 2840, 3096, 3352, 3608,
|
||||
3864, 4120, 4632, 5144, 5656, 6168, 6680, 7192, 7704, 8216, 9240, 10264, 11288, 12312, 13336,
|
||||
14360, 15384, 16408, 18456, 20504, 22552, 24600, 26648, 28696, 30744, 32792, 36888, 40984,
|
||||
45080, 49176, 53272, 57368, 61464, 65560, 73752, 81944, 90136, 98328, 106520, 114712, 122904,
|
||||
131096, 147480, 163864, 180248, 196632, 213016, 229400, 245784, 262168, 294936, 327704, 360472,
|
||||
393240, 426008, 458776, 491544, 524312, 589848, 655384, 720920, 786456, 851992, 917528, 983064,
|
||||
1048600, 1179672, 1310744, 1441816, 1572888, 1703960, 1835032, 1966104, 2097176, 2359320,
|
||||
2621464, 2883608, 3145752, 3407896, 3670040, 3932184, 4194328, 4718616, 5242904, 5767192,
|
||||
6291480, 6815768, 7340056, 7864344, 8388632, 9437208, 10485784, 11534360, 12582936, 13631512,
|
||||
14680088, 15728664, 16777240, 18874392, 20971544, 23068696, 25165848, 27263000, 29360152,
|
||||
31457304, 33554456, 37748760, 41943064, 46137368, 50331672, 54525976, 58720280, 62914584,
|
||||
67108888, 75497496, 83886104, 92274712, 100663320, 109051928, 117440536, 125829144, 134217752,
|
||||
150994968, 167772184, 184549400, 201326616, 218103832, 234881048, 251658264, 268435480,
|
||||
301989912, 335544344, 369098776, 402653208, 436207640, 469762072, 503316504, 536870936,
|
||||
603979800, 671088664, 738197528, 805306392, 872415256, 939524120, 1006632984, 1073741848,
|
||||
1207959576, 1342177304, 1476395032, 1610612760, 1744830488, 1879048216, 2013265944,
|
||||
];
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -15,7 +15,7 @@
//! precompute computationally expensive functions of the fieldnorm
//! in a very short array.
//!
//! This trick is used by the [BM25 similarity]().
//! This trick is used by the BM25 similarity.
mod code;
mod reader;
mod serializer;
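
The `code` module compresses a fieldnorm into one byte by snapping it down to the nearest entry of a 256-value table, so per-fieldnorm factors (such as the BM25 length norm) can be precomputed in a 256-slot array. A toy version with a 4-entry table shows the roundtrip:

```rust
// Toy stand-in for tantivy's 256-entry FIELD_NORMS_TABLE.
const TABLE: [u32; 4] = [0, 1, 2, 4];

// Compress: map an exact fieldnorm to the id of the largest table entry <= fieldnorm.
fn fieldnorm_to_id(fieldnorm: u32) -> u8 {
    TABLE
        .binary_search(&fieldnorm)
        .unwrap_or_else(|idx| idx - 1) as u8
}

// Decompress: a single array lookup, cheap enough to precompute per-id weights.
fn id_to_fieldnorm(id: u8) -> u32 {
    TABLE[id as usize]
}

fn main() {
    assert_eq!(fieldnorm_to_id(3), 2); // 3 is approximated down to 2
    assert_eq!(id_to_fieldnorm(fieldnorm_to_id(4)), 4);
}
```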
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use rand::thread_rng;
|
||||
use std::collections::HashSet;
|
||||
|
||||
use rand::distributions::{IndependentSample, Range};
|
||||
use rand::Rng;
|
||||
use schema::*;
|
||||
use Index;
|
||||
use Searcher;
|
||||
@@ -15,7 +15,7 @@ fn check_index_content(searcher: &Searcher, vals: &HashSet<u64>) {
|
||||
#[ignore]
|
||||
#[cfg(feature = "mmap")]
|
||||
fn test_indexing() {
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
let mut schema_builder = Schema::builder();
|
||||
|
||||
let id_field = schema_builder.add_u64_field("id", INT_INDEXED);
|
||||
let multiples_field = schema_builder.add_u64_field("multiples", INT_INDEXED);
|
||||
@@ -23,7 +23,6 @@ fn test_indexing() {
|
||||
|
||||
let index = Index::create_from_tempdir(schema).unwrap();
|
||||
|
||||
let universe = Range::new(0u64, 20u64);
|
||||
let mut rng = thread_rng();
|
||||
|
||||
let mut index_writer = index.writer_with_num_threads(3, 120_000_000).unwrap();
|
||||
@@ -32,7 +31,7 @@ fn test_indexing() {
|
||||
let mut uncommitted_docs: HashSet<u64> = HashSet::new();
|
||||
|
||||
for _ in 0..200 {
|
||||
let random_val = universe.ind_sample(&mut rng);
|
||||
let random_val = rng.gen_range(0, 20);
|
||||
if random_val == 0 {
|
||||
index_writer.commit().expect("Commit failed");
|
||||
committed_docs.extend(&uncommitted_docs);
|
||||
|
||||
@@ -52,7 +52,8 @@ impl DeleteQueue {
|
||||
//
|
||||
// Past delete operations are not accessible.
|
||||
pub fn cursor(&self) -> DeleteCursor {
|
||||
let last_block = self.inner
|
||||
let last_block = self
|
||||
.inner
|
||||
.read()
|
||||
.expect("Read lock poisoned when opening delete queue cursor")
|
||||
.last_block
|
||||
@@ -92,7 +93,8 @@ impl DeleteQueue {
|
||||
// be some unflushed operations.
|
||||
//
|
||||
fn flush(&self) -> Option<Arc<Block>> {
|
||||
let mut self_wlock = self.inner
|
||||
let mut self_wlock = self
|
||||
.inner
|
||||
.write()
|
||||
.expect("Failed to acquire write lock on delete queue writer");
|
||||
|
||||
@@ -132,7 +134,8 @@ impl From<DeleteQueue> for NextBlock {
|
||||
impl NextBlock {
|
||||
fn next_block(&self) -> Option<Arc<Block>> {
|
||||
{
|
||||
let next_read_lock = self.0
|
||||
let next_read_lock = self
|
||||
.0
|
||||
.read()
|
||||
.expect("Failed to acquire write lock in delete queue");
|
||||
if let InnerNextBlock::Closed(ref block) = *next_read_lock {
|
||||
@@ -141,7 +144,8 @@ impl NextBlock {
|
||||
}
|
||||
let next_block;
|
||||
{
|
||||
let mut next_write_lock = self.0
|
||||
let mut next_write_lock = self
|
||||
.0
|
||||
.write()
|
||||
.expect("Failed to acquire write lock in delete queue");
|
||||
match *next_write_lock {
|
||||
@@ -182,19 +186,18 @@ impl DeleteCursor {
|
||||
/// `opstamp >= target_opstamp`.
|
||||
pub fn skip_to(&mut self, target_opstamp: u64) {
|
||||
// TODO Can be optimize as we work with block.
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(while_let_loop))]
|
||||
loop {
|
||||
if let Some(operation) = self.get() {
|
||||
if operation.opstamp >= target_opstamp {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
while self.is_behind_opstamp(target_opstamp) {
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(clippy::wrong_self_convention))]
|
||||
fn is_behind_opstamp(&mut self, target_opstamp: u64) -> bool {
|
||||
self.get()
|
||||
.map(|operation| operation.opstamp < target_opstamp)
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// If the current block has been entirely
|
||||
/// consumed, try to load the next one.
|
||||
///
|
||||
|
||||
@@ -1,26 +1,130 @@
use core::LOCKFILE_FILEPATH;
use directory::error::OpenWriteError;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::thread;
use std::time::Duration;
use Directory;
use TantivyError;

/// The directory lock is a mechanism used to
/// prevent the creation of two [`IndexWriter`](struct.IndexWriter.html)
///
/// Only one lock can exist at a time for a given directory.
/// The lock is released automatically on `Drop`.
pub struct DirectoryLock {
    directory: Box<Directory>,
#[derive(Debug, Clone, Copy)]
pub enum LockType {
    /// Only one process should be able to write tantivy's index at a time.
    /// This lock file, when present, is in charge of preventing other processes from opening an IndexWriter.
    ///
    /// If the process is killed and this file remains, it is safe to remove it manually.
    ///
    /// Failing to acquire this lock usually means a misuse of tantivy's API
    /// (creating more than one instance of the `IndexWriter`), or a spurious
    /// lock file remaining after a crash. In the latter case, removing the file after
    /// checking that no process running tantivy is still alive is safe.
    IndexWriterLock,
    /// The meta lock file is here to protect the segment files being opened by
    /// `.load_searchers()` from being garbage collected.
    /// It makes it possible for another process to safely consume
    /// our index in-writing. Ideally, we may have preferred `RwLock` semantics
    /// here, but it is difficult to achieve on Windows.
    ///
    /// Opening segment readers is a very fast process.
    /// Right now if the lock cannot be acquired on the first attempt, the logic
    /// is very simplistic. We retry after `100ms` until we effectively
    /// acquire the lock.
    /// This lock should not have much contention in normal usage.
    MetaLock,
}
|
||||
|
||||
impl DirectoryLock {
|
||||
pub fn lock(mut directory: Box<Directory>) -> Result<DirectoryLock, OpenWriteError> {
|
||||
directory.open_write(&*LOCKFILE_FILEPATH)?;
|
||||
Ok(DirectoryLock { directory })
|
||||
/// Retry the logic of acquiring locks is pretty simple.
|
||||
/// We just retry `n` times after a given `duratio`, both
|
||||
/// depending on the type of lock.
|
||||
struct RetryPolicy {
|
||||
num_retries: usize,
|
||||
wait_in_ms: u64,
|
||||
}
|
||||
|
||||
impl RetryPolicy {
|
||||
fn no_retry() -> RetryPolicy {
|
||||
RetryPolicy {
|
||||
num_retries: 0,
|
||||
wait_in_ms: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn wait_and_retry(&mut self) -> bool {
|
||||
if self.num_retries == 0 {
|
||||
false
|
||||
} else {
|
||||
self.num_retries -= 1;
|
||||
let wait_duration = Duration::from_millis(self.wait_in_ms);
|
||||
thread::sleep(wait_duration);
|
||||
true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LockType {
|
||||
fn retry_policy(self) -> RetryPolicy {
|
||||
match self {
|
||||
LockType::IndexWriterLock => RetryPolicy::no_retry(),
|
||||
LockType::MetaLock => RetryPolicy {
|
||||
num_retries: 100,
|
||||
wait_in_ms: 100,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn try_acquire_lock(self, directory: &mut Directory) -> Result<DirectoryLock, TantivyError> {
|
||||
let path = self.filename();
|
||||
let mut write = directory.open_write(path).map_err(|e| match e {
|
||||
OpenWriteError::FileAlreadyExists(_) => TantivyError::LockFailure(self),
|
||||
OpenWriteError::IOError(io_error) => TantivyError::IOError(io_error),
|
||||
})?;
|
||||
write.flush()?;
|
||||
Ok(DirectoryLock {
|
||||
directory: directory.box_clone(),
|
||||
path: path.to_owned(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Acquire a lock in the given directory.
|
||||
pub fn acquire_lock(self, directory: &Directory) -> Result<DirectoryLock, TantivyError> {
|
||||
let mut box_directory = directory.box_clone();
|
||||
let mut retry_policy = self.retry_policy();
|
||||
loop {
|
||||
let lock_result = self.try_acquire_lock(&mut *box_directory);
|
||||
match lock_result {
|
||||
Ok(result) => {
|
||||
return Ok(result);
|
||||
}
|
||||
Err(TantivyError::LockFailure(ref filepath)) => {
|
||||
if !retry_policy.wait_and_retry() {
|
||||
return Err(TantivyError::LockFailure(filepath.to_owned()));
|
||||
}
|
||||
}
|
||||
Err(_) => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn filename(&self) -> &Path {
|
||||
match *self {
|
||||
LockType::MetaLock => Path::new(".tantivy-meta.lock"),
|
||||
LockType::IndexWriterLock => Path::new(".tantivy-indexer.lock"),
|
||||
}
|
||||
}
|
||||
}
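
`acquire_lock` above spins on `try_acquire_lock`, consulting the lock type's retry policy between attempts: no retry for the index-writer lock, up to 100 retries spaced 100ms apart for the meta lock. A compact sketch of that loop, with a hypothetical `try_lock` closure in place of the real file creation:

```rust
use std::thread;
use std::time::Duration;

struct RetryPolicy {
    num_retries: usize,
    wait_in_ms: u64,
}

impl RetryPolicy {
    fn wait_and_retry(&mut self) -> bool {
        if self.num_retries == 0 {
            false
        } else {
            self.num_retries -= 1;
            thread::sleep(Duration::from_millis(self.wait_in_ms));
            true
        }
    }
}

// Keep trying `try_lock` until it succeeds or the policy gives up.
fn acquire<F: FnMut() -> bool>(mut try_lock: F, mut policy: RetryPolicy) -> bool {
    loop {
        if try_lock() {
            return true;
        }
        if !policy.wait_and_retry() {
            return false;
        }
    }
}

fn main() {
    let mut attempts = 0;
    let acquired = acquire(
        || {
            attempts += 1;
            attempts >= 3 // the stale lock file disappears on the third attempt
        },
        RetryPolicy { num_retries: 100, wait_in_ms: 1 },
    );
    assert!(acquired);
    assert_eq!(attempts, 3);
}
```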
|
||||
|
||||
/// The `DirectoryLock` is an object that represents a file lock.
|
||||
/// See [`LockType`](struct.LockType.html)
|
||||
///
|
||||
/// It is transparently associated to a lock file, that gets deleted
|
||||
/// on `Drop.` The lock is release automatically on `Drop`.
|
||||
pub struct DirectoryLock {
|
||||
directory: Box<Directory>,
|
||||
path: PathBuf,
|
||||
}
|
||||
|
||||
impl Drop for DirectoryLock {
|
||||
fn drop(&mut self) {
|
||||
if let Err(e) = self.directory.delete(&*LOCKFILE_FILEPATH) {
|
||||
if let Err(e) = self.directory.delete(&*self.path) {
|
||||
error!("Failed to remove the lock file. {:?}", e);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,15 +2,15 @@ use super::operation::AddOperation;
|
||||
use super::segment_updater::SegmentUpdater;
|
||||
use super::PreparedCommit;
|
||||
use bit_set::BitSet;
|
||||
use chan;
|
||||
use core::Index;
|
||||
use core::Segment;
|
||||
use core::SegmentComponent;
|
||||
use core::SegmentId;
|
||||
use core::SegmentMeta;
|
||||
use core::SegmentReader;
|
||||
use crossbeam::channel;
|
||||
use docset::DocSet;
|
||||
use error::{Error, ErrorKind, Result, ResultExt};
|
||||
use error::TantivyError;
|
||||
use fastfield::write_delete_bitset;
|
||||
use futures::sync::oneshot::Receiver;
|
||||
use indexer::delete_queue::{DeleteCursor, DeleteQueue};
|
||||
@@ -29,6 +29,7 @@ use std::mem;
|
||||
use std::mem::swap;
|
||||
use std::thread;
|
||||
use std::thread::JoinHandle;
|
||||
use Result;
|
||||
|
||||
// Size of the margin for the heap. A segment is closed when the remaining memory
|
||||
// in the heap goes below MARGIN_IN_BYTES.
|
||||
@@ -42,8 +43,8 @@ pub const HEAP_SIZE_MAX: usize = u32::max_value() as usize - MARGIN_IN_BYTES;
|
||||
// reaches `PIPELINE_MAX_SIZE_IN_DOCS`
|
||||
const PIPELINE_MAX_SIZE_IN_DOCS: usize = 10_000;
|
||||
|
||||
type DocumentSender = chan::Sender<AddOperation>;
|
||||
type DocumentReceiver = chan::Receiver<AddOperation>;
|
||||
type DocumentSender = channel::Sender<AddOperation>;
|
||||
type DocumentReceiver = channel::Receiver<AddOperation>;
|
||||
|
||||
/// Split the thread memory budget into
|
||||
/// - the heap size
|
||||
@@ -53,13 +54,14 @@ type DocumentReceiver = chan::Receiver<AddOperation>;
fn initial_table_size(per_thread_memory_budget: usize) -> usize {
    let table_size_limit: usize = per_thread_memory_budget / 3;
    (1..)
        .into_iter()
        .take_while(|num_bits: &usize| compute_table_size(*num_bits) < table_size_limit)
        .last()
        .expect(&format!(
            "Per thread memory is too small: {}",
            per_thread_memory_budget
        ))
        .unwrap_or_else(|| {
            panic!(
                "Per thread memory is too small: {}",
                per_thread_memory_budget
            )
        })
        .min(19) // we cap it at 512K
}
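
`initial_table_size` gives the per-thread hash table a third of the thread's memory budget: it picks the largest number of bits whose table cost still fits under that third, and caps the result at 19 bits (512K slots). A worked sketch with an assumed cost of roughly 40 bytes per slot; `table_cost` only stands in for the real `compute_table_size`, which may differ:

```rust
// Assumed cost model: ~40 bytes per hash-table slot (illustrative only).
fn table_cost(num_bits: usize) -> usize {
    (1 << num_bits) * 40
}

fn initial_table_size(per_thread_memory_budget: usize) -> usize {
    let table_size_limit = per_thread_memory_budget / 3; // a third of the budget
    (1..)
        .take_while(|&num_bits| table_cost(num_bits) < table_size_limit)
        .last()
        .unwrap_or_else(|| panic!("Per thread memory is too small: {}", per_thread_memory_budget))
        .min(19) // never more than 2^19 (512K) slots
}

fn main() {
    // 60MB per thread -> 20MB for the table; 2^19 slots would cost ~21MB, so 18 bits.
    assert_eq!(initial_table_size(60_000_000), 18);
}
```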
|
||||
|
||||
@@ -122,14 +124,14 @@ pub fn open_index_writer(
|
||||
"The heap size per thread needs to be at least {}.",
|
||||
HEAP_SIZE_MIN
|
||||
);
|
||||
bail!(ErrorKind::InvalidArgument(err_msg));
|
||||
return Err(TantivyError::InvalidArgument(err_msg));
|
||||
}
|
||||
if heap_size_in_bytes_per_thread >= HEAP_SIZE_MAX {
|
||||
let err_msg = format!("The heap size per thread cannot exceed {}", HEAP_SIZE_MAX);
|
||||
bail!(ErrorKind::InvalidArgument(err_msg));
|
||||
return Err(TantivyError::InvalidArgument(err_msg));
|
||||
}
|
||||
let (document_sender, document_receiver): (DocumentSender, DocumentReceiver) =
|
||||
chan::sync(PIPELINE_MAX_SIZE_IN_DOCS);
|
||||
channel::bounded(PIPELINE_MAX_SIZE_IN_DOCS);
|
||||
|
||||
let delete_queue = DeleteQueue::new();
|
||||
|
||||
@@ -138,7 +140,7 @@ pub fn open_index_writer(
|
||||
let stamper = Stamper::new(current_opstamp);
|
||||
|
||||
let segment_updater =
|
||||
SegmentUpdater::new(index.clone(), stamper.clone(), &delete_queue.cursor())?;
|
||||
SegmentUpdater::create(index.clone(), stamper.clone(), &delete_queue.cursor())?;
|
||||
|
||||
let mut index_writer = IndexWriter {
|
||||
_directory_lock: Some(directory_lock),
|
||||
@@ -176,7 +178,7 @@ pub fn compute_deleted_bitset(
|
||||
) -> Result<bool> {
|
||||
let mut might_have_changed = false;
|
||||
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(while_let_loop))]
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(clippy::while_let_loop))]
|
||||
loop {
|
||||
if let Some(delete_op) = delete_cursor.get() {
|
||||
if delete_op.opstamp > target_opstamp {
|
||||
@@ -300,25 +302,29 @@ fn index_documents(
|
||||
|
||||
let last_docstamp: u64 = *(doc_opstamps.last().unwrap());
|
||||
|
||||
let doc_to_opstamps = DocToOpstampMapping::from(doc_opstamps);
|
||||
let segment_reader = SegmentReader::open(segment)?;
|
||||
let mut deleted_bitset = BitSet::with_capacity(num_docs as usize);
|
||||
let may_have_deletes = compute_deleted_bitset(
|
||||
&mut deleted_bitset,
|
||||
&segment_reader,
|
||||
&mut delete_cursor,
|
||||
&doc_to_opstamps,
|
||||
last_docstamp,
|
||||
)?;
|
||||
|
||||
let segment_entry = SegmentEntry::new(segment_meta, delete_cursor, {
|
||||
if may_have_deletes {
|
||||
Some(deleted_bitset)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
let segment_entry: SegmentEntry = if delete_cursor.get().is_some() {
|
||||
let doc_to_opstamps = DocToOpstampMapping::from(doc_opstamps);
|
||||
let segment_reader = SegmentReader::open(segment)?;
|
||||
let mut deleted_bitset = BitSet::with_capacity(num_docs as usize);
|
||||
let may_have_deletes = compute_deleted_bitset(
|
||||
&mut deleted_bitset,
|
||||
&segment_reader,
|
||||
&mut delete_cursor,
|
||||
&doc_to_opstamps,
|
||||
last_docstamp,
|
||||
)?;
|
||||
SegmentEntry::new(segment_meta, delete_cursor, {
|
||||
if may_have_deletes {
|
||||
Some(deleted_bitset)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
} else {
|
||||
// if there are no delete operation in the queue, no need
|
||||
// to even open the segment.
|
||||
SegmentEntry::new(segment_meta, delete_cursor, None)
|
||||
};
|
||||
Ok(segment_updater.add_segment(generation, segment_entry))
|
||||
}
|
||||
|
||||
@@ -334,13 +340,16 @@ impl IndexWriter {
|
||||
join_handle
|
||||
.join()
|
||||
.expect("Indexing Worker thread panicked")
|
||||
.chain_err(|| ErrorKind::ErrorInThread("Error in indexing worker thread.".into()))?;
|
||||
.map_err(|_| {
|
||||
TantivyError::ErrorInThread("Error in indexing worker thread.".into())
|
||||
})?;
|
||||
}
|
||||
drop(self.workers_join_handle);
|
||||
|
||||
let result = self.segment_updater
|
||||
let result = self
|
||||
.segment_updater
|
||||
.wait_merging_thread()
|
||||
.chain_err(|| ErrorKind::ErrorInThread("Failed to join merging thread.".into()));
|
||||
.map_err(|_| TantivyError::ErrorInThread("Failed to join merging thread.".into()));
|
||||
|
||||
if let Err(ref e) = result {
|
||||
error!("Some merging thread failed {:?}", e);
|
||||
@@ -380,7 +389,7 @@ impl IndexWriter {
|
||||
let mem_budget = self.heap_size_in_bytes_per_thread;
|
||||
let join_handle: JoinHandle<Result<()>> = thread::Builder::new()
|
||||
.name(format!(
|
||||
"indexing thread {} for gen {}",
|
||||
"thrd-tantivy-index{}-gen{}",
|
||||
self.worker_id, generation
|
||||
))
|
||||
.spawn(move || {
|
||||
@@ -458,10 +467,8 @@ impl IndexWriter {
|
||||
///
|
||||
/// Returns the former segment_ready channel.
|
||||
fn recreate_document_channel(&mut self) -> DocumentReceiver {
|
||||
let (mut document_sender, mut document_receiver): (
|
||||
DocumentSender,
|
||||
DocumentReceiver,
|
||||
) = chan::sync(PIPELINE_MAX_SIZE_IN_DOCS);
|
||||
let (mut document_sender, mut document_receiver): (DocumentSender, DocumentReceiver) =
|
||||
channel::bounded(PIPELINE_MAX_SIZE_IN_DOCS);
|
||||
swap(&mut self.document_sender, &mut document_sender);
|
||||
swap(&mut self.document_receiver, &mut document_receiver);
|
||||
document_receiver
|
||||
@@ -485,7 +492,8 @@ impl IndexWriter {
|
||||
let document_receiver = self.document_receiver.clone();
|
||||
|
||||
// take the directory lock to create a new index_writer.
|
||||
let directory_lock = self._directory_lock
|
||||
let directory_lock = self
|
||||
._directory_lock
|
||||
.take()
|
||||
.expect("The IndexWriter does not have any lock. This is a bug, please report.");
|
||||
|
||||
@@ -550,16 +558,13 @@ impl IndexWriter {
|
||||
// and recreate a new one channels.
|
||||
self.recreate_document_channel();
|
||||
|
||||
let mut former_workers_join_handle = Vec::new();
|
||||
swap(
|
||||
&mut former_workers_join_handle,
|
||||
&mut self.workers_join_handle,
|
||||
);
|
||||
let former_workers_join_handle =
|
||||
mem::replace(&mut self.workers_join_handle, Vec::new());
|
||||
|
||||
for worker_handle in former_workers_join_handle {
|
||||
let indexing_worker_result = worker_handle
|
||||
.join()
|
||||
.map_err(|e| Error::from_kind(ErrorKind::ErrorInThread(format!("{:?}", e))))?;
|
||||
.map_err(|e| TantivyError::ErrorInThread(format!("{:?}", e)))?;
|
||||
|
||||
indexing_worker_result?;
|
||||
// add a new worker for the next generation.
|
||||
@@ -633,7 +638,10 @@ impl IndexWriter {
|
||||
pub fn add_document(&mut self, document: Document) -> u64 {
|
||||
let opstamp = self.stamper.stamp();
|
||||
let add_operation = AddOperation { opstamp, document };
|
||||
self.document_sender.send(add_operation);
|
||||
let send_result = self.document_sender.send(add_operation);
|
||||
if let Err(e) = send_result {
|
||||
panic!("Failed to index document. Sending to indexing channel failed. This probably means all of the indexing threads have panicked. {:?}", e);
|
||||
}
|
||||
opstamp
|
||||
}
|
||||
}
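
The panic added to `add_document` exists because the crossbeam channel's `send` returns a `Result`: once every receiver (i.e. every indexing worker) is gone, sending fails instead of blocking forever. A minimal sketch of that behaviour using the crossbeam-channel crate directly:

```rust
extern crate crossbeam_channel;

use crossbeam_channel::bounded;

fn main() {
    let (sender, receiver) = bounded::<u64>(4);
    sender.send(1).expect("receiver still alive");
    assert_eq!(receiver.recv(), Ok(1));

    drop(receiver);
    // With no receiver left, send fails rather than blocking.
    assert!(sender.send(2).is_err());
}
```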
|
||||
@@ -642,7 +650,6 @@ impl IndexWriter {
|
||||
mod tests {
|
||||
|
||||
use super::initial_table_size;
|
||||
use env_logger;
|
||||
use error::*;
|
||||
use indexer::NoMergePolicy;
|
||||
use schema::{self, Document};
|
||||
@@ -651,18 +658,33 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_lockfile_stops_duplicates() {
|
||||
let schema_builder = schema::SchemaBuilder::default();
|
||||
let schema_builder = schema::Schema::builder();
|
||||
let index = Index::create_in_ram(schema_builder.build());
|
||||
let _index_writer = index.writer(40_000_000).unwrap();
|
||||
match index.writer(40_000_000) {
|
||||
Err(Error(ErrorKind::FileAlreadyExists(_), _)) => {}
|
||||
Err(TantivyError::LockFailure(_)) => {}
|
||||
_ => panic!("Expected FileAlreadyExists error"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_lockfile_already_exists_error_msg() {
|
||||
let schema_builder = schema::Schema::builder();
|
||||
let index = Index::create_in_ram(schema_builder.build());
|
||||
let _index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
||||
match index.writer_with_num_threads(1, 3_000_000) {
|
||||
Err(err) => {
|
||||
let err_msg = err.to_string();
|
||||
assert!(err_msg.contains("Lockfile"));
|
||||
assert!(err_msg.contains("Possible causes:"))
|
||||
}
|
||||
_ => panic!("Expected LockfileAlreadyExists error"),
|
||||
}
|
||||
}
|
||||
|
||||
    #[test]
    fn test_set_merge_policy() {
        let schema_builder = schema::SchemaBuilder::default();
        let schema_builder = schema::Schema::builder();
        let index = Index::create_in_ram(schema_builder.build());
        let index_writer = index.writer(40_000_000).unwrap();
        assert_eq!(
@@ -680,7 +702,7 @@ mod tests {

    #[test]
    fn test_lockfile_released_on_drop() {
        let schema_builder = schema::SchemaBuilder::default();
        let schema_builder = schema::Schema::builder();
        let index = Index::create_in_ram(schema_builder.build());
        {
            let _index_writer = index.writer(40_000_000).unwrap();
@@ -692,7 +714,7 @@ mod tests {

    #[test]
    fn test_commit_and_rollback() {
        let mut schema_builder = schema::SchemaBuilder::default();
        let mut schema_builder = schema::Schema::builder();
        let text_field = schema_builder.add_text_field("text", schema::TEXT);
        let index = Index::create_in_ram(schema_builder.build());

@@ -714,7 +736,7 @@ mod tests {
            index_writer.add_document(doc!(text_field=>"b"));
            index_writer.add_document(doc!(text_field=>"c"));
        }
        assert_eq!(index_writer.commit().unwrap(), 2u64);
        assert_eq!(index_writer.commit().unwrap(), 3u64);
        index.load_searchers().unwrap();
        assert_eq!(num_docs_containing("a"), 0);
        assert_eq!(num_docs_containing("b"), 1);
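As the assertions suggest, commit returns the opstamp of the commit point, and documents added since the last commit can be discarded with rollback. A hedged sketch in the style of these tests (same imports as the test module; field name and sizes are illustrative):

let mut schema_builder = schema::Schema::builder();
let text_field = schema_builder.add_text_field("text", schema::TEXT);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();

index_writer.add_document(doc!(text_field => "kept"));
// The returned opstamp identifies this commit point.
let _commit_opstamp = index_writer.commit().unwrap();

index_writer.add_document(doc!(text_field => "discarded"));
// rollback drops everything added since the last successful commit.
index_writer.rollback().unwrap();

index.load_searchers().unwrap();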
@@ -726,8 +748,7 @@ mod tests {

    #[test]
    fn test_with_merges() {
        let _ = env_logger::init();
        let mut schema_builder = schema::SchemaBuilder::default();
        let mut schema_builder = schema::Schema::builder();
        let text_field = schema_builder.add_text_field("text", schema::TEXT);
        let index = Index::create_in_ram(schema_builder.build());
        let num_docs_containing = |s: &str| {
@@ -764,8 +785,7 @@ mod tests {

    #[test]
    fn test_prepare_with_commit_message() {
        let _ = env_logger::init();
        let mut schema_builder = schema::SchemaBuilder::default();
        let mut schema_builder = schema::Schema::builder();
        let text_field = schema_builder.add_text_field("text", schema::TEXT);
        let index = Index::create_in_ram(schema_builder.build());

@@ -779,7 +799,6 @@ mod tests {
        {
            let mut prepared_commit = index_writer.prepare_commit().expect("commit failed");
            prepared_commit.set_payload("first commit");
            assert_eq!(prepared_commit.opstamp(), 100);
            prepared_commit.commit().expect("commit failed");
        }
        {
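For context, the two-phase commit API exercised here: prepare_commit returns a prepared commit that can carry a payload and is then either committed or aborted. A rough sketch in the style of these tests (same imports as the test module; field name and payload are illustrative):

let mut schema_builder = schema::Schema::builder();
let text_field = schema_builder.add_text_field("text", schema::TEXT);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
index_writer.add_document(doc!(text_field => "a"));

{
    // Phase 1: flush the pending documents without publishing them yet.
    let mut prepared_commit = index_writer.prepare_commit().expect("prepare failed");
    prepared_commit.set_payload("my checkpoint");
    // Phase 2: publish ...
    prepared_commit.commit().expect("commit failed");
    // ... or call prepared_commit.abort() instead to drop the prepared state.
}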
@@ -799,8 +818,7 @@ mod tests {

    #[test]
    fn test_prepare_but_rollback() {
        let _ = env_logger::init();
        let mut schema_builder = schema::SchemaBuilder::default();
        let mut schema_builder = schema::Schema::builder();
        let text_field = schema_builder.add_text_field("text", schema::TEXT);
        let index = Index::create_in_ram(schema_builder.build());

@@ -814,7 +832,6 @@ mod tests {
        {
            let mut prepared_commit = index_writer.prepare_commit().expect("commit failed");
            prepared_commit.set_payload("first commit");
            assert_eq!(prepared_commit.opstamp(), 100);
            prepared_commit.abort().expect("commit failed");
        }
        {
@@ -844,4 +861,32 @@ mod tests {
        assert_eq!(initial_table_size(1_000_000_000), 19);
    }

    #[cfg(not(feature = "no_fail"))]
    #[test]
    fn test_write_commit_fails() {
        use fail;
        let mut schema_builder = schema::Schema::builder();
        let text_field = schema_builder.add_text_field("text", schema::TEXT);
        let index = Index::create_in_ram(schema_builder.build());

        let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
        for _ in 0..100 {
            index_writer.add_document(doc!(text_field => "a"));
        }
        index_writer.commit().unwrap();
        fail::cfg("RAMDirectory::atomic_write", "return(error_write_failed)").unwrap();
        for _ in 0..100 {
            index_writer.add_document(doc!(text_field => "b"));
        }
        assert!(index_writer.commit().is_err());
        index.load_searchers().unwrap();
        let num_docs_containing = |s: &str| {
            let searcher = index.searcher();
            let term_a = Term::from_field_text(text_field, s);
            searcher.doc_freq(&term_a)
        };
        assert_eq!(num_docs_containing("a"), 100);
        assert_eq!(num_docs_containing("b"), 0);
        fail::cfg("RAMDirectory::atomic_write", "off").unwrap();
    }
}

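The test above flips a fail point named "RAMDirectory::atomic_write". With the fail crate, a fail point is a named hook in library code whose behaviour tests can override at runtime. A self-contained sketch of the mechanism only; the function and fail-point name below are illustrative, not tantivy's actual code:

#[macro_use]
extern crate fail;

use std::io;

fn atomic_write_stub() -> io::Result<()> {
    // Tests can force an early error return here via fail::cfg(...).
    fail_point!("example::atomic_write", |_| {
        Err(io::Error::new(io::ErrorKind::Other, "injected write failure"))
    });
    Ok(())
}

fn main() {
    assert!(atomic_write_stub().is_ok());
    fail::cfg("example::atomic_write", "return(boom)").unwrap();
    assert!(atomic_write_stub().is_err());
    fail::cfg("example::atomic_write", "off").unwrap();
    assert!(atomic_write_stub().is_ok());
}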
@@ -80,10 +80,6 @@ impl MergePolicy for LogMergePolicy {
            .map(|ind_vec| MergeCandidate(ind_vec.iter().map(|&ind| segments[ind].id()).collect()))
            .collect()
    }

    fn box_clone(&self) -> Box<MergePolicy> {
        Box::new(self.clone())
    }
}

impl Default for LogMergePolicy {

@@ -11,18 +11,31 @@ pub struct MergeCandidate(pub Vec<SegmentId>);
///
/// Every time a the list of segments changes, the segment updater
/// asks the merge policy if some segments should be merged.
pub trait MergePolicy: marker::Send + marker::Sync + Debug {
pub trait MergePolicy: MergePolicyClone + marker::Send + marker::Sync + Debug {
    /// Given the list of segment metas, returns the list of merge candidates.
    ///
    /// This call happens on the segment updater thread, and will block
    /// other segment updates, so all implementations should happen rapidly.
    fn compute_merge_candidates(&self, segments: &[SegmentMeta]) -> Vec<MergeCandidate>;
}

/// MergePolicyClone
pub trait MergePolicyClone {
    /// Returns a boxed clone of the MergePolicy.
    fn box_clone(&self) -> Box<MergePolicy>;
}

impl<T> MergePolicyClone for T
where
    T: 'static + MergePolicy + Clone,
{
    fn box_clone(&self) -> Box<MergePolicy> {
        Box::new(self.clone())
    }
}
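With the blanket MergePolicyClone impl above, a merge policy only needs to derive Clone (in addition to Debug, plus being Send + Sync) to be usable as a boxed trait object; it no longer has to write box_clone by hand. A hedged sketch of a custom policy inside this module (the policy itself is illustrative, not part of tantivy):

/// Illustrative policy: merge every segment into one candidate whenever
/// there are at least `min_segments` segments.
#[derive(Debug, Clone)]
struct MergeAllPolicy {
    min_segments: usize,
}

impl MergePolicy for MergeAllPolicy {
    fn compute_merge_candidates(&self, segments: &[SegmentMeta]) -> Vec<MergeCandidate> {
        if segments.len() >= self.min_segments {
            vec![MergeCandidate(segments.iter().map(|meta| meta.id()).collect())]
        } else {
            Vec::new()
        }
    }
    // box_clone is supplied by the blanket `impl<T> MergePolicyClone for T`.
}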

/// Never merge segments.
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct NoMergePolicy;

impl Default for NoMergePolicy {
@@ -35,10 +48,6 @@ impl MergePolicy for NoMergePolicy {
    fn compute_merge_candidates(&self, _segments: &[SegmentMeta]) -> Vec<MergeCandidate> {
        Vec::new()
    }

    fn box_clone(&self) -> Box<MergePolicy> {
        Box::new(NoMergePolicy)
    }
}

#[cfg(test)]
@@ -52,7 +61,7 @@ pub mod tests {
    ///
    /// Everytime there is more than one segment,
    /// it will suggest to merge them.
    #[derive(Debug)]
    #[derive(Debug, Clone)]
    pub struct MergeWheneverPossible;

    impl MergePolicy for MergeWheneverPossible {
@@ -67,9 +76,5 @@ pub mod tests {
                vec![]
            }
        }

        fn box_clone(&self) -> Box<MergePolicy> {
            Box::new(MergeWheneverPossible)
        }
    }
}

Some files were not shown because too many files have changed in this diff.