Switching to stable rust in AppVeyor.

Preparing for release
Preparing for release.
2025-12-31 06:22:54 +00:00 · 2018-06-22 14:33:42 +09:00 · 2018-06-22 14:27:46 +09:00 · 2018-06-22 14:09:14 +09:00 · 2018-06-19 10:45:20 +09:00 · 2018-06-16 14:08:30 +09:00
238 changed files with 14091 additions and 103444 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+*.swp
 target
 target/debug
 .vscode
@@ -8,4 +9,4 @@ benchmark
 cpp/simdcomp/bitpackingbenchmark
 *.bk
 .idea
-trace.dat
+trace.dat
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,35 +1,127 @@
+# Based on the "trust" template v0.1.2
+# https://github.com/japaric/trust/tree/v0.1.2
+
+dist: trusty
 language: rust
-rust:
-  - nightly
+services: docker
+sudo: required
+
 env:
  global:
-    - CC=gcc-4.8
-    - CXX=g++-4.8
-    - TRAVIS_CARGO_NIGHTLY_FEATURE=""
-    - secure: eC8HjTi1wgRVCsMAeXEXt8Ckr0YBSGOEnQkkW4/Nde/OZ9jJjz2nmP1ELQlDE7+czHub2QvYtDMG0parcHZDx/Kus0yvyn08y3g2rhGIiE7y8OCvQm1Mybu2D/p7enm6shXquQ6Z5KRfRq+18mHy80wy9ABMA/ukEZdvnfQ76/Een8/Lb0eHaDoXDXn3PqLVtByvSfQQ7OhS60dEScu8PWZ6/l1057P5NpdWbMExBE7Ro4zYXNhkJeGZx0nP/Bd4Jjdt1XfPzMEybV6NZ5xsTILUBFTmOOt603IsqKGov089NExqxYu5bD3K+S4MzF1Nd6VhomNPJqLDCfhlymJCUj5n5Ku4yidlhQbM4Ej9nGrBalJnhcjBjPua5tmMF2WCxP9muKn/2tIOu1/+wc0vMf9Yd3wKIkf5+FtUxCgs2O+NslWvmOMAMI/yD25m7hb4t1IwE/4Bk+GVcWJRWXbo0/m6ZUHzRzdjUY2a1qvw7C9udzdhg7gcnXwsKrSWi2NjMiIVw86l+Zim0nLpKIN41sxZHLaFRG63Ki8zQ/481LGn32awJ6i3sizKS0WD+N1DfR2qYMrwYHaMN0uR0OFXYTJkFvTFttAeUY3EKmRKAuMhmO2YRdSr4/j/G5E9HMc1gSGJj6PxgpQU7EpvxRsmoVAEJr0mszmOj9icGHep/FM=
-addons:
-  apt:
-    sources:
-      - ubuntu-toolchain-r-test
-    packages:
-      - gcc-4.8
-      - g++-4.8
-      - libcurl4-openssl-dev
-      - libelf-dev
-      - libdw-dev
-      - binutils-dev
-before_script:
-  - |
-    pip install 'travis-cargo<0.2' --user &&
-    export PATH=$HOME/.local/bin:$PATH
+    - CRATE_NAME=tantivy
+
+matrix:
+  include:
+    # Android
+    - env: TARGET=aarch64-linux-android DISABLE_TESTS=1
+    - env: TARGET=arm-linux-androideabi DISABLE_TESTS=1
+    - env: TARGET=armv7-linux-androideabi DISABLE_TESTS=1
+    - env: TARGET=i686-linux-android DISABLE_TESTS=1
+    - env: TARGET=x86_64-linux-android DISABLE_TESTS=1
+
+    # iOS
+    #- env: TARGET=aarch64-apple-ios DISABLE_TESTS=1
+    #  os: osx
+    #- env: TARGET=armv7-apple-ios DISABLE_TESTS=1
+    #  os: osx
+    #- env: TARGET=armv7s-apple-ios DISABLE_TESTS=1
+    #  os: osx
+    #- env: TARGET=i386-apple-ios DISABLE_TESTS=1
+    #  os: osx
+    - env: TARGET=x86_64-apple-ios DISABLE_TESTS=1
+      os: osx
+
+    # Linux
+    - env: TARGET=aarch64-unknown-linux-gnu
+    # - env: TARGET=arm-unknown-linux-gnueabi
+    # - env: TARGET=armv7-unknown-linux-gnueabihf
+    - env: TARGET=i686-unknown-linux-gnu
+    #- env: TARGET=i686-unknown-linux-musl
+    #- env: TARGET=mips-unknown-linux-gnu
+    #- env: TARGET=mips64-unknown-linux-gnuabi64
+    #- env: TARGET=mips64el-unknown-linux-gnuabi64
+    #- env: TARGET=mipsel-unknown-linux-gnu
+    #- env: TARGET=powerpc-unknown-linux-gnu
+    #- env: TARGET=powerpc64-unknown-linux-gnu
+    #- env: TARGET=powerpc64le-unknown-linux-gnu
+    #- env: TARGET=s390x-unknown-linux-gnu DISABLE_TESTS=1
+    - env: TARGET=x86_64-unknown-linux-gnu
+    - env: TARGET=x86_64-unknown-linux-musl
+
+    # OSX
+    #- env: TARGET=i686-apple-darwin
+    #  os: osx
+    - env: TARGET=x86_64-apple-darwin
+      os: osx
+
+    # *BSD
+    #- env: TARGET=i686-unknown-freebsd DISABLE_TESTS=1
+    #- env: TARGET=x86_64-unknown-freebsd DISABLE_TESTS=1
+    #- env: TARGET=x86_64-unknown-netbsd DISABLE_TESTS=1
+
+    # Windows
+    #- env: TARGET=x86_64-pc-windows-gnu
+
+    # Bare metal
+    # These targets don't support std and as such are likely not suitable for
+    # most crates.
+    # - env: TARGET=thumbv6m-none-eabi
+    # - env: TARGET=thumbv7em-none-eabi
+    # - env: TARGET=thumbv7em-none-eabihf
+    # - env: TARGET=thumbv7m-none-eabi
+
+    # Testing other channels
+    #- env: TARGET=x86_64-unknown-linux-gnu
+    #  rust: nightly
+    #- env: TARGET=x86_64-apple-darwin
+    #  os: osx
+    #  rust: nightly
+
+before_install:
+  - set -e  # make the build shell stop at the first failing command
+  - rustup self update
+
+install:
+  - sh ci/install.sh  # installs `cross` and, for iOS targets, the rustup target
+  - source ~/.cargo/env || true  # best effort: `|| true` keeps a missing env file from failing the job
+
 script:
-  - |
-    travis-cargo build &&
-    travis-cargo test &&
-    travis-cargo bench
-  - cargo run --example simple_search
-after_success:
-  - bash ./script/build-doc.sh
-  - travis-cargo doc-upload
-  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then travis-cargo coveralls --no-sudo --verify; fi
-  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ./kcov/build/src/kcov --verify --coveralls-id=$TRAVIS_JOB_ID --include-path=`pwd`/src --exclude-path=`pwd`/cpp --exclude-pattern=/.cargo target/kcov target/debug/tantivy-*; fi
+  - bash ci/script.sh
+
+after_script: set +e  # turn `set -e` (enabled in before_install) back off
+
+before_deploy:
+  - sh ci/before_deploy.sh  # builds and packs the release tarball
+#
+#deploy:
+#  # - Create a `public_repo` GitHub token. Go to: https://github.com/settings/tokens/new
+#  # - Encrypt it: `travis encrypt 0123456789012345678901234567890123456789
+#  # - Paste the output down here
+#  api_key:
+#    secure: eC8HjTi1wgRVCsMAeXEXt8Ckr0YBSGOEnQkkW4/Nde/OZ9jJjz2nmP1ELQlDE7+czHub2QvYtDMG0parcHZDx/Kus0yvyn08y3g2rhGIiE7y8OCvQm1Mybu2D/p7enm6shXquQ6Z5KRfRq+18mHy80wy9ABMA/ukEZdvnfQ76/Een8/Lb0eHaDoXDXn3PqLVtByvSfQQ7OhS60dEScu8PWZ6/l1057P5NpdWbMExBE7Ro4zYXNhkJeGZx0nP/Bd4Jjdt1XfPzMEybV6NZ5xsTILUBFTmOOt603IsqKGov089NExqxYu5bD3K+S4MzF1Nd6VhomNPJqLDCfhlymJCUj5n5Ku4yidlhQbM4Ej9nGrBalJnhcjBjPua5tmMF2WCxP9muKn/2tIOu1/+wc0vMf9Yd3wKIkf5+FtUxCgs2O+NslWvmOMAMI/yD25m7hb4t1IwE/4Bk+GVcWJRWXbo0/m6ZUHzRzdjUY2a1qvw7C9udzdhg7gcnXwsKrSWi2NjMiIVw86l+Zim0nLpKIN41sxZHLaFRG63Ki8zQ/481LGn32awJ6i3sizKS0WD+N1DfR2qYMrwYHaMN0uR0OFXYTJkFvTFttAeUY3EKmRKAuMhmO2YRdSr4/j/G5E9HMc1gSGJj6PxgpQU7EpvxRsmoVAEJr0mszmOj9icGHep/FM=
+#  file_glob: true
+#  file: $CRATE_NAME-$TRAVIS_TAG-$TARGET.*
+#  on:
+#    # TODO Here you can pick which targets will generate binary releases
+#    # In this example, there are some targets that are tested using the stable
+#    # and nightly channels. This condition makes sure there is only one release
+#    # for such targets and that's generated using the stable channel
+#    condition: $TRAVIS_RUST_VERSION = stable
+#    tags: true
+#  provider: releases
+#  skip_cleanup: true
+
+cache: cargo
+before_cache:
+  # Travis can't cache files that are not readable by "others"
+  - chmod -R a+r $HOME/.cargo
+
+#branches:
+#  only:
+#    # release tags
+#    - /^v\d+\.\d+\.\d+.*$/
+#    - master
+
+notifications:
+  email:
+    on_success: never
--- a/11
+++ b/11
@@ -0,0 +1,11 @@
+# This is the list of authors of tantivy for copyright purposes.
+Paul Masurel
+Laurentiu Nicola
+Dru Sellers
+Ashley Mannix
+Michael J. Curry
+Jason Wolfe
+# As an employee of Google I am required to add Google LLC
+# in the list of authors, but this project is not affiliated to Google
+# in any other way.
+Google LLC 
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,48 @@
+Tantivy 0.6
+==========================
+
+
+Special thanks to @drusellers and @jason-wolfe for their contributions
+to this release!
+
+- Removed C code. Tantivy is now pure Rust. (@pmasurel)
+- BM25 (@pmasurel)
+- Approximate field norms encoded over 1 byte. (@pmasurel)
+- Compiles on stable rust (@pmasurel)
+- Add &[u8] fastfield for associating arbitrary bytes to each document (@jason-wolfe) (#270)
+    - Completely uncompressed
+    - Internally: One u64 fast field for indexes, one fast field for the bytes themselves.
+- Add NGram token support (@drusellers)
+- Add Stopword Filter support (@drusellers)
+- Add a FuzzyTermQuery (@drusellers)
+- Add a RegexQuery (@drusellers)
+- Various performance improvements (@pmasurel)
+
+
+Tantivy 0.5.2
+===========================
+- bugfix #274
+- bugfix #280
+- bugfix #289
+
+
+Tantivy 0.5.1
+==========================
+- bugfix #254 : tantivy failed if no documents in a segment contained a specific field.
+
+
+Tantivy 0.5
+==========================
+- Faceting
+- RangeQuery
+- Configurable tokenization pipeline
+- Bugfix in PhraseQuery
+- Various query optimisations
+- Allowing very large indexes
+    - 64 bits file address
+    - Smarter encoding of the `TermInfo` objects
+
+

 Tantivy 0.4.3
 ==========================
@@ -57,7 +102,7 @@ Tantivy 0.3
 Special thanks to @Kodraus @lnicola @Ameobea @manuel-woelker @celaus
 for their contribution to this release.

-Thanks also to everyone in tantivy gitter chat 
+Thanks also to everyone in tantivy gitter chat
 for their advise and company :)

 https://gitter.im/tantivy-search/tantivy
@@ -65,9 +110,9 @@ https://gitter.im/tantivy-search/tantivy

 Warning:

-Tantivy 0.3 is NOT backward compatible with tantivy 0.2 
+Tantivy 0.3 is NOT backward compatible with tantivy 0.2
 code and index format.
-You should not expect backward compatibility before 
+You should not expect backward compatibility before
 tantivy 1.0.


@@ -93,7 +138,7 @@ Thanks to @KodrAus ! (#108)
  the natural ordering.
 - Building binary targets for tantivy-cli (Thanks to @KodrAus)
 - Misc invisible bug fixes, and code cleanup.
- Use 
+- Use



--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,25 +1,27 @@
 [package]
 name = "tantivy"
-version = "0.5.0-dev"
+version = "0.6.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
-build = "build.rs"
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
-description = """Tantivy is a search engine library."""
+description = """Search engine library"""
 documentation = "https://tantivy-search.github.io/tantivy/tantivy/index.html"
 homepage = "https://github.com/tantivy-search/tantivy"
 repository = "https://github.com/tantivy-search/tantivy"
 readme = "README.md"
-keywords = ["search", "information", "retrieval"]
+keywords = ["search", "search engine", "information", "retrieval"]

 [dependencies]
+base64 = "0.9.1"
 byteorder = "1.0"
-memmap = "0.4"
 lazy_static = "0.2.1"
 tinysegmenter = "0.1.0"
 regex = "0.2"
-fst = "0.1.37"
-atomicwrites = "0.1.3"
+fst = {version="0.3", default-features=false}
+fst-regex = { version="0.2" }
+lz4 = {version="1.20", optional=true}
+snap = {version="0.2"}
+atomicwrites = {version="0.2.2", optional=true}
 tempfile = "2.1"
 log = "0.3.6"
 combine = "2.2"
@@ -27,16 +29,12 @@ tempdir = "0.3"
 serde = "1.0"
 serde_derive = "1.0"
 serde_json = "1.0"
-bincode = "0.8"
-libc = {version = "0.2.20", optional=true}
 num_cpus = "1.2"
 itertools = "0.5.9"
-lz4 = "1.20"
+levenshtein_automata = {version="0.1", features=["fst_automaton"]}
 bit-set = "0.4.0"
-time = "0.1"
-uuid = { version = "0.5", features = ["v4", "serde"] }
+uuid = { version = "0.6", features = ["v4", "serde"] }
 chan = "0.1"
-version = "2"
 crossbeam = "0.3"
 futures = "0.1"
 futures-cpupool = "0.1"
@@ -44,6 +42,10 @@ error-chain = "0.8"
 owning_ref = "0.3"
 stable_deref_trait = "1.0.0"
 rust-stemmers = "0.1.0"
+downcast = { version="0.9" }
+matches = "0.1"
+bitpacking = "0.5"
+fnv = "1.0.6"

 [target.'cfg(windows)'.dependencies]
 winapi = "0.2"
@@ -52,21 +54,23 @@ winapi = "0.2"
 rand = "0.3"
 env_logger = "0.4"

-[build-dependencies]
-cc = {version = "1.0.0", optional=true}
-
 [profile.release]
 opt-level = 3
 debug = false
 lto = true
 debug-assertions = false

-
 [features]
-default = ["simdcompression"]
-simdcompression = ["libc", "cc"]
-streamdict = []
-
+default = ["mmap"]
+mmap = ["fst/mmap", "atomicwrites"]  # enables fst's `mmap` feature and the optional `atomicwrites` dependency
+lz4-compression = ["lz4"]  # enables the optional `lz4` dependency

 [badges]
 travis-ci = { repository = "tantivy-search/tantivy" }
+
+[[example]]
+name = "simple_search"
+required-features = ["mmap"]
+
+[[example]]
+name = "custom_tokenizer"
--- a/2
+++ b/2
@@ -1,4 +1,4 @@
-Copyright (c) 2016 Paul Masurel
+Copyright (c) 2018 by the project authors, as listed in the AUTHORS file. 

 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

--- a/README.md
+++ b/README.md
@@ -4,35 +4,50 @@
 [![Coverage Status](https://coveralls.io/repos/github/tantivy-search/tantivy/badge.svg?branch=master&refresh1)](https://coveralls.io/github/tantivy-search/tantivy?branch=master)
 [![Join the chat at https://gitter.im/tantivy-search/tantivy](https://badges.gitter.im/tantivy-search/tantivy.svg)](https://gitter.im/tantivy-search/tantivy?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
-[![Build status](https://ci.appveyor.com/api/projects/status/r7nb13kj23u8m9pj?svg=true)](https://ci.appveyor.com/project/fulmicoton/tantivy)
-![beacon for google analytics](https://ga-beacon.appspot.com/UA-88834340-1/tantivy/README)
+[![Build status](https://ci.appveyor.com/api/projects/status/r7nb13kj23u8m9pj/branch/master?svg=true)](https://ci.appveyor.com/project/fulmicoton/tantivy/branch/master)

 **Tantivy** is a **full text search engine library** written in rust.

-It is strongly inspired by Lucene's design.
+It is closer to Lucene than to Elasticsearch and Solr in the sense that it is
+not an off-the-shelf search engine server, but rather a crate that can be used
+to build such a search engine.

+Tantivy is, in fact, strongly inspired by Lucene's design.

 # Features

- configurable indexing (optional term frequency and position indexing)
- tf-idf scoring
- Basic query language
- Phrase queries
+- Full-text search
+- Tiny startup time (<10ms), perfect for command line tools
+- BM25 scoring (the same as lucene)
+- Basic query language (`+michael +jackson`)
+- Phrase queries search (`"michael jackson"`)
 - Incremental indexing
 - Multithreaded indexing (indexing English Wikipedia takes < 3 minutes on my desktop)
- mmap based
- optional SIMD integer compression
- u64 and i64 fast fields (equivalent of doc values in Lucene)
+- Mmap directory
+- SIMD integer compression when the platform/CPU includes the SSE2 instruction set.
+- Single valued and multivalued u64 and i64 fast fields (equivalent of doc values in Lucene)
+- `&[u8]` fast fields
 - LZ4 compressed document store
+- Range queries
+- Faceted search
+- Configurable indexing (optional term frequency and position indexing)
 - Cheesy logo with a horse

-Tantivy supports Linux, MacOS and Windows.
+# Non-features

+- Distributed search is not supported and will not be in the scope of tantivy.
+
+
+# Supported OS and compiler
+
+Tantivy works on stable rust (>= 1.27) and supports Linux, MacOS and Windows.

 # Getting started

- [tantivy's usage example](http://fulmicoton.com/tantivy-examples/simple_search.html)
+- [tantivy's simple search example](http://fulmicoton.com/tantivy-examples/simple_search.html)
 - [tantivy-cli and its tutorial](https://github.com/tantivy-search/tantivy-cli).
+`tantivy-cli` is an actual command line interface that makes it easy for you to create a search engine,
+index documents and search via the CLI or a small server with a REST API.
 It will walk you through getting a wikipedia search engine up and running in a few minutes.
 - [reference doc]
    - [For the last released version](https://docs.rs/tantivy/)
@@ -40,21 +55,16 @@ It will walk you through getting a wikipedia search engine up and running in a f

 # Compiling

-Tantivy requires Rust Nightly because it uses requires the features [`box_syntax`](https://doc.rust-lang.org/stable/book/box-syntax-and-patterns.html), [`optin_builtin_traits`](https://github.com/rust-lang/rfcs/blob/master/text/0019-opt-in-builtin-traits.md), and [`conservative_impl_trait`](https://github.com/rust-lang/rfcs/blob/master/text/1522-conservative-impl-trait.md).
-The project can then be built using `cargo`.
+## Development
+
+Tantivy compiles on stable rust but requires `Rust >= 1.27`.
+To check out and run tests, you can simply run :

    git clone git@github.com:tantivy-search/tantivy.git
    cd tantivy
    cargo build


-Alternatively, if you are trying to compile `tantivy` without simd compression,
-you can disable this functionality. In this case, this submodule is not required
-and you can compile tantivy by using the `--no-default-features` flag.
-
-    cargo build --no-default-features
-
-
 # Contribute

 Send me an email (paul.masurel at gmail.com) if you want to contribute to tantivy.
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -6,9 +6,6 @@ environment:
  matrix:
    - channel: nightly
      target: x86_64-pc-windows-msvc
-    - channel: nightly
-      target: x86_64-pc-windows-gnu
-      msys_bits: 64

 install:
  - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
--- a/build.rs
+++ b/build.rs
@@ -1,61 +0,0 @@
-#[cfg(feature = "simdcompression")]
-mod build {
-    extern crate cc;
-
-    pub fn build() {
-        let mut config = cc::Build::new();
-        config
-            .include("./cpp/simdcomp/include")
-            .file("cpp/simdcomp/src/avxbitpacking.c")
-            .file("cpp/simdcomp/src/simdintegratedbitpacking.c")
-            .file("cpp/simdcomp/src/simdbitpacking.c")
-            .file("cpp/simdcomp/src/simdpackedsearch.c")
-            .file("cpp/simdcomp/src/simdcomputil.c")
-            .file("cpp/simdcomp/src/simdpackedselect.c")
-            .file("cpp/simdcomp/src/simdfor.c")
-            .file("cpp/simdcomp_wrapper.c");
-
-        if !cfg!(debug_assertions) {
-            config.opt_level(3);
-
-            if cfg!(target_env = "msvc") {
-                config
-                    .define("NDEBUG", None)
-                    .flag("/Gm-")
-                    .flag("/GS-")
-                    .flag("/Gy")
-                    .flag("/Oi")
-                    .flag("/GL");
-            }
-        }
-
-        if !cfg!(target_env = "msvc") {
-            config
-                .include("./cpp/streamvbyte/include")
-                .file("cpp/streamvbyte/src/streamvbyte.c")
-                .file("cpp/streamvbyte/src/streamvbytedelta.c")
-                .flag("-msse4.1")
-                .flag("-march=native")
-                .flag("-std=c99");
-        }
-
-        config.compile("libsimdcomp.a");
-
-        // Workaround for linking static libraries built with /GL
-        // https://github.com/rust-lang/rust/issues/26003
-        if !cfg!(debug_assertions) && cfg!(target_env = "msvc") {
-            println!("cargo:rustc-link-lib=dylib=simdcomp");
-        }
-
-        println!("cargo:rerun-if-changed=cpp");
-    }
-}
-
-#[cfg(not(feature = "simdcompression"))]
-mod build {
-    pub fn build() {}
-}
-
-fn main() {
-    build::build();
-}
--- a/ci/before_deploy.ps1
+++ b/ci/before_deploy.ps1
@@ -0,0 +1,23 @@
+# This script takes care of packaging the build artifacts that will go in the
+# release zipfile and of pushing that zipfile as an AppVeyor artifact.
+
+$SRC_DIR = $PWD.Path  # directory the script was started from
+$STAGE = [System.Guid]::NewGuid().ToString()  # unique name for the staging directory
+
+Set-Location $ENV:Temp
+New-Item -Type Directory -Name $STAGE
+Set-Location $STAGE
+
+$ZIP = "$SRC_DIR\$($Env:CRATE_NAME)-$($Env:APPVEYOR_REPO_TAG_NAME)-$($Env:TARGET).zip"  # archive path, inside $SRC_DIR
+
+# TODO Update this to package the right artifacts
+Copy-Item "$SRC_DIR\target\$($Env:TARGET)\release\hello.exe" '.\'
+
+7z a "$ZIP" *  # add everything in the staging directory to the archive
+
+Push-AppveyorArtifact "$ZIP"
+
+Remove-Item *.* -Force  # empty the staging directory before removing it
+Set-Location ..
+Remove-Item $STAGE
+Set-Location $SRC_DIR  # go back to where the script started
--- a/ci/before_deploy.sh
+++ b/ci/before_deploy.sh
@@ -0,0 +1,45 @@
+# This script takes care of building your crate and packaging it for release.
+#
+# Reads TRAVIS_OS_NAME, TARGET, CRATE_NAME and TRAVIS_TAG from the environment
+# and writes $CRATE_NAME-$TRAVIS_TAG-$TARGET.tar.gz into the working directory.
+
+set -ex
+
+# Build the release binary for $TARGET, stage it in a temporary directory and
+# pack that directory into the release tarball.
+main() {
+    local src="$(pwd)" \
+          stage=
+
+    # The staging directory is created differently on linux and osx.
+    case "$TRAVIS_OS_NAME" in
+        linux)
+            stage=$(mktemp -d)
+            ;;
+        osx)
+            stage=$(mktemp -d -t tmp)
+            ;;
+        *)
+            # Without a staging directory the cp/cd/tar below would operate
+            # on the wrong directories, so stop here.
+            echo "unsupported TRAVIS_OS_NAME: '$TRAVIS_OS_NAME'" >&2
+            exit 1
+            ;;
+    esac
+
+    test -f Cargo.lock || cargo generate-lockfile
+
+    # TODO Update this to build the artifacts that matter to you
+    cross rustc --bin hello --target "$TARGET" --release -- -C lto
+
+    # TODO Update this to package the right artifacts
+    cp "target/$TARGET/release/hello" "$stage/"
+
+    cd "$stage"
+    tar czf "$src/$CRATE_NAME-$TRAVIS_TAG-$TARGET.tar.gz" *
+    cd "$src"
+
+    rm -rf "$stage"
+}
+
+main
--- a/ci/install.sh
+++ b/ci/install.sh
@@ -0,0 +1,47 @@
+# Installs `cross` (and, for iOS targets, the matching rustup target).
+#
+# Reads TRAVIS_OS_NAME and TARGET from the environment.
+
+set -ex
+
+# Pick the host triple and version-aware sort for this OS, install the iOS
+# rustup target when needed, then install the newest tagged `cross` release.
+main() {
+    local target=
+    local sort=
+    if [ "$TRAVIS_OS_NAME" = linux ]; then
+        target=x86_64-unknown-linux-musl
+        sort=sort
+    else
+        target=x86_64-apple-darwin
+        sort=gsort  # for `sort --version-sort`, from brew's coreutils.
+    fi
+
+    # Builds for iOS are done on OSX, but require the specific target to be
+    # installed.
+    case "$TARGET" in
+        aarch64-apple-ios|armv7-apple-ios|armv7s-apple-ios|i386-apple-ios|x86_64-apple-ios)
+            rustup target install "$TARGET"
+            ;;
+    esac
+
+    # This fetches latest stable release
+    local tag=$(git ls-remote --tags --refs --exit-code https://github.com/japaric/cross \
+                       | cut -d/ -f3 \
+                       | grep -E '^v[0.1.0-9.]+$' \
+                       | "$sort" --version-sort \
+                       | tail -n1)
+    # Fail early rather than hand an empty --tag to the installer.
+    if [ -z "$tag" ]; then
+        echo "could not determine the latest cross release tag" >&2
+        exit 1
+    fi
+    curl -LSfs https://japaric.github.io/trust/install.sh | \
+        sh -s -- \
+           --force \
+           --git japaric/cross \
+           --tag "$tag" \
+           --target "$target"
+}
+
+main
--- a/ci/script.sh
+++ b/ci/script.sh
@@ -0,0 +1,27 @@
+# This script takes care of testing your crate.
+#
+# Reads TARGET, DISABLE_TESTS and TRAVIS_TAG from the environment.
+
+set -ex
+
+# Build $TARGET in debug and release mode, then run the tests unless
+# DISABLE_TESTS is set to a non-empty value.
+main() {
+    cross build --target "$TARGET"
+    cross build --target "$TARGET" --release
+
+    # Quoted so the test stays well-formed whether the variable is unset,
+    # empty, or contains whitespace.
+    if [ -n "$DISABLE_TESTS" ]; then
+        return
+    fi
+
+    cross test --target "$TARGET"
+    # cross test --target $TARGET --release
+
+    # cross run --target $TARGET
+    # cross run --target $TARGET --release
+}
+
+# we don't run the "test phase" when doing deploys
+if [ -z "$TRAVIS_TAG" ]; then
+    main
+fi
--- a/cpp/simdcomp/.gitignore
+++ b/cpp/simdcomp/.gitignore
@@ -1,9 +0,0 @@
-Makefile.in
-lib*
-unit*
-*.o
-src/*.lo
-src/*.o
-src/.deps
-src/.dirstamp
-src/.libs
--- a/cpp/simdcomp/.travis.yml
+++ b/cpp/simdcomp/.travis.yml
@@ -1,11 +0,0 @@
-language: c
-sudo: false
-compiler:
-  - gcc
-  - clang
-
-branches:
-  only:
-    - master
-
-script: make && ./unit
--- a/cpp/simdcomp/CHANGELOG
+++ b/cpp/simdcomp/CHANGELOG
@@ -1,9 +0,0 @@
-Upcoming
-  - added missing include
-  - improved portability (MSVC)
-  - implemented C89 compatibility
-Version 0.0.3 (19 May 2014)
-  - improved documentation
-Version 0.0.2 (6 February 2014)
-  - added go demo
-Version 0.0.1  (5 February 2014)
--- a/cpp/simdcomp/LICENSE
+++ b/cpp/simdcomp/LICENSE
@@ -1,27 +0,0 @@
-Copyright (c) 2014--, The authors
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice, this
-  list of conditions and the following disclaimer in the documentation and/or
-  other materials provided with the distribution.
-
-* Neither the name of the {organization} nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/cpp/simdcomp/README.md
+++ b/cpp/simdcomp/README.md
@@ -1,137 +0,0 @@
-The SIMDComp library
-====================
-[![Build Status](https://travis-ci.org/lemire/simdcomp.png)](https://travis-ci.org/lemire/simdcomp)
-
-A simple C library for compressing lists of integers using binary packing and SIMD instructions.
-The assumption is either that you have a list of 32-bit integers where most of them are small, or a list of 32-bit integers where differences between successive integers are small. No software is able to reliably compress an array of 32-bit random numbers.
-
-This library can decode at least 4 billions of compressed integers per second on most
-desktop or laptop processors. That is, it can decompress data at a rate of 15 GB/s.
-This is significantly faster than generic codecs like gzip, LZO, Snappy or LZ4.
-
-On a Skylake Intel processor, it can decode integers at a rate 0.3 cycles per integer,
-which can easily translate into more than 8 decoded billions integers per second.
-
-Contributors: Daniel Lemire, Nathan Kurz, Christoph Rupp, Anatol Belski, Nick White and others
-
-What is it for?
-------------
-
-This is a low-level library for fast integer compression. By design it does not define a compressed
-format. It is up to the (sophisticated) user to create a compressed format.
-
-Requirements
-------------
-
- Your processor should support SSE4.1 (It is supported by most Intel and AMD processors released since 2008.)
- It is possible to build the core part of the code if your processor support SSE2 (Pentium4 or better)
- C99 compliant compiler (GCC is assumed)
- A Linux-like distribution is assumed by the makefile
-
-For a plain C version that does not use SIMD instructions, see https://github.com/lemire/LittleIntPacker
-
-Usage
-------
-
-Compression works over blocks of 128 integers.
-
-For a complete working example, see example.c (you can build it and
-run it with "make example; ./example").
-
-
-
-1) Lists of integers in random order.
-
-```C            
-const uint32_t b = maxbits(datain);// computes bit width
-simdpackwithoutmask(datain, buffer, b);//compressed to buffer, compressing 128 32-bit integers down to b*32 bytes
-simdunpack(buffer, backbuffer, b);//uncompressed to backbuffer
-```
-
-While 128 32-bit integers are read, only b 128-bit words are written. Thus, the compression ratio is 32/b.
-
-2) Sorted lists of integers.
-
-We used differential coding: we store the difference between successive integers. For this purpose, we need an initial value (called offset).
-
-```C            
-uint32_t offset = 0;
-uint32_t b1 = simdmaxbitsd1(offset,datain); // bit width
-simdpackwithoutmaskd1(offset, datain, buffer, b1);//compressing 128 32-bit integers down to b1*32 bytes
-simdunpackd1(offset, buffer, backbuffer, b1);//uncompressed
-```
-
-General example for arrays of arbitrary length:
-```C
-int compress_decompress_demo() {
-  size_t k, N = 9999;
-  __m128i * endofbuf;
-  uint32_t * datain = malloc(N * sizeof(uint32_t));
-  uint8_t * buffer;
-  uint32_t * backbuffer = malloc(N * sizeof(uint32_t));
-  uint32_t b;
-
-  for (k = 0; k < N; ++k){        /* start with k=0, not k=1! */
-    datain[k] = k;
-  }
-
-  b = maxbits_length(datain, N);
-  buffer = malloc(simdpack_compressedbytes(N,b)); // allocate just enough memory
-  endofbuf = simdpack_length(datain, N, (__m128i *)buffer, b);
-  /* compressed data is stored between buffer and endofbuf using (endofbuf-buffer)*sizeof(__m128i) bytes */
-  /* would be safe to do : buffer = realloc(buffer,(endofbuf-(__m128i *)buffer)*sizeof(__m128i)); */
-  simdunpack_length((const __m128i *)buffer, N, backbuffer, b);
-
-  for (k = 0; k < N; ++k){
-    if(datain[k] != backbuffer[k]) {
-      printf("bug\n");
-      return -1;
-    }
-  }
-  return 0;
-}
-```
-
-
-3) Frame-of-Reference 
-
-We also have frame-of-reference (FOR) functions (see simdfor.h header). They work like the bit packing
-routines, but do not use differential coding so they allow faster search in some cases, at the expense
-of compression.
-
-Setup
---------
-
-
-make
-make test
-
-and if you are daring:
-
-make install
-
-Go
--------
-
-If you are a go user, there is a "go" folder where you will find a simple demo.
-
-Other libraries
----------------
-
-* Fast decoder for VByte-compressed integers https://github.com/lemire/MaskedVByte
-* Fast integer compression in C using StreamVByte https://github.com/lemire/streamvbyte
-* FastPFOR is a C++ research library well suited to compress unsorted arrays: https://github.com/lemire/FastPFor
-* SIMDCompressionAndIntersection is a C++ research library well suited for sorted arrays (differential coding)
-and computing intersections: https://github.com/lemire/SIMDCompressionAndIntersection
-* TurboPFor is a C library that offers lots of interesting optimizations. Well worth checking! (GPL license) https://github.com/powturbo/TurboPFor
-* Oroch is a C++ library that offers a usable API (MIT license) https://github.com/ademakov/Oroch
-
-
-References
------------
-
-* Daniel Lemire, Leonid Boytsov, Nathan Kurz, SIMD Compression and the Intersection of Sorted Integers, Software Practice & Experience 46 (6) 2016. http://arxiv.org/abs/1401.6399
-* Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second through vectorization, Software Practice & Experience 45 (1), 2015.  http://arxiv.org/abs/1209.2137 http://onlinelibrary.wiley.com/doi/10.1002/spe.2203/abstract
-* Jeff Plaisance, Nathan Kurz, Daniel Lemire, Vectorized VByte Decoding, International Symposium on Web Algorithms 2015, 2015. http://arxiv.org/abs/1503.07387
-* Wayne Xin Zhao, Xudong Zhang, Daniel Lemire, Dongdong Shan, Jian-Yun Nie, Hongfei Yan, Ji-Rong Wen, A General SIMD-based Approach to Accelerating Compression Algorithms, ACM Transactions on Information Systems 33 (3), 2015. http://arxiv.org/abs/1502.01916
-* T. D. Wu, Bitpacking techniques for indexing genomes: I. Hash tables, Algorithms for Molecular Biology 11 (5), 2016. http://almob.biomedcentral.com/articles/10.1186/s13015-016-0069-5
--- a/cpp/simdcomp/benchmarks/benchmark.c
+++ b/cpp/simdcomp/benchmarks/benchmark.c
@@ -1,235 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-
-#include "simdcomp.h"
-
-#ifdef _MSC_VER
-# include <windows.h>
-
-__int64 freq;
-
-typedef __int64 time_snap_t;
-
-static time_snap_t time_snap(void)
-{
-	__int64 now;
-
-	QueryPerformanceCounter((LARGE_INTEGER *)&now);
-
-	return (__int64)((now*1000000)/freq);
-}
-# define TIME_SNAP_FMT "%I64d"
-#else
-# define time_snap clock
-# define TIME_SNAP_FMT "%lu"
-typedef clock_t time_snap_t;
-#endif
-
-
-void benchmarkSelect() {
-    uint32_t buffer[128];
-    uint32_t backbuffer[128];
-    uint32_t initial = 33;
-    uint32_t b;
-    time_snap_t S1, S2, S3;
-    int i;
-    printf("benchmarking select \n");
-
-    /* this test creates delta encoded buffers with different bits, then
-     * performs lower bound searches for each key */
-    for (b = 0; b <= 32; b++) {
-        uint32_t prev = initial;
-        uint32_t out[128];
-        /* initialize the buffer */
-        for (i = 0; i < 128; i++) {
-            buffer[i] =  ((uint32_t)(1655765 * i )) ;
-            if(b < 32) buffer[i] %= (1<<b);
-        }
-        for (i = 0; i < 128; i++) {
-            buffer[i] = buffer[i] + prev;
-            prev = buffer[i];
-        }
-
-        for (i = 1; i < 128; i++) {
-            if(buffer[i] < buffer[i-1] )
-                buffer[i] = buffer[i-1];
-        }
-        assert(simdmaxbitsd1(initial, buffer)<=b);
-
-        for (i = 0; i < 128; i++) {
-            out[i] = 0; /* memset would do too */
-        }
-
-        /* delta-encode to 'i' bits */
-        simdpackwithoutmaskd1(initial, buffer, (__m128i *)out, b);
-
-        S1 = time_snap();
-        for (i = 0; i < 128 * 10; i++) {
-            uint32_t valretrieved = simdselectd1(initial, (__m128i *)out, b, (uint32_t)i % 128);
-            assert(valretrieved == buffer[i%128]);
-        }
-        S2 = time_snap();
-        for (i = 0; i < 128 * 10; i++) {
-            simdunpackd1(initial,  (__m128i *)out, backbuffer, b);
-            assert(backbuffer[i % 128] == buffer[i % 128]);
-        }
-        S3 = time_snap();
-        printf("bit width = %d, fast select function time = " TIME_SNAP_FMT ", naive time = " TIME_SNAP_FMT "  \n", b, (S2-S1), (S3-S2));
-    }
-}
-
-int uint32_cmp(const void *a, const void *b)
-{
-    const uint32_t *ia = (const uint32_t *)a;
-    const uint32_t *ib = (const uint32_t *)b;
-    if(*ia < *ib)
-        return -1;
-    else if (*ia > *ib)
-        return 1;
-    return 0;
-}
-
-/* adapted from wikipedia */
-int binary_search(uint32_t * A, uint32_t key, int imin, int imax)
-{
-    int imid;
-    imax --;
-    while(imin + 1 < imax) {
-        imid = imin + ((imax - imin) / 2);
-
-        if (A[imid] > key) {
-            imax = imid;
-        } else if (A[imid] < key) {
-            imin = imid;
-        } else {
-            return imid;
-        }
-    }
-    return imax;
-}
-
-
-/* adapted from wikipedia */
-int lower_bound(uint32_t * A, uint32_t key, int imin, int imax)
-{
-    int imid;
-    imax --;
-    while(imin + 1 < imax) {
-        imid = imin + ((imax - imin) / 2);
-
-        if (A[imid] >= key) {
-            imax = imid;
-        } else if (A[imid] < key) {
-            imin = imid;
-        }
-    }
-    if(A[imin] >= key) return imin;
-    return imax;
-}
-
-void benchmarkSearch() {
-    uint32_t buffer[128];
-    uint32_t backbuffer[128];
-    uint32_t out[128];
-    uint32_t result, initial = 0;
-    uint32_t b, i;
-    time_snap_t S1, S2, S3, S4;
-
-    printf("benchmarking search \n");
-
-    /* this test creates delta encoded buffers with different bits, then
-     * performs lower bound searches for each key */
-    for (b = 0; b <= 32; b++) {
-        uint32_t prev = initial;
-        /* initialize the buffer */
-        for (i = 0; i < 128; i++) {
-            buffer[i] =  ((uint32_t)rand()) ;
-            if(b < 32) buffer[i] %= (1<<b);
-        }
-
-        qsort(buffer,128, sizeof(uint32_t), uint32_cmp);
-
-        for (i = 0; i < 128; i++) {
-            buffer[i] = buffer[i] + prev;
-            prev = buffer[i];
-        }
-        for (i = 1; i < 128; i++) {
-            if(buffer[i] < buffer[i-1] )
-                buffer[i] = buffer[i-1];
-        }
-        assert(simdmaxbitsd1(initial, buffer)<=b);
-        for (i = 0; i < 128; i++) {
-            out[i] = 0; /* memset would do too */
-        }
-
-        /* delta-encode to 'i' bits */
-        simdpackwithoutmaskd1(initial, buffer, (__m128i *)out, b);
-        simdunpackd1(initial,  (__m128i *)out, backbuffer, b);
-
-        for (i = 0; i < 128; i++) {
-            assert(buffer[i] == backbuffer[i]);
-         }
-        S1 = time_snap();
-        for (i = 0; i < 128 * 10; i++) {
-
-            int pos;
-            uint32_t pseudorandomkey  =  buffer[i%128];
-            __m128i vecinitial = _mm_set1_epi32(initial);
-            pos = simdsearchd1(&vecinitial, (__m128i *)out, b,
-                               pseudorandomkey, &result);
-            if((result < pseudorandomkey) || (buffer[pos] != result)) {
-                printf("bug A.\n");
-            } else if (pos > 0) {
-                if(buffer[pos-1] >= pseudorandomkey)
-                    printf("bug B.\n");
-            }
-        }
-        S2 = time_snap();
-        for (i = 0; i < 128 * 10; i++) {
-            int pos;
-            uint32_t pseudorandomkey  =  buffer[i%128];
-            simdunpackd1(initial,  (__m128i *)out, backbuffer, b);
-            pos =  lower_bound(backbuffer, pseudorandomkey, 0, 128);
-            result = backbuffer[pos];
-
-            if((result < pseudorandomkey) || (buffer[pos] != result)) {
-                printf("bug C.\n");
-            } else if (pos > 0) {
-                if(buffer[pos-1] >= pseudorandomkey)
-                    printf("bug D.\n");
-            }
-        }
-        S3 = time_snap();
-        for (i = 0; i < 128 * 10; i++) {
-
-            int pos;
-            uint32_t pseudorandomkey  =  buffer[i%128];
-            pos = simdsearchwithlengthd1(initial, (__m128i *)out, b, 128,
-                               pseudorandomkey, &result);
-            if((result < pseudorandomkey) || (buffer[pos] != result)) {
-                printf("bug A.\n");
-            } else if (pos > 0) {
-                if(buffer[pos-1] >= pseudorandomkey)
-                    printf("bug B.\n");
-            }
-        }
-        S4 = time_snap();
-
-        printf("bit width = %d, fast search function time = " TIME_SNAP_FMT ", naive time = " TIME_SNAP_FMT " , fast with length time = " TIME_SNAP_FMT "  \n", b, (S2-S1), (S3-S2), (S4-S3) );
-    }
-}
-
-
-int main() {
-#ifdef _MSC_VER
-    QueryPerformanceFrequency((LARGE_INTEGER *)&freq);
-#endif
-    benchmarkSearch();
-    benchmarkSelect();
-    return 0;
-}
--- a/cpp/simdcomp/benchmarks/bitpackingbenchmark.c
+++ b/cpp/simdcomp/benchmarks/bitpackingbenchmark.c
@@ -1,205 +0,0 @@
-#include <stdio.h>
-
-#include "simdcomp.h"
-
-
-#define RDTSC_START(cycles)                                                   \
-    do {                                                                      \
-        register unsigned cyc_high, cyc_low;                                  \
-        __asm volatile(                                                       \
-            "cpuid\n\t"                                                       \
-            "rdtsc\n\t"                                                       \
-            "mov %%edx, %0\n\t"                                               \
-            "mov %%eax, %1\n\t"                                               \
-            : "=r"(cyc_high), "=r"(cyc_low)::"%rax", "%rbx", "%rcx", "%rdx"); \
-        (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;                      \
-    } while (0)
-
-#define RDTSC_FINAL(cycles)                                                   \
-    do {                                                                      \
-        register unsigned cyc_high, cyc_low;                                  \
-        __asm volatile(                                                       \
-            "rdtscp\n\t"                                                      \
-            "mov %%edx, %0\n\t"                                               \
-            "mov %%eax, %1\n\t"                                               \
-            "cpuid\n\t"                                                       \
-            : "=r"(cyc_high), "=r"(cyc_low)::"%rax", "%rbx", "%rcx", "%rdx"); \
-        (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;                      \
-    } while (0)
-
-
-
-
-uint32_t * get_random_array_from_bit_width(uint32_t length, uint32_t bit) {
-    uint32_t * answer = malloc(sizeof(uint32_t) * length);
-    uint32_t mask = (uint32_t) ((UINT64_C(1) << bit) - 1);
-    uint32_t i;
-    for(i = 0; i < length; ++i) {
-        answer[i] = rand() & mask;
-    }
-    return answer;
-}
-
-uint32_t * get_random_array_from_bit_width_d1(uint32_t length, uint32_t bit) {
-    uint32_t * answer = malloc(sizeof(uint32_t) * length);
-    uint32_t mask = (uint32_t) ((UINT64_C(1) << bit) - 1);
-    uint32_t i;
-    answer[0] = rand() & mask;
-    for(i = 1; i < length; ++i) {
-        answer[i] = answer[i-1] + (rand() & mask);
-    }
-    return answer;
-}
-
-
-void demo128() {
-    const uint32_t length = 128;
-    uint32_t bit;
-    printf("# --- %s\n", __func__);
-    printf("# compressing %d integers\n",length);
-    printf("# format: bit width, pack in cycles per int, unpack in cycles per int\n");
-    for(bit = 1; bit <= 32; ++bit) {
-        uint32_t i;
-
-        uint32_t * data = get_random_array_from_bit_width(length, bit);
-        __m128i * buffer = malloc(length * sizeof(uint32_t));
-        uint32_t * backdata = malloc(length * sizeof(uint32_t));
-        uint32_t repeat = 500;
-        uint64_t min_diff;
-        printf("%d\t",bit);
-        min_diff = (uint64_t)-1;
-        for (i = 0; i < repeat; i++) {
-            uint64_t cycles_start, cycles_final, cycles_diff;
-            __asm volatile("" ::: /* pretend to clobber */ "memory");
-            RDTSC_START(cycles_start);
-            simdpackwithoutmask(data,buffer, bit);
-            RDTSC_FINAL(cycles_final);
-            cycles_diff = (cycles_final - cycles_start);
-            if (cycles_diff < min_diff) min_diff = cycles_diff;
-        }
-        printf("%.2f\t",min_diff*1.0/length);
-        min_diff = (uint64_t)-1;
-        for (i = 0; i < repeat; i++) {
-            uint64_t cycles_start, cycles_final, cycles_diff;
-            __asm volatile("" ::: /* pretend to clobber */ "memory");
-            RDTSC_START(cycles_start);
-            simdunpack(buffer, backdata,bit);
-            RDTSC_FINAL(cycles_final);
-            cycles_diff = (cycles_final - cycles_start);
-            if (cycles_diff < min_diff) min_diff = cycles_diff;
-        }
-        printf("%.2f\t",min_diff*1.0/length);
-
-        free(data);
-        free(buffer);
-        free(backdata);
-        printf("\n");
-    }
-    printf("\n\n"); /* two blank lines are required by gnuplot */
-}
-
-void demo128_d1() {
-    const uint32_t length = 128;
-    uint32_t bit;
-    printf("# --- %s\n", __func__);
-    printf("# compressing %d integers\n",length);
-    printf("# format: bit width, pack in cycles per int, unpack in cycles per int\n");
-    for(bit = 1; bit <= 32; ++bit) {
-        uint32_t i;
-
-        uint32_t * data = get_random_array_from_bit_width_d1(length, bit);
-        __m128i * buffer = malloc(length * sizeof(uint32_t));
-        uint32_t * backdata = malloc(length * sizeof(uint32_t));
-        uint32_t repeat = 500;
-        uint64_t min_diff;
-        printf("%d\t",bit);
-        min_diff = (uint64_t)-1;
-        for (i = 0; i < repeat; i++) {
-            uint64_t cycles_start, cycles_final, cycles_diff;
-            __asm volatile("" ::: /* pretend to clobber */ "memory");
-            RDTSC_START(cycles_start);
-            simdpackwithoutmaskd1(0,data,buffer, bit);
-            RDTSC_FINAL(cycles_final);
-            cycles_diff = (cycles_final - cycles_start);
-            if (cycles_diff < min_diff) min_diff = cycles_diff;
-        }
-        printf("%.2f\t",min_diff*1.0/length);
-        min_diff = (uint64_t)-1;
-        for (i = 0; i < repeat; i++) {
-            uint64_t cycles_start, cycles_final, cycles_diff;
-            __asm volatile("" ::: /* pretend to clobber */ "memory");
-            RDTSC_START(cycles_start);
-            simdunpackd1(0,buffer, backdata,bit);
-            RDTSC_FINAL(cycles_final);
-            cycles_diff = (cycles_final - cycles_start);
-            if (cycles_diff < min_diff) min_diff = cycles_diff;
-        }
-        printf("%.2f\t",min_diff*1.0/length);
-
-        free(data);
-        free(buffer);
-        free(backdata);
-        printf("\n");
-    }
-    printf("\n\n"); /* two blank lines are required by gnuplot */
-}
-
-#ifdef __AVX2__
-void demo256() {
-    const uint32_t length = 256;
-    uint32_t bit;
-    printf("# --- %s\n", __func__);
-    printf("# compressing %d integers\n",length);
-    printf("# format: bit width, pack in cycles per int, unpack in cycles per int\n");
-    for(bit = 1; bit <= 32; ++bit) {
-        uint32_t i;
-
-        uint32_t * data = get_random_array_from_bit_width(length, bit);
-        __m256i * buffer = malloc(length * sizeof(uint32_t));
-        uint32_t * backdata = malloc(length * sizeof(uint32_t));
-        uint32_t repeat = 500;
-        uint64_t min_diff;
-        printf("%d\t",bit);
-        min_diff = (uint64_t)-1;
-        for (i = 0; i < repeat; i++) {
-            uint64_t cycles_start, cycles_final, cycles_diff;
-            __asm volatile("" ::: /* pretend to clobber */ "memory");
-            RDTSC_START(cycles_start);
-            avxpackwithoutmask(data,buffer, bit);
-            RDTSC_FINAL(cycles_final);
-            cycles_diff = (cycles_final - cycles_start);
-            if (cycles_diff < min_diff) min_diff = cycles_diff;
-        }
-        printf("%.2f\t",min_diff*1.0/length);
-        min_diff = (uint64_t)-1;
-        for (i = 0; i < repeat; i++) {
-            uint64_t cycles_start, cycles_final, cycles_diff;
-            __asm volatile("" ::: /* pretend to clobber */ "memory");
-            RDTSC_START(cycles_start);
-            avxunpack(buffer, backdata,bit);
-            RDTSC_FINAL(cycles_final);
-            cycles_diff = (cycles_final - cycles_start);
-            if (cycles_diff < min_diff) min_diff = cycles_diff;
-        }
-        printf("%.2f\t",min_diff*1.0/length);
-
-        free(data);
-        free(buffer);
-        free(backdata);
-        printf("\n");
-    }
-    printf("\n\n"); /* two blank lines are required by gnuplot */
-}
-#endif /* avx 2 */
-
-
-int main() {
-    demo128();
-    demo128_d1();
-#ifdef __AVX2__
-    demo256();
-#endif
-    return 0;
-
-
-}
--- a/cpp/simdcomp/example.c
+++ b/cpp/simdcomp/example.c
@@ -1,195 +0,0 @@
-/* Type "make example" to build this example program. */
-#include <stdio.h>
-#include <time.h>
-#include <stdlib.h>
-#include "simdcomp.h"
-
-/**
-We provide several different code examples.
-**/
-
-
-/* very simple test to illustrate a simple application */
-int compress_decompress_demo() {
-    size_t k, N = 9999;
-    __m128i * endofbuf;
-    int howmanybytes;
-    float compratio;
-    uint32_t * datain = malloc(N * sizeof(uint32_t));
-    uint8_t * buffer;
-    uint32_t * backbuffer = malloc(N * sizeof(uint32_t));
-    uint32_t b;
-    printf("== simple test\n");
-
-    for (k = 0; k < N; ++k) {       /* start with k=0, not k=1! */
-        datain[k] = k;
-    }
-
-    b = maxbits_length(datain, N);
-    buffer = malloc(simdpack_compressedbytes(N,b));
-    endofbuf = simdpack_length(datain, N, (__m128i *)buffer, b);
-    howmanybytes = (endofbuf-(__m128i *)buffer)*sizeof(__m128i); /* number of compressed bytes */
-    compratio = N*sizeof(uint32_t) * 1.0 / howmanybytes;
-    /* endofbuf points to the end of the compressed data */
-    buffer = realloc(buffer,(endofbuf-(__m128i *)buffer)*sizeof(__m128i)); /* optional but safe. */
-    printf("Compressed %d integers down to %d bytes (comp. ratio = %f).\n",(int)N,howmanybytes,compratio);
-    /* in actual applications b must be stored and retrieved: caller is responsible for that. */
-    simdunpack_length((const __m128i *)buffer, N, backbuffer, b); /* will return a pointer to endofbuf */ 
-
-    for (k = 0; k < N; ++k) {
-        if(datain[k] != backbuffer[k]) {
-            printf("bug at %lu \n",(unsigned long)k);
-            return -1;
-        }
-    }
-    printf("Code works!\n");
-    free(datain);
-    free(buffer);
-    free(backbuffer);
-    return 0;
-}
-
-
-
-/* compresses data from datain to buffer, returns how many bytes written
-used below in simple_demo */
-size_t compress(uint32_t * datain, size_t length, uint8_t * buffer) {
-    uint32_t offset;
-    uint8_t * initout;
-    size_t k;
-    if(length/SIMDBlockSize*SIMDBlockSize != length) {
-        printf("Data length should be a multiple of %i \n",SIMDBlockSize);
-    }
-    offset = 0;
-    initout = buffer;
-    for(k = 0; k < length / SIMDBlockSize; ++k) {
-        uint32_t b = simdmaxbitsd1(offset,
-                                   datain + k * SIMDBlockSize);
-        *buffer++ = b;
-        simdpackwithoutmaskd1(offset, datain + k * SIMDBlockSize, (__m128i *) buffer,
-                              b);
-        offset = datain[k * SIMDBlockSize + SIMDBlockSize - 1];
-        buffer += b * sizeof(__m128i);
-    }
-    return buffer - initout;
-}
-
-/* Another illustration ... */
-void simple_demo() {
-    size_t REPEAT = 10, gap;
-    size_t N = 1000 * SIMDBlockSize;/* SIMDBlockSize is 128 */
-    uint32_t * datain = malloc(N * sizeof(uint32_t));
-    size_t compsize;
-    clock_t start, end;
-    uint8_t * buffer = malloc(N * sizeof(uint32_t) + N / SIMDBlockSize); /* output buffer */
-    uint32_t * backbuffer = malloc(SIMDBlockSize * sizeof(uint32_t));
-    printf("== simple demo\n");
-    for (gap = 1; gap <= 243; gap *= 3) {
-        size_t k, repeat;
-        uint32_t offset = 0;
-        uint32_t bogus = 0;
-        double numberofseconds;
-
-        printf("\n");
-        printf(" gap = %lu \n", (unsigned long) gap);
-        datain[0] = 0;
-        for (k = 1; k < N; ++k)
-            datain[k] = datain[k-1] + ( rand() % (gap + 1) );
-        compsize = compress(datain,N,buffer);
-        printf("compression ratio = %f \n",  (N * sizeof(uint32_t))/ (compsize * 1.0 ));
-        start = clock();
-        for(repeat = 0; repeat < REPEAT; ++repeat) {
-            uint8_t * decbuffer = buffer;
-            for (k = 0; k * SIMDBlockSize < N; ++k) {
-                uint8_t b = *decbuffer++;
-                simdunpackd1(offset, (__m128i *) decbuffer, backbuffer, b);
-                /* do something here with backbuffer */
-                bogus += backbuffer[3];
-                decbuffer += b * sizeof(__m128i);
-                offset = backbuffer[SIMDBlockSize - 1];
-            }
-        }
-        end = clock();
-        numberofseconds = (end-start)/(double)CLOCKS_PER_SEC;
-        printf("decoding speed in million of integers per second %f \n",N*REPEAT/(numberofseconds*1000.0*1000.0));
-        start = clock();
-        for(repeat = 0; repeat < REPEAT; ++repeat) {
-            uint8_t * decbuffer = buffer;
-            for (k = 0; k * SIMDBlockSize < N; ++k) {
-                memcpy(backbuffer,decbuffer+k*SIMDBlockSize,SIMDBlockSize*sizeof(uint32_t));
-                bogus += backbuffer[3] - backbuffer[100];
-            }
-        }
-        end = clock();
-        numberofseconds = (end-start)/(double)CLOCKS_PER_SEC;
-        printf("memcpy speed in million of integers per second %f \n",N*REPEAT/(numberofseconds*1000.0*1000.0));
-        printf("ignore me %i \n",bogus);
-        printf("All tests are in CPU cache. Avoid out-of-cache decoding in applications.\n");
-    }
-    free(buffer);
-    free(datain);
-    free(backbuffer);
-}
-
-/* Used below in more_sophisticated_demo ... */
-size_t varying_bit_width_compress(uint32_t * datain, size_t length, uint8_t * buffer) {
-    uint8_t * initout;
-    size_t k;
-    if(length/SIMDBlockSize*SIMDBlockSize != length) {
-        printf("Data length should be a multiple of %i \n",SIMDBlockSize);
-    }
-    initout = buffer;
-    for(k = 0; k < length / SIMDBlockSize; ++k) {
-        uint32_t b = maxbits(datain);
-        *buffer++ = b;
-        simdpackwithoutmask(datain, (__m128i *)buffer, b);
-        datain += SIMDBlockSize;
-        buffer += b * sizeof(__m128i);
-    }
-    return buffer - initout;
-}
-
-/* Here we compress the data in blocks of 128 integers with varying bit width */
-int varying_bit_width_demo() {
-    size_t nn = 128 * 2;
-    uint32_t * datainn = malloc(nn * sizeof(uint32_t));
-    uint8_t * buffern = malloc(nn * sizeof(uint32_t) + nn / SIMDBlockSize);
-    uint8_t * initbuffern = buffern;
-    uint32_t * backbuffern = malloc(nn * sizeof(uint32_t));
-    size_t k, compsize;
-    printf("== varying bit-width demo\n");
-
-    for(k=0; k<nn; ++k) {
-        datainn[k] = rand() % (k + 1);
-    }
-
-    compsize = varying_bit_width_compress(datainn,nn,buffern);
-    printf("encoded size: %u (original size: %u)\n", (unsigned)compsize,
-           (unsigned)(nn * sizeof(uint32_t)));
-
-    for (k = 0; k * SIMDBlockSize < nn; ++k) {
-        uint32_t b = *buffern;
-        buffern++;
-        simdunpack((const __m128i *)buffern, backbuffern + k * SIMDBlockSize, b);
-        buffern += b * sizeof(__m128i);
-    }
-
-    for (k = 0; k < nn; ++k) {
-        if(backbuffern[k] != datainn[k]) {
-            printf("bug\n");
-            return -1;
-        }
-    }
-    printf("Code works!\n");
-    free(datainn);
-    free(initbuffern);
-    free(backbuffern);
-    return 0;
-}
-
-int main() {
-    if(compress_decompress_demo() != 0) return -1;
-    if(varying_bit_width_demo() != 0) return -1;
-    simple_demo();
-    return 0;
-}
--- a/cpp/simdcomp/go/README.md
+++ b/cpp/simdcomp/go/README.md
@@ -1,13 +0,0 @@
-Simple Go demo
-==============
-
-Setup
-======
-
-Start by installing the simdcomp library (make && make install).
-
-Then type:
-
-go run test.go
-
-
--- a/cpp/simdcomp/go/test.go
+++ b/cpp/simdcomp/go/test.go
@@ -1,71 +0,0 @@
-/////////
-// This particular file is in the public domain.
-// Author: Daniel Lemire
-////////
-
-package main 
-
-/*
-#cgo LDFLAGS: -lsimdcomp
-#include <simdcomp.h>
-*/
-import "C"
-import "fmt"
-
-//////////
-// For this demo, we pack and unpack blocks of 128 integers
-/////////
-func main() {
-        // I am going to use C types. Alternative might be to use unsafe.Pointer calls, see http://bit.ly/1ndw3W3
-        // this is our original data
-        var data [128]C.uint32_t
-        for i := C.uint32_t(0); i < C.uint32_t(128); i++ {
-            data[i] = i
-        }
-
-
-
-
-
-        ////////////
-        // We first pack without differential coding
-        ///////////
-        // computing how many bits per int. is needed
-        b  := C.maxbits(&data[0])
-        ratio := 32.0/float64(b)
-        fmt.Println("Bit width  ", b)
-        fmt.Println(fmt.Sprintf("Compression ratio %f ", ratio))
-         // we are now going to create a buffer to receive the packed data (each __m128i uses 128 bits)
-        out := make([] C.__m128i,b)       
-        C.simdpackwithoutmask( &data[0],&out[0],b);
-        var recovereddata [128]C.uint32_t
-        C.simdunpack(&out[0],&recovereddata[0],b)
-        for i := 0; i < 128; i++ {
-            if data[i] != recovereddata[i]  {
-                  fmt.Println("Bug ")
-                  return
-            }
-        } 
-
-        ///////////
-        // Next, we use differential coding
-        //////////
-        offset := C.uint32_t(0) // if you pack data from K to K + 128, offset should be the value at K-1. When K = 0, choose a default
-        b1  := C.simdmaxbitsd1(offset,&data[0])
-        ratio1 := 32.0/float64(b1)
-        fmt.Println("Bit width  ", b1)
-        fmt.Println(fmt.Sprintf("Compression ratio %f ", ratio1))
-         // we are now going to create a buffer to receive the packed data (each __m128i uses 128 bits)
-        out = make([] C.__m128i,b1)       
-        C.simdpackwithoutmaskd1(offset, &data[0],&out[0],b1);
-        C.simdunpackd1(offset,&out[0],&recovereddata[0],b1)
-        for i := 0; i < 128; i++ {
-            if data[i] != recovereddata[i]  {
-                  fmt.Println("Bug ")
-                  return
-            }
-        } 
-
-        fmt.Println("test succesful.")
-      
-}
--- a/cpp/simdcomp/include/avxbitpacking.h
+++ b/cpp/simdcomp/include/avxbitpacking.h
@@ -1,40 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-
-#ifndef INCLUDE_AVXBITPACKING_H_
-#define INCLUDE_AVXBITPACKING_H_
-
-
-#ifdef __AVX2__
-
-#include "portability.h"
-
-
-/* AVX2 is required */
-#include <immintrin.h>
-/* for memset */
-#include <string.h>
-
-#include "simdcomputil.h"
-
-enum{ AVXBlockSize = 256};
-
-/* max integer logarithm over a range of AVXBlockSize integers (256 integer) */
-uint32_t avxmaxbits(const uint32_t * begin);
-
-/* reads 256 values from "in", writes  "bit" 256-bit vectors to "out" */
-void avxpack(const uint32_t *  in,__m256i *  out, const uint32_t bit);
-
-/* reads 256 values from "in", writes  "bit" 256-bit vectors to "out" */
-void avxpackwithoutmask(const uint32_t *  in,__m256i *  out, const uint32_t bit);
-
-/* reads  "bit" 256-bit vectors from "in", writes  256 values to "out" */
-void avxunpack(const __m256i *  in,uint32_t *  out, const uint32_t bit);
-
-
-
-
-#endif /* __AVX2__ */
-
-#endif /* INCLUDE_AVXBITPACKING_H_ */
--- a/cpp/simdcomp/include/portability.h
+++ b/cpp/simdcomp/include/portability.h
@@ -1,81 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-#ifndef SIMDBITCOMPAT_H_
-#define SIMDBITCOMPAT_H_
-
-#include <iso646.h> /* mostly for Microsoft compilers */
-#include <string.h>
-
-#if SIMDCOMP_DEBUG
-# define SIMDCOMP_ALWAYS_INLINE inline
-# define SIMDCOMP_NEVER_INLINE
-# define SIMDCOMP_PURE
-#else
-# if defined(__GNUC__)
-#  if __GNUC__ >= 3
-#   define SIMDCOMP_ALWAYS_INLINE inline __attribute__((always_inline))
-#   define SIMDCOMP_NEVER_INLINE __attribute__((noinline))
-#   define SIMDCOMP_PURE __attribute__((pure))
-#  else
-#   define SIMDCOMP_ALWAYS_INLINE inline
-#   define SIMDCOMP_NEVER_INLINE
-#   define SIMDCOMP_PURE
-#  endif
-# elif defined(_MSC_VER)
-#  define SIMDCOMP_ALWAYS_INLINE __forceinline
-#  define SIMDCOMP_NEVER_INLINE
-#  define SIMDCOMP_PURE
-# else
-#  if __has_attribute(always_inline)
-#   define SIMDCOMP_ALWAYS_INLINE inline __attribute__((always_inline))
-#  else
-#   define SIMDCOMP_ALWAYS_INLINE inline
-#  endif
-#  if __has_attribute(noinline)
-#   define SIMDCOMP_NEVER_INLINE __attribute__((noinline))
-#  else
-#   define SIMDCOMP_NEVER_INLINE
-#  endif
-#  if __has_attribute(pure)
-#   define SIMDCOMP_PURE __attribute__((pure))
-#  else
-#   define SIMDCOMP_PURE
-#  endif
-# endif
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER < 1600
-typedef unsigned int uint32_t;
-typedef unsigned char uint8_t;
-typedef signed char int8_t;
-#else
-#include <stdint.h> /* part of Visual Studio 2010 and better, others likely anyway */
-#endif
-
-#if defined(_MSC_VER)
-#define SIMDCOMP_ALIGNED(x) __declspec(align(x))
-#else
-#if defined(__GNUC__)
-#define SIMDCOMP_ALIGNED(x) __attribute__ ((aligned(x)))
-#endif
-#endif
-
-#if defined(_MSC_VER)
-# include <intrin.h>
-/* 64-bit needs extending */
-# define SIMDCOMP_CTZ(result, mask) do { \
-		unsigned long index; \
-		if (!_BitScanForward(&(index), (mask))) { \
-			(result) = 32U; \
-		} else { \
-			(result) = (uint32_t)(index); \
-		} \
-	} while (0)
-#else
-# define SIMDCOMP_CTZ(result, mask) \
-	result = __builtin_ctz(mask)
-#endif
-
-#endif /* SIMDBITCOMPAT_H_ */
-
--- a/cpp/simdcomp/include/simdbitpacking.h
+++ b/cpp/simdcomp/include/simdbitpacking.h
@@ -1,72 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-#ifndef SIMDBITPACKING_H_
-#define SIMDBITPACKING_H_
-
-#include "portability.h"
-
-/* SSE2 is required */
-#include <emmintrin.h>
-/* for memset */
-#include <string.h>
-
-#include "simdcomputil.h"
-
-/***
-* Please see example.c for various examples on how to make good use
-* of these functions.
-*/
-
-
-
-/* reads 128 values from "in", writes  "bit" 128-bit vectors to "out".
- * The input values are masked so that only the least significant "bit" bits are used. */
-void simdpack(const uint32_t *  in,__m128i *  out, const uint32_t bit);
-
-/* reads 128 values from "in", writes  "bit" 128-bit vectors to "out".
- * The input values are assumed to be less than 1<<bit. */
-void simdpackwithoutmask(const uint32_t *  in,__m128i *  out, const uint32_t bit);
-
-/* reads  "bit" 128-bit vectors from "in", writes  128 values to "out" */
-void simdunpack(const __m128i *  in,uint32_t *  out, const uint32_t bit);
-
-
-
-/* how many compressed bytes are needed to compressed length integers using a bit width of bit with 
-the  simdpackFOR_length function. */
-int simdpack_compressedbytes(int length, const uint32_t bit);
-
-/* like simdpack, but supports an undetermined number of inputs.
- * This is useful if you need to unpack an array of integers that is not divisible by 128 integers.
- * Returns a pointer to the (advanced) compressed array. Compressed data is stored in the memory location between 
- the provided (out) pointer and the returned pointer. */
-__m128i * simdpack_length(const uint32_t *   in, size_t length, __m128i *    out, const uint32_t bit);
-
-/* like simdunpack, but supports an undetermined number of inputs.
- * This is useful if you need to unpack an array of integers that is not divisible by 128 integers.
- * Returns a pointer to the (advanced) compressed array. The read compressed data is between the provided 
- (in) pointer and the returned pointer. */
-const __m128i * simdunpack_length(const __m128i *   in, size_t length, uint32_t * out, const uint32_t bit);
-
-
-
-
-/* like simdpack, but supports an undetermined small number of inputs. This is useful if you need to pack less 
-than 128 integers.
- * Note that this function is much slower.
- * Returns a pointer to the (advanced) compressed array. Compressed data is stored in the memory location 
- between the provided (out) pointer and the returned pointer. */
-__m128i * simdpack_shortlength(const uint32_t *   in, int length, __m128i *    out, const uint32_t bit);
-
-/* like simdunpack, but supports an undetermined small number of inputs. This is useful if you need to unpack less
- than 128 integers.
- * Note that this function is much slower.
- * Returns a pointer to the (advanced) compressed array. The read compressed data is between the provided (in) 
- pointer and the returned pointer. */
-const __m128i * simdunpack_shortlength(const __m128i *   in, int length, uint32_t * out, const uint32_t bit);
-
-/* given a block of 128 packed values, this function sets the value at index "index" to "value" */
-void simdfastset(__m128i * in128, uint32_t b, uint32_t value, size_t index);
-
-#endif /* SIMDBITPACKING_H_ */
--- a/cpp/simdcomp/include/simdcomp.h
+++ b/cpp/simdcomp/include/simdcomp.h
@@ -1,22 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-
-#ifndef SIMDCOMP_H_
-#define SIMDCOMP_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "simdbitpacking.h"
-#include "simdcomputil.h"
-#include "simdfor.h"
-#include "simdintegratedbitpacking.h"
-#include "avxbitpacking.h"
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif 
--- a/cpp/simdcomp/include/simdcomputil.h
+++ b/cpp/simdcomp/include/simdcomputil.h
@@ -1,54 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-
-#ifndef SIMDCOMPUTIL_H_
-#define SIMDCOMPUTIL_H_
-
-#include "portability.h"
-
-/* SSE2 is required */
-#include <emmintrin.h>
-
-
-
-
-/* returns the integer logarithm of v (bit width) */
-uint32_t bits(const uint32_t v);
-
-/* max integer logarithm over a range of SIMDBlockSize integers (128 integer) */
-uint32_t maxbits(const uint32_t * begin);
-
-/* same as maxbits, but we specify the number of integers */
-uint32_t maxbits_length(const uint32_t * in,uint32_t length);
-
-enum{ SIMDBlockSize = 128};
-
-
-/* computes (quickly) the minimal value of 128 values */
-uint32_t simdmin(const uint32_t * in);
-
-/* computes (quickly) the minimal value of the specified number of values */
-uint32_t simdmin_length(const uint32_t * in, uint32_t length);
-
-#ifdef __SSE4_1__
-/* computes (quickly) the minimal and maximal value of the specified number of values */
-void simdmaxmin_length(const uint32_t * in, uint32_t length, uint32_t * getmin, uint32_t * getmax);
-
-/* computes (quickly) the minimal and maximal value of the 128 values */
-void simdmaxmin(const uint32_t * in, uint32_t * getmin, uint32_t * getmax);
-
-#endif
-
-/* like maxbit over 128 integers (SIMDBlockSize) with provided initial value 
-   and using differential coding */
-uint32_t simdmaxbitsd1(uint32_t initvalue, const uint32_t * in);
-
-/* like simdmaxbitsd1, but calculates maxbits over |length| integers 
-   with provided initial value. |length| can be any arbitrary value. */
-uint32_t simdmaxbitsd1_length(uint32_t initvalue, const uint32_t * in,
-                uint32_t length);
-
-
-
-#endif /* SIMDCOMPUTIL_H_ */
--- a/cpp/simdcomp/include/simdfor.h
+++ b/cpp/simdcomp/include/simdfor.h
@@ -1,72 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-#ifndef INCLUDE_SIMDFOR_H_
-#define INCLUDE_SIMDFOR_H_
-
-#include "portability.h"
-
-/* SSE2 is required */
-#include <emmintrin.h>
-
-#include "simdcomputil.h"
-#include "simdbitpacking.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* reads 128 values from "in", writes  "bit" 128-bit vectors to "out" */
-void simdpackFOR(uint32_t initvalue, const uint32_t *  in,__m128i *  out, const uint32_t bit);
-
-
-/* reads "bit" 128-bit vectors from "in", writes  128 values to "out" */
-void simdunpackFOR(uint32_t initvalue, const __m128i *  in,uint32_t *  out, const uint32_t bit);
-
-
-/* how many compressed bytes are needed to compressed length integers using a bit width of bit with 
-the  simdpackFOR_length function. */
-int simdpackFOR_compressedbytes(int length, const uint32_t bit);
-
-/* like simdpackFOR, but supports an undetermined number of inputs. 
-This is useful if you need to pack less than 128 integers. Note that this function is much slower. 
- Compressed data is stored in the memory location between 
- the provided (out) pointer and the returned pointer. */
-__m128i * simdpackFOR_length(uint32_t initvalue, const uint32_t *   in, int length, __m128i *    out, const uint32_t bit);
-
-/* like simdunpackFOR, but supports an undetermined number of inputs. 
-This is useful if you need to unpack less than 128 integers. Note that this function is much slower. 
- The read compressed data is between the provided 
- (in) pointer and the returned pointer.  */
-const __m128i * simdunpackFOR_length(uint32_t initvalue, const __m128i *   in, int length, uint32_t * out, const uint32_t bit);
-
-
-/* returns the value stored at the specified "slot".
-* */
-uint32_t simdselectFOR(uint32_t initvalue, const __m128i *in, uint32_t bit,
-                int slot);
-
-/* given a block of 128 packed values, this function sets the value at index "index" to "value" */
-void simdfastsetFOR(uint32_t initvalue, __m128i * in, uint32_t bit, uint32_t value, size_t index);
-
-
-/* searches "bit" 128-bit vectors from "in" (= length<=128 encoded integers) for the first encoded uint32 value
- * which is >= |key|, and returns its position. It is assumed that the values
- * stored are in sorted order.
- * The encoded key is stored in "*presult".
- * The first length decoded integers, ignoring others. If no value is larger or equal to the key,
- * length is returned. Length should be no larger than 128.
- *
- * If no value is larger or equal to the key,
-* length is returned */
-int simdsearchwithlengthFOR(uint32_t initvalue, const __m128i *in, uint32_t bit,
-                int length, uint32_t key, uint32_t *presult);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-
-
-
-#endif /* INCLUDE_SIMDFOR_H_ */
--- a/cpp/simdcomp/include/simdintegratedbitpacking.h
+++ b/cpp/simdcomp/include/simdintegratedbitpacking.h
@@ -1,98 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-
-#ifndef SIMD_INTEGRATED_BITPACKING_H
-#define SIMD_INTEGRATED_BITPACKING_H
-
-#include "portability.h"
-
-/* SSE2 is required */
-#include <emmintrin.h>
-
-#include "simdcomputil.h"
-#include "simdbitpacking.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* reads 128 values from "in", writes  "bit" 128-bit vectors to "out"
-   integer values should be in sorted order (for best results).
-   The differences are masked so that only the least significant "bit" bits are used. */
-void simdpackd1(uint32_t initvalue, const uint32_t *  in,__m128i *  out, const uint32_t bit);
-
-
-/* reads 128 values from "in", writes  "bit" 128-bit vectors to "out"
-   integer values should be in sorted order (for best results).
-   The difference values are assumed to be less than 1<<bit. */
-void simdpackwithoutmaskd1(uint32_t initvalue, const uint32_t *  in,__m128i *  out, const uint32_t bit);
-
-
-/* reads "bit" 128-bit vectors from "in", writes  128 values to "out" */
-void simdunpackd1(uint32_t initvalue, const __m128i *  in,uint32_t *  out, const uint32_t bit);
-
-
-/* searches "bit" 128-bit vectors from "in" (= 128 encoded integers) for the first encoded uint32 value
- * which is >= |key|, and returns its position. It is assumed that the values
- * stored are in sorted order.
- * The encoded key is stored in "*presult". If no value is larger or equal to the key,
-* 128 is returned. The pointer initOffset is a pointer to the last four value decoded
-* (when starting out, this can be a zero vector or initialized with _mm_set1_epi32(init)),
-* and the vector gets updated.
-**/
-int
-simdsearchd1(__m128i * initOffset, const __m128i *in, uint32_t bit,
-                uint32_t key, uint32_t *presult);
-
-
-/* searches "bit" 128-bit vectors from "in" (= length<=128 encoded integers) for the first encoded uint32 value
- * which is >= |key|, and returns its position. It is assumed that the values
- * stored are in sorted order.
- * The encoded key is stored in "*presult".
- * The first length decoded integers, ignoring others. If no value is larger or equal to the key,
- * length is returned. Length should be no larger than 128.
- *
- * If no value is larger or equal to the key,
-* length is returned */
-int simdsearchwithlengthd1(uint32_t initvalue, const __m128i *in, uint32_t bit,
-                int length, uint32_t key, uint32_t *presult);
-
-
-
-/* returns the value stored at the specified "slot".
-* */
-uint32_t simdselectd1(uint32_t initvalue, const __m128i *in, uint32_t bit,
-                int slot);
-
-/* given a block of 128 packed values, this function sets the value at index "index" to "value",
- * you must somehow know the previous value.
- * Because of differential coding, all following values are incremented by the offset between this new
- * value and the old value... 
- * This functions is useful if you want to modify the last value. 
- */
-void simdfastsetd1fromprevious( __m128i * in, uint32_t bit, uint32_t previousvalue, uint32_t value, size_t index);
-
-/* given a block of 128 packed values, this function sets the value at index "index" to "value",
- * This function computes the previous value if needed.
- * Because of differential coding, all following values are incremented by the offset between this new
- * value and the old value...
- * This functions is useful if you want to modify the last value. 
- */
-void simdfastsetd1(uint32_t initvalue, __m128i * in, uint32_t bit, uint32_t value, size_t index);
-
-
-/*Simply scan the data
-* The pointer initOffset is a pointer to the last four value decoded
-* (when starting out, this can be a zero vector or initialized with _mm_set1_epi32(init);),
-* and the vector gets updated.
-* */
-
-void
-simdscand1(__m128i * initOffset, const __m128i *in, uint32_t bit);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif
--- a/cpp/simdcomp/makefile
+++ b/cpp/simdcomp/makefile
@@ -1,79 +0,0 @@
-# minimalist makefile
-.SUFFIXES:
-#
-.SUFFIXES: .cpp .o .c .h
-ifeq ($(DEBUG),1)
-CFLAGS = -fPIC  -std=c89 -ggdb -msse4.1 -march=native -Wall -Wextra -Wshadow -fsanitize=undefined  -fno-omit-frame-pointer -fsanitize=address
-else
-CFLAGS = -fPIC -std=c89 -O3 -msse4.1  -march=native -Wall -Wextra -Wshadow
-endif # debug
-LDFLAGS = -shared
-LIBNAME=libsimdcomp.so.0.0.3
-all:  unit unit_chars bitpackingbenchmark $(LIBNAME)
-test:
-	./unit
-	./unit_chars
-install: $(OBJECTS)
-	cp $(LIBNAME) /usr/local/lib
-	ln -s /usr/local/lib/$(LIBNAME) /usr/local/lib/libsimdcomp.so
-	ldconfig
-	cp $(HEADERS) /usr/local/include
-
-
-
-HEADERS=./include/simdbitpacking.h ./include/simdcomputil.h ./include/simdintegratedbitpacking.h ./include/simdcomp.h ./include/simdfor.h ./include/avxbitpacking.h
-
-uninstall:
-	for h in $(HEADERS) ; do rm  /usr/local/$$h; done
-	rm  /usr/local/lib/$(LIBNAME)
-	rm /usr/local/lib/libsimdcomp.so
-	ldconfig
-
-
-OBJECTS= simdbitpacking.o simdintegratedbitpacking.o simdcomputil.o \
-		 simdpackedsearch.o simdpackedselect.o simdfor.o avxbitpacking.o
-
-$(LIBNAME): $(OBJECTS)
-	$(CC) $(CFLAGS) -o $(LIBNAME) $(OBJECTS)  $(LDFLAGS)
-
-
-avxbitpacking.o: ./src/avxbitpacking.c $(HEADERS)
-	$(CC) $(CFLAGS) -c ./src/avxbitpacking.c -Iinclude
-
-
-simdfor.o: ./src/simdfor.c $(HEADERS)
-	$(CC) $(CFLAGS) -c ./src/simdfor.c -Iinclude
-
-
-simdcomputil.o: ./src/simdcomputil.c $(HEADERS)
-	$(CC) $(CFLAGS) -c ./src/simdcomputil.c -Iinclude
-
-simdbitpacking.o: ./src/simdbitpacking.c $(HEADERS)
-	$(CC) $(CFLAGS) -c ./src/simdbitpacking.c -Iinclude
-
-simdintegratedbitpacking.o: ./src/simdintegratedbitpacking.c  $(HEADERS)
-	$(CC) $(CFLAGS) -c ./src/simdintegratedbitpacking.c -Iinclude
-
-simdpackedsearch.o: ./src/simdpackedsearch.c $(HEADERS)
-	$(CC) $(CFLAGS) -c ./src/simdpackedsearch.c -Iinclude
-
-simdpackedselect.o: ./src/simdpackedselect.c $(HEADERS)
-	$(CC) $(CFLAGS) -c ./src/simdpackedselect.c -Iinclude
-
-example: ./example.c    $(HEADERS) $(OBJECTS)
-	$(CC) $(CFLAGS) -o example ./example.c -Iinclude  $(OBJECTS)
-
-unit: ./tests/unit.c    $(HEADERS) $(OBJECTS)
-	$(CC) $(CFLAGS) -o unit ./tests/unit.c -Iinclude  $(OBJECTS)
-
-bitpackingbenchmark: ./benchmarks/bitpackingbenchmark.c    $(HEADERS) $(OBJECTS)
-	$(CC) $(CFLAGS) -o bitpackingbenchmark ./benchmarks/bitpackingbenchmark.c -Iinclude  $(OBJECTS)
-benchmark: ./benchmarks/benchmark.c    $(HEADERS) $(OBJECTS)
-	$(CC) $(CFLAGS) -o benchmark ./benchmarks/benchmark.c -Iinclude  $(OBJECTS)
-dynunit: ./tests/unit.c    $(HEADERS) $(LIBNAME)
-	$(CC) $(CFLAGS) -o dynunit ./tests/unit.c -Iinclude  -lsimdcomp
-
-unit_chars: ./tests/unit_chars.c    $(HEADERS) $(OBJECTS)
-	$(CC) $(CFLAGS) -o unit_chars ./tests/unit_chars.c -Iinclude  $(OBJECTS)
-clean:
-	rm -f unit *.o $(LIBNAME) example benchmark bitpackingbenchmark dynunit unit_chars
--- a/cpp/simdcomp/makefile.vc
+++ b/cpp/simdcomp/makefile.vc
@@ -1,104 +0,0 @@
-
-!IFNDEF MACHINE
-!IF "$(PROCESSOR_ARCHITECTURE)"=="AMD64"
-MACHINE=x64
-!ELSE
-MACHINE=x86
-!ENDIF
-!ENDIF
-
-!IFNDEF DEBUG
-DEBUG=no
-!ENDIF
-
-!IFNDEF CC
-CC=cl.exe
-!ENDIF
-
-!IFNDEF AR
-AR=lib.exe
-!ENDIF
-
-!IFNDEF LINK
-LINK=link.exe
-!ENDIF
-
-!IFNDEF PGO
-PGO=no
-!ENDIF
-
-!IFNDEF PGI
-PGI=no
-!ENDIF
-
-INC = /Iinclude
-
-!IF "$(DEBUG)"=="yes"
-CFLAGS = /nologo /MDd /LDd /Od /Zi /D_DEBUG /RTC1 /W3 /GS /Gm
-ARFLAGS = /nologo
-LDFLAGS = /nologo /debug /nodefaultlib:msvcrt
-!ELSE
-CFLAGS = /nologo /MD /O2 /Zi /DNDEBUG /W3 /Gm- /GS /Gy /Oi /GL /MP
-ARFLAGS = /nologo /LTCG
-LDFLAGS = /nologo /LTCG /DYNAMICBASE /incremental:no /debug /opt:ref,icf
-!ENDIF
-
-!IF "$(PGI)"=="yes"
-LDFLAGS = $(LDFLAGS) /ltcg:pgi
-!ENDIF
-
-!IF "$(PGO)"=="yes"
-LDFLAGS = $(LDFLAGS) /ltcg:pgo
-!ENDIF
-
-LIB_OBJS = simdbitpacking.obj simdintegratedbitpacking.obj simdcomputil.obj \
-	simdpackedsearch.obj simdpackedselect.obj simdfor.obj
-
-
-all: lib dll dynunit unit_chars example benchmark
-# need some good use case scenario to train the instrumented build
-	@if "$(PGI)"=="yes" echo Running PGO training
-	@if "$(PGI)"=="yes" benchmark.exe >nul 2>&1
-	@if "$(PGI)"=="yes" example.exe >nul 2>&1
-
-
-$(LIB_OBJS):
-	$(CC) $(INC) $(CFLAGS) /c src/simdbitpacking.c src/simdintegratedbitpacking.c src/simdcomputil.c \
-		src/simdpackedsearch.c src/simdpackedselect.c src/simdfor.c
-
-lib: $(LIB_OBJS)
-	$(AR) $(ARFLAGS) /OUT:simdcomp_a.lib $(LIB_OBJS)
-
-dll: $(LIB_OBJS)
-	$(LINK) /DLL $(LDFLAGS) /OUT:simdcomp.dll /IMPLIB:simdcomp.lib /DEF:simdcomp.def $(LIB_OBJS)
-
-unit: lib
-	$(CC) $(INC) $(CFLAGS) /c src/unit.c 
-	$(LINK) $(LDFLAGS) /OUT:unit.exe unit.obj simdcomp_a.lib
-
-dynunit: dll
-	$(CC) $(INC) $(CFLAGS) /c src/unit.c 
-	$(LINK) $(LDFLAGS) /OUT:unit.exe unit.obj simdcomp.lib
-
-unit_chars: lib
-	$(CC) $(INC) $(CFLAGS) /c src/unit_chars.c
-	$(LINK) $(LDFLAGS) /OUT:unit_chars.exe unit_chars.obj simdcomp.lib
-
-
-example: lib
-	$(CC) $(INC) $(CFLAGS) /c example.c
-	$(LINK) $(LDFLAGS) /OUT:example.exe example.obj simdcomp.lib
-
-benchmark: lib
-	$(CC) $(INC) $(CFLAGS) /c src/benchmark.c
-	$(LINK) $(LDFLAGS) /OUT:benchmark.exe benchmark.obj simdcomp.lib
-
-clean:
-	del /Q *.obj
-	del /Q *.lib
-	del /Q *.exe
-	del /Q *.dll
-	del /Q *.pgc
-	del /Q *.pgd
-	del /Q *.pdb
-
--- a/cpp/simdcomp/package.json
+++ b/cpp/simdcomp/package.json
@@ -1,16 +0,0 @@
-{
-  "name": "simdcomp",
-  "version": "0.0.3",
-  "repo": "lemire/simdcomp",
-  "description": "A simple C library for compressing lists of integers",
-  "license": "BSD-3-Clause",
-  "src": [
-    "src/simdbitpacking.c",
-    "src/simdcomputil.c",
-    "src/simdintegratedbitpacking.c",
-    "include/simdbitpacking.h",
-    "include/simdcomp.h",
-    "include/simdcomputil.h",
-    "include/simdintegratedbitpacking.h"
-  ]
-}
--- a/cpp/simdcomp/scripts/avxpacking.py
+++ b/cpp/simdcomp/scripts/avxpacking.py
@@ -1,182 +0,0 @@
-#!/usr/bin/env python
-import sys
-def howmany(bit):
-    """ how many values are we going to pack? """
-    return 256
-
-def howmanywords(bit):
-    return (howmany(bit) * bit + 255)/256
-
-def howmanybytes(bit):
-    return howmanywords(bit) * 16
-
-print("""
-/** code generated by avxpacking.py starts here **/
-""")
-
-print("""typedef void (*avxpackblockfnc)(const uint32_t * pin, __m256i * compressed);""")
-print("""typedef void (*avxunpackblockfnc)(const __m256i * compressed, uint32_t * pout);""")
-
-
-
-
-
-
-def plurial(number):
-    if(number <> 1):
-        return "s"
-    else :
-        return ""
-
-print("")
-print("static void avxpackblock0(const uint32_t * pin, __m256i * compressed) {");
-print("  (void)compressed;");
-print("  (void) pin; /* we consumed {0} 32-bit integer{1} */ ".format(howmany(0),plurial(howmany(0))));
-print("}");
-print("")
-
-for bit in range(1,33):
-    print("")
-    print("/* we are going to pack {0} {1}-bit values, touching {2} 256-bit words, using {3} bytes */ ".format(howmany(bit),bit,howmanywords(bit),howmanybytes(bit)))
-    print("static void avxpackblock{0}(const uint32_t * pin, __m256i * compressed) {{".format(bit));
-    print("  const __m256i * in = (const __m256i *)  pin;");
-    print("  /* we are going to touch  {0} 256-bit word{1} */ ".format(howmanywords(bit),plurial(howmanywords(bit))));
-    if(howmanywords(bit) == 1):
-      print("  __m256i w0;")
-    else:
-      print("  __m256i w0, w1;")
-    if( (bit & (bit-1)) <> 0) : print("  __m256i tmp; /* used to store inputs at word boundary */")
-    oldword = 0
-    for j in range(howmany(bit)/8):
-      firstword = j * bit / 32
-      if(firstword > oldword):
-        print("  _mm256_storeu_si256(compressed + {0}, w{1});".format(oldword,oldword%2))
-        oldword = firstword
-      secondword = (j * bit + bit - 1)/32
-      firstshift = (j*bit) % 32
-      if( firstword == secondword):
-          if(firstshift == 0):
-            print("  w{0} = _mm256_lddqu_si256 (in + {1});".format(firstword%2,j))
-          else:
-            print("  w{0} = _mm256_or_si256(w{0},_mm256_slli_epi32(_mm256_lddqu_si256 (in + {1}) , {2}));".format(firstword%2,j,firstshift))
-      else:
-          print("  tmp = _mm256_lddqu_si256 (in + {0});".format(j))
-          print("  w{0} = _mm256_or_si256(w{0},_mm256_slli_epi32(tmp , {2}));".format(firstword%2,j,firstshift))
-          secondshift = 32-firstshift
-          print("  w{0} = _mm256_srli_epi32(tmp,{2});".format(secondword%2,j,secondshift))
-    print("  _mm256_storeu_si256(compressed + {0}, w{1});".format(secondword,secondword%2))
-    print("}");
-    print("")
-
-
-print("")
-print("static void avxpackblockmask0(const uint32_t * pin, __m256i * compressed) {");
-print("  (void)compressed;");
-print("  (void) pin; /* we consumed {0} 32-bit integer{1} */ ".format(howmany(0),plurial(howmany(0))));
-print("}");
-print("")
-
-for bit in range(1,33):
-    print("")
-    print("/* we are going to pack {0} {1}-bit values, touching {2} 256-bit words, using {3} bytes */ ".format(howmany(bit),bit,howmanywords(bit),howmanybytes(bit)))
-    print("static void avxpackblockmask{0}(const uint32_t * pin, __m256i * compressed) {{".format(bit));
-    print("  /* we are going to touch  {0} 256-bit word{1} */ ".format(howmanywords(bit),plurial(howmanywords(bit))));
-    if(howmanywords(bit) == 1):
-      print("  __m256i w0;")
-    else:
-      print("  __m256i w0, w1;")
-    print("  const __m256i * in = (const __m256i *) pin;");
-    if(bit < 32): print("  const __m256i mask = _mm256_set1_epi32({0});".format((1<<bit)-1));
-    def maskfnc(x):
-        if(bit == 32): return x
-        return " _mm256_and_si256 ( mask, {0}) ".format(x)
-    if( (bit & (bit-1)) <> 0) : print("  __m256i tmp; /* used to store inputs at word boundary */")
-    oldword = 0
-    for j in range(howmany(bit)/8):
-      firstword = j * bit / 32
-      if(firstword > oldword):
-        print("  _mm256_storeu_si256(compressed + {0}, w{1});".format(oldword,oldword%2))
-        oldword = firstword
-      secondword = (j * bit + bit - 1)/32
-      firstshift = (j*bit) % 32
-      loadstr = maskfnc(" _mm256_lddqu_si256 (in + {0}) ".format(j))
-      if( firstword == secondword):
-          if(firstshift == 0):
-            print("  w{0} = {1};".format(firstword%2,loadstr))
-          else:
-            print("  w{0} = _mm256_or_si256(w{0},_mm256_slli_epi32({1} , {2}));".format(firstword%2,loadstr,firstshift))
-      else:
-          print("  tmp = {0};".format(loadstr))
-          print("  w{0} = _mm256_or_si256(w{0},_mm256_slli_epi32(tmp , {2}));".format(firstword%2,j,firstshift))
-          secondshift = 32-firstshift
-          print("  w{0} = _mm256_srli_epi32(tmp,{2});".format(secondword%2,j,secondshift))
-    print("  _mm256_storeu_si256(compressed + {0}, w{1});".format(secondword,secondword%2))
-    print("}");
-    print("")
-
-
-print("static void avxunpackblock0(const __m256i * compressed, uint32_t * pout) {");
-print("  (void) compressed;");
-print("  memset(pout,0,{0});".format(howmany(0)));
-print("}");
-print("")
-
-for bit in range(1,33):
-    print("")
-    print("/* we packed {0} {1}-bit values, touching {2} 256-bit words, using {3} bytes */ ".format(howmany(bit),bit,howmanywords(bit),howmanybytes(bit)))
-    print("static void avxunpackblock{0}(const __m256i * compressed, uint32_t * pout) {{".format(bit));
-    print("  /* we are going to access  {0} 256-bit word{1} */ ".format(howmanywords(bit),plurial(howmanywords(bit))));
-    if(howmanywords(bit) == 1):
-      print("  __m256i w0;")
-    else:
-      print("  __m256i w0, w1;")
-    print("  __m256i * out = (__m256i *) pout;");
-    if(bit < 32): print("  const __m256i mask = _mm256_set1_epi32({0});".format((1<<bit)-1));
-    maskstr = " _mm256_and_si256 ( mask, {0}) "
-    if (bit == 32) : maskstr = " {0} " # no need
-    oldword = 0
-    print("  w0 = _mm256_lddqu_si256 (compressed);")
-    for j in range(howmany(bit)/8):
-      firstword = j * bit / 32
-      secondword = (j * bit + bit - 1)/32
-      if(secondword > oldword):
-        print("  w{0} = _mm256_lddqu_si256 (compressed + {1});".format(secondword%2,secondword))
-        oldword = secondword
-      firstshift = (j*bit) % 32
-      firstshiftstr = "_mm256_srli_epi32( w{0} , "+str(firstshift)+") "
-      if(firstshift == 0):
-          firstshiftstr =" w{0} " # no need
-      wfirst = firstshiftstr.format(firstword%2)
-      if( firstword == secondword):
-          if(firstshift + bit <> 32):
-            wfirst  = maskstr.format(wfirst)
-          print("  _mm256_storeu_si256(out + {0}, {1});".format(j,wfirst))
-      else:
-          secondshift = (32-firstshift)
-          wsecond = "_mm256_slli_epi32( w{0} , {1} ) ".format((firstword+1)%2,secondshift)
-          wfirstorsecond = " _mm256_or_si256 ({0},{1}) ".format(wfirst,wsecond)
-          wfirstorsecond = maskstr.format(wfirstorsecond)
-          print("  _mm256_storeu_si256(out + {0},\n    {1});".format(j,wfirstorsecond))
-    print("}");
-    print("")
-
-
-print("static avxpackblockfnc avxfuncPackArr[] = {")
-for bit in range(0,32):
-  print("&avxpackblock{0},".format(bit))
-print("&avxpackblock32")
-print("};")
-
-print("static avxpackblockfnc avxfuncPackMaskArr[] = {")
-for bit in range(0,32):
-  print("&avxpackblockmask{0},".format(bit))
-print("&avxpackblockmask32")
-print("};")
-
-
-print("static avxunpackblockfnc avxfuncUnpackArr[] = {")
-for bit in range(0,32):
-  print("&avxunpackblock{0},".format(bit))
-print("&avxunpackblock32")
-print("};")
-print("/** code generated by avxpacking.py ends here **/")
--- a/cpp/simdcomp/scripts/simdfor.py
+++ b/cpp/simdcomp/scripts/simdfor.py
@@ -1,152 +0,0 @@
-#!/usr/bin/env python3
-
-
-from math import ceil
-
-print("""
-/**
-* Blablabla
-*
-*/
-
-""");
-
-def mask(bit):
-  return str((1 << bit) - 1)
-
-for length in [32]:
-  print("""
-static __m128i  iunpackFOR0(__m128i initOffset, const __m128i *   _in , uint32_t *    _out) {
-    __m128i       *out = (__m128i*)(_out);
-    int i;
-    (void) _in;
-    for (i = 0; i < 8; ++i) {
-        _mm_store_si128(out++, initOffset);
-    	_mm_store_si128(out++, initOffset);
-        _mm_store_si128(out++, initOffset);
-        _mm_store_si128(out++, initOffset);
-    }
-
-    return initOffset;
-}
-
-  """)
-  print("""
-
-static void ipackFOR0(__m128i initOffset , const uint32_t *   _in , __m128i *  out  ) {
-    (void) initOffset;
-    (void) _in;
-    (void) out;
-}
-""") 
-  for bit in range(1,33):
-    offsetVar = " initOffset";
-    print("""  
-static void ipackFOR"""+str(bit)+"""(__m128i """+offsetVar+""", const uint32_t *   _in, __m128i *   out) {
-    const __m128i       *in = (const __m128i*)(_in);
-    __m128i    OutReg;
-
-      """);
-    
-    if (bit != 32):
-      print("    __m128i CurrIn = _mm_load_si128(in);");
-      print("    __m128i InReg = _mm_sub_epi32(CurrIn, initOffset);");
-    else:
-      print("    __m128i InReg = _mm_load_si128(in);");
-      print("    (void) initOffset;");
-
-
-    inwordpointer = 0
-    valuecounter = 0
-    for k in range(ceil((length * bit) / 32)):
-      if(valuecounter == length): break
-      for x in range(inwordpointer,32,bit):
-        if(x!=0) :
-          print("    OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, " + str(x) + "));");
-        else:
-          print("    OutReg = InReg; ");
-        if((x+bit>=32) ):
-          while(inwordpointer<32):
-            inwordpointer += bit
-          print("    _mm_store_si128(out, OutReg);");
-          print("");
-
-          if(valuecounter + 1 < length):
-            print("    ++out;")
-          inwordpointer -= 32;
-          if(inwordpointer>0):
-            print("    OutReg = _mm_srli_epi32(InReg, " + str(bit) + " - " + str(inwordpointer) + ");");
-        if(valuecounter + 1 < length):
-          print("    ++in;") 
-
-          if (bit != 32):
-            print("    CurrIn = _mm_load_si128(in);");
-            print("    InReg = _mm_sub_epi32(CurrIn, initOffset);");
-          else:
-            print("    InReg = _mm_load_si128(in);");
-          print("");
-        valuecounter = valuecounter + 1
-        if(valuecounter == length): break
-    assert(valuecounter == length)
-    print("\n}\n\n""")
-
-  for bit in range(1,32):
-    offsetVar = " initOffset";
-    print("""\n
-static __m128i iunpackFOR"""+str(bit)+"""(__m128i """+offsetVar+""", const  __m128i*   in, uint32_t *   _out) {
-      """);
-    print("""    __m128i*   out = (__m128i*)(_out);
-    __m128i    InReg = _mm_load_si128(in);
-    __m128i    OutReg;    
-    __m128i     tmp;
-    const __m128i mask =  _mm_set1_epi32((1U<<"""+str(bit)+""")-1);
-
-    """);
-
-    MainText = "";
-
-    MainText += "\n";
-    inwordpointer = 0
-    valuecounter = 0
-    for k in range(ceil((length * bit) / 32)):
-      for x in range(inwordpointer,32,bit):
-        if(valuecounter == length): break
-        if (x > 0):
-          MainText += "    tmp = _mm_srli_epi32(InReg," + str(x) +");\n"; 
-        else:
-          MainText += "    tmp = InReg;\n"; 
-        if(x+bit<32):
-          MainText += "    OutReg = _mm_and_si128(tmp, mask);\n";
-        else:
-          MainText += "    OutReg = tmp;\n";        
-        if((x+bit>=32) ):      
-          while(inwordpointer<32):
-            inwordpointer += bit
-          if(valuecounter + 1 < length):
-             MainText += "    ++in;"
-             MainText += "    InReg = _mm_load_si128(in);\n";
-          inwordpointer -= 32;
-          if(inwordpointer>0):
-            MainText += "    OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, " + str(bit) + "-" + str(inwordpointer) + "), mask));\n\n";
-        if (bit != 32):
-          MainText += "    OutReg = _mm_add_epi32(OutReg, initOffset);\n"; 
-        MainText += "    _mm_store_si128(out++, OutReg);\n\n"; 
-        MainText += "";
-        valuecounter = valuecounter + 1
-        if(valuecounter == length): break
-    assert(valuecounter == length)
-    print(MainText)
-    print("    return initOffset;");
-    print("\n}\n\n")
-  print("""
-static __m128i iunpackFOR32(__m128i initvalue , const  __m128i*   in, uint32_t *    _out) {
-	__m128i * mout = (__m128i *)_out;
-	__m128i invec;
-	size_t k;
-	for(k = 0; k < 128/4; ++k) {
-		invec =  _mm_load_si128(in++);
-	    _mm_store_si128(mout++, invec);
-	}
-	return invec;
-}
-  """)
--- a/cpp/simdcomp/simdcomp.def
+++ b/cpp/simdcomp/simdcomp.def
@@ -1,40 +0,0 @@
-EXPORTS
-	simdpack
-	simdpackwithoutmask
-	simdunpack
-	bits
-	maxbits
-	maxbits_length
-	simdmin
-	simdmin_length
-	simdmaxmin
-	simdmaxmin_length
-	simdmaxbitsd1
-	simdmaxbitsd1_length
-	simdpackd1
-	simdpackwithoutmaskd1
-	simdunpackd1
-	simdsearchd1
-	simdsearchwithlengthd1
-	simdselectd1
-	simdpackFOR
-	simdselectFOR
-	simdsearchwithlengthFOR
-	simdunpackFOR
-	simdmin_length
-	simdmaxmin
-	simdmaxmin_length
-	simdpack_length
-	simdpackFOR_length
-	simdunpackFOR_length
-	simdpack_shortlength
-	simdfastsetFOR
-	simdfastset
-	simdfastsetd1
-	simdunpack_length
-	simdunpack_shortlength
-	simdsearchwithlengthFOR
-	simdscand1
-	simdfastsetd1fromprevious
-	simdfastsetd1
-
--- a/cpp/simdcomp/src/avxbitpacking.c
+++ b/cpp/simdcomp/src/avxbitpacking.c
--- a/cpp/simdcomp/src/simdbitpacking.c
+++ b/cpp/simdcomp/src/simdbitpacking.c
--- a/cpp/simdcomp/src/simdcomputil.c
+++ b/cpp/simdcomp/src/simdcomputil.c
@@ -1,234 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-
-#include "simdcomputil.h"
-#ifdef __SSE4_1__
-#include <smmintrin.h>
-#endif
-#include <assert.h>
-
-#define Delta(curr, prev) \
-    _mm_sub_epi32(curr, \
-            _mm_or_si128(_mm_slli_si128(curr, 4), _mm_srli_si128(prev, 12)))
-
-/* returns the integer logarithm of v (bit width) */
-uint32_t bits(const uint32_t v) {
-#ifdef _MSC_VER
-    unsigned long answer;
-    if (v == 0) {
-        return 0;
-    }
-    _BitScanReverse(&answer, v);
-    return answer + 1;
-#else
-    return v == 0 ? 0 : 32 - __builtin_clz(v); /* assume GCC-like compiler if not microsoft */
-#endif
-}
-
-
-
-static uint32_t maxbitas32int(const __m128i accumulator) {
-	const __m128i _tmp1 = _mm_or_si128(_mm_srli_si128(accumulator, 8), accumulator); /* (A,B,C,D) xor (0,0,A,B) = (A,B,C xor A,D xor B)*/
-	const __m128i _tmp2 = _mm_or_si128(_mm_srli_si128(_tmp1, 4), _tmp1); /*  (A,B,C xor A,D xor B) xor  (0,0,0,C xor A)*/
-	uint32_t ans =  _mm_cvtsi128_si32(_tmp2);
-	return bits(ans);
-}
-
-SIMDCOMP_PURE uint32_t maxbits(const uint32_t * begin) {
-	    const __m128i* pin = (const __m128i*)(begin);
-	    __m128i accumulator = _mm_loadu_si128(pin);
-	    uint32_t k = 1;
-	    for(; 4*k < SIMDBlockSize; ++k) {
-	    	__m128i newvec = _mm_loadu_si128(pin+k);
-	        accumulator = _mm_or_si128(accumulator,newvec);
-	    }
-	    return maxbitas32int(accumulator);
-}
-static uint32_t orasint(const __m128i accumulator) {
-	const __m128i _tmp1 = _mm_or_si128(_mm_srli_si128(accumulator, 8), accumulator); /* (A,B,C,D) xor (0,0,A,B) = (A,B,C xor A,D xor B)*/
-	const __m128i _tmp2 = _mm_or_si128(_mm_srli_si128(_tmp1, 4), _tmp1); /*  (A,B,C xor A,D xor B) xor  (0,0,0,C xor A)*/
-	return  _mm_cvtsi128_si32(_tmp2);
-}
-
-#ifdef __SSE4_1__
-
-static uint32_t minasint(const __m128i accumulator) {
-	const __m128i _tmp1 = _mm_min_epu32(_mm_srli_si128(accumulator, 8), accumulator); /* (A,B,C,D) xor (0,0,A,B) = (A,B,C xor A,D xor B)*/
-	const __m128i _tmp2 = _mm_min_epu32(_mm_srli_si128(_tmp1, 4), _tmp1); /*  (A,B,C xor A,D xor B) xor  (0,0,0,C xor A)*/
-	return  _mm_cvtsi128_si32(_tmp2);
-}
-
-static uint32_t maxasint(const __m128i accumulator) {
-	const __m128i _tmp1 = _mm_max_epu32(_mm_srli_si128(accumulator, 8), accumulator); /* (A,B,C,D) xor (0,0,A,B) = (A,B,C xor A,D xor B)*/
-	const __m128i _tmp2 = _mm_max_epu32(_mm_srli_si128(_tmp1, 4), _tmp1); /*  (A,B,C xor A,D xor B) xor  (0,0,0,C xor A)*/
-	return  _mm_cvtsi128_si32(_tmp2);
-}
-
-uint32_t simdmin(const uint32_t * in) {
-    const __m128i* pin = (const __m128i*)(in);
-    __m128i accumulator =  _mm_loadu_si128(pin);
-     uint32_t k = 1;
-     for(; 4*k < SIMDBlockSize; ++k) {
-    	 __m128i newvec = _mm_loadu_si128(pin+k);
-         accumulator = _mm_min_epu32(accumulator,newvec);
-     }
-     return minasint(accumulator);
-}
-
-void simdmaxmin(const uint32_t * in, uint32_t * getmin, uint32_t * getmax) {
-    const __m128i* pin = (const __m128i*)(in);
-    __m128i minaccumulator =  _mm_loadu_si128(pin);
-    __m128i maxaccumulator =  minaccumulator;
-    uint32_t k = 1;
-     for(; 4*k < SIMDBlockSize; ++k) {
-    	 __m128i newvec = _mm_loadu_si128(pin+k);
-         minaccumulator = _mm_min_epu32(minaccumulator,newvec);
-         maxaccumulator = _mm_max_epu32(maxaccumulator,newvec);
-     }
-     *getmin = minasint(minaccumulator);
-     *getmax = maxasint(maxaccumulator);
-}
-
-
-uint32_t simdmin_length(const uint32_t * in, uint32_t length) {
-	uint32_t currentmin = 0xFFFFFFFF;
-	uint32_t lengthdividedby4 = length / 4;
-	uint32_t offset = lengthdividedby4 * 4;
-	uint32_t k;
-	if (lengthdividedby4 > 0) {
-		const __m128i* pin = (const __m128i*)(in);
-		__m128i accumulator = _mm_loadu_si128(pin);
-		k = 1;
-		for(; 4*k < lengthdividedby4 * 4; ++k) {
-			__m128i newvec = _mm_loadu_si128(pin+k);
-			accumulator = _mm_min_epu32(accumulator,newvec);
-		}
-		currentmin = minasint(accumulator);
-	}
-	for (k = offset; k < length; ++k)
-		if (in[k] < currentmin)
-			currentmin = in[k];
-	return currentmin;
-}
-
-void simdmaxmin_length(const uint32_t * in, uint32_t length, uint32_t * getmin, uint32_t * getmax) {
-	uint32_t lengthdividedby4 = length / 4;
-	uint32_t offset = lengthdividedby4 * 4;
-	uint32_t k;
-	*getmin = 0xFFFFFFFF;
-	*getmax = 0;
-	if (lengthdividedby4 > 0) {
-		const __m128i* pin = (const __m128i*)(in);
-		__m128i minaccumulator = _mm_loadu_si128(pin);
-		__m128i maxaccumulator = minaccumulator;
-		k = 1;
-		for(; 4*k < lengthdividedby4 * 4; ++k) {
-			__m128i newvec = _mm_loadu_si128(pin+k);
-			minaccumulator = _mm_min_epu32(minaccumulator,newvec);
-			maxaccumulator = _mm_max_epu32(maxaccumulator,newvec);
-		}
-		*getmin = minasint(minaccumulator);
-		*getmax = maxasint(maxaccumulator);
-	}
-	for (k = offset; k < length; ++k) {
-		if (in[k] < *getmin)
-			*getmin = in[k];
-		if (in[k] > *getmax)
-			*getmax = in[k];
-	}
-}
-
-#endif
-
-SIMDCOMP_PURE uint32_t maxbits_length(const uint32_t * in,uint32_t length) {
-	  uint32_t k;
-	  uint32_t lengthdividedby4 = length / 4;
-	  uint32_t offset = lengthdividedby4 * 4;
-	  uint32_t bigxor = 0;
-	  if(lengthdividedby4 > 0) {
-		    const __m128i* pin = (const __m128i*)(in);
-		    __m128i accumulator = _mm_loadu_si128(pin);
-		    k = 1;
-		    for(; 4*k < 4*lengthdividedby4; ++k) {
-		    	__m128i newvec = _mm_loadu_si128(pin+k);
-		        accumulator = _mm_or_si128(accumulator,newvec);
-		    }
-		    bigxor = orasint(accumulator);
-	  }
-	  for(k = offset; k < length; ++k)
-		  bigxor |= in[k];
-	  return bits(bigxor);
-}
-
-
-/* maxbit over 128 integers (SIMDBlockSize) with provided initial value */
-uint32_t simdmaxbitsd1(uint32_t initvalue, const uint32_t * in) {
-    __m128i  initoffset = _mm_set1_epi32 (initvalue);
-    const __m128i* pin = (const __m128i*)(in);
-    __m128i newvec = _mm_loadu_si128(pin);
-    __m128i accumulator = Delta(newvec , initoffset);
-    __m128i oldvec = newvec;
-    uint32_t k = 1;
-    for(; 4*k < SIMDBlockSize; ++k) {
-        newvec = _mm_loadu_si128(pin+k);
-        accumulator = _mm_or_si128(accumulator,Delta(newvec , oldvec));
-        oldvec = newvec;
-    }
-    initoffset = oldvec;
-    return maxbitas32int(accumulator);
-}
-
-
-/* maxbit over |length| integers with provided initial value */
-uint32_t simdmaxbitsd1_length(uint32_t initvalue, const uint32_t * in,
-                uint32_t length) {
-    __m128i newvec;
-    __m128i oldvec;
-    __m128i initoffset;
-    __m128i accumulator;
-    const __m128i *pin;
-    uint32_t tmparray[4];
-    uint32_t k = 1;
-    uint32_t acc;
-
-    assert(length > 0);
-
-    pin = (const __m128i *)(in);
-    initoffset = _mm_set1_epi32(initvalue);
-    switch (length) {
-      case 1:
-        newvec = _mm_set1_epi32(in[0]);
-        break;
-      case 2:
-        newvec = _mm_setr_epi32(in[0], in[1], in[1], in[1]);
-        break;
-      case 3:
-        newvec = _mm_setr_epi32(in[0], in[1], in[2], in[2]);
-        break;
-      default:
-        newvec = _mm_loadu_si128(pin);
-        break;
-    }
-    accumulator = Delta(newvec, initoffset);
-    oldvec = newvec;
-
-    /* process 4 integers and build an accumulator */
-    while (k * 4 + 4 <= length) {
-        newvec = _mm_loadu_si128(pin + k);
-        accumulator = _mm_or_si128(accumulator, Delta(newvec, oldvec));
-        oldvec = newvec;
-        k++;
-    }
-
-    /* extract the accumulator as an integer */
-    _mm_storeu_si128((__m128i *)(tmparray), accumulator);
-    acc = tmparray[0] | tmparray[1] | tmparray[2] | tmparray[3];
-
-    /* now process the remaining integers */
-    for (k *= 4; k < length; k++)
-        acc |= in[k] - (k == 0 ? initvalue : in[k - 1]);
-
-    /* return the number of bits */
-    return bits(acc);
-}
--- a/cpp/simdcomp/src/simdfor.c
+++ b/cpp/simdcomp/src/simdfor.c
--- a/cpp/simdcomp/src/simdintegratedbitpacking.c
+++ b/cpp/simdcomp/src/simdintegratedbitpacking.c
--- a/cpp/simdcomp/src/simdpackedsearch.c
+++ b/cpp/simdcomp/src/simdpackedsearch.c
--- a/cpp/simdcomp/src/simdpackedselect.c
+++ b/cpp/simdcomp/src/simdpackedselect.c
--- a/cpp/simdcomp/tests/unit.c
+++ b/cpp/simdcomp/tests/unit.c
@@ -1,900 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include "simdcomp.h"
-
-
-
-int testshortpack() {
-	int bit;
-	size_t i;
-	size_t length;
-	__m128i * bb;
-	srand(0);
-	printf("testshortpack\n");
-	for (bit = 0; bit < 32; ++bit) {
-		const size_t N = 128;
-		uint32_t * data = malloc(N * sizeof(uint32_t));
-		uint32_t * backdata = malloc(N * sizeof(uint32_t));
-		uint32_t * buffer = malloc((2 * N + 1024) * sizeof(uint32_t));
-
-		for (i = 0; i < N; ++i) {
-			data[i] = rand() & ((1 << bit) - 1);
-		}
-		for (length = 0; length <= N; ++length) {
-			for (i = 0; i < N; ++i) {
-				backdata[i] = 0;
-			}
-			bb = simdpack_shortlength(data, length, (__m128i *) buffer,
-					bit);
-			if((bb - (__m128i *) buffer) * sizeof(__m128i) != (unsigned) simdpack_compressedbytes(length,bit)) {
-			 printf("bug\n");
-			 return -1;
-			}
-			simdunpack_shortlength((__m128i *) buffer, length,
-					backdata, bit);
-			for (i = 0; i < length; ++i) {
-
-				if (data[i] != backdata[i]) {
-				    printf("bug\n");
-					return -1;
-				}
-			}
-		}
-		free(data);
-		free(backdata);
-		free(buffer);
-	}
-	return 0;
-}
-
-int testlongpack() {
-	int bit;
-	size_t i;
-	size_t length;
-	__m128i * bb;
-	srand(0);
-	printf("testlongpack\n");
-	for (bit = 0; bit < 32; ++bit) {
-		const size_t N = 2048;
-		uint32_t * data = malloc(N * sizeof(uint32_t));
-		uint32_t * backdata = malloc(N * sizeof(uint32_t));
-		uint32_t * buffer = malloc((2 * N + 1024) * sizeof(uint32_t));
-
-		for (i = 0; i < N; ++i) {
-			data[i] = rand() & ((1 << bit) - 1);
-		}
-		for (length = 0; length <= N; ++length) {
-			for (i = 0; i < N; ++i) {
-				backdata[i] = 0;
-			}
-			bb = simdpack_length(data, length, (__m128i *) buffer,
-					bit);
-			if((bb - (__m128i *) buffer) * sizeof(__m128i) != (unsigned) simdpack_compressedbytes(length,bit)) {
-			 printf("bug\n");
-			 return -1;
-			}
-			simdunpack_length((__m128i *) buffer, length,
-					backdata, bit);
-			for (i = 0; i < length; ++i) {
-
-				if (data[i] != backdata[i]) {
-				    printf("bug\n");
-					return -1;
-				}
-			}
-		}
-		free(data);
-		free(backdata);
-		free(buffer);
-	}
-	return 0;
-}
-
-
-
-int testset() {
-	int bit;
-	size_t i;
-	const size_t N = 128;
-	uint32_t * data = malloc(N * sizeof(uint32_t));
-	uint32_t * backdata = malloc(N * sizeof(uint32_t));
-	uint32_t * buffer = malloc((2 * N + 1024) * sizeof(uint32_t));
-
-	srand(0);
-
-	for (bit = 0; bit < 32; ++bit) {
-		printf("simple set %d \n",bit);
-
-		for (i = 0; i < N; ++i) {
-			data[i] = rand() & ((1 << bit) - 1);
-		}
-		for (i = 0; i < N; ++i) {
-			backdata[i] = 0;
-		}
-		simdpack(data, (__m128i *) buffer, bit);
-		simdunpack((__m128i *) buffer, backdata, bit);
-		for (i = 0; i < N; ++i) {
-			if (data[i] != backdata[i]) {
-			    printf("bug\n");
-				return -1;
-			}
-		}
-
-		for(i = N  ; i > 0; i--) {
-			simdfastset((__m128i *) buffer, bit, data[N - i], i - 1);
-		}
-		simdunpack((__m128i *) buffer, backdata, bit);
-		for (i = 0; i < N; ++i) {
-			if (data[i] != backdata[N - i - 1]) {
-			    printf("bug\n");
-				return -1;
-			}
-		}
-		simdpack(data, (__m128i *) buffer, bit);
-		for(i = 1  ; i <= N; i++) {
-			simdfastset((__m128i *) buffer, bit, data[i - 1], i - 1);
-		}
-		simdunpack((__m128i *) buffer, backdata, bit);
-		for (i = 0; i < N; ++i) {
-			if (data[i] != backdata[i]) {
-			    printf("bug\n");
-				return -1;
-			}
-		}
-
-	}
-	free(data);
-	free(backdata);
-	free(buffer);
-
-	return 0;
-}
-
-#ifdef __SSE4_1__
-
-int testsetd1() {
-	int bit;
-	size_t i;
-	uint32_t newvalue;
-	const size_t N = 128;
-	uint32_t * data = malloc(N * sizeof(uint32_t));
-	uint32_t * datazeroes = malloc(N * sizeof(uint32_t));
-
-	uint32_t * backdata = malloc(N * sizeof(uint32_t));
-	uint32_t * buffer = malloc((2 * N + 1024) * sizeof(uint32_t));
-
-	srand(0);
-	for (bit = 0; bit < 32; ++bit) {
-		printf("simple set d1 %d \n",bit);
-		data[0] = rand() & ((1 << bit) - 1);
-		datazeroes[0] = 0;
-
-		for (i = 1; i < N; ++i) {
-			data[i] = data[i - 1] + (rand() & ((1 << bit) - 1));
-			datazeroes[i] = 0;
-		}
-		for (i = 0; i < N; ++i) {
-			backdata[i] = 0;
-		}
-		simdpackd1(0,datazeroes, (__m128i *) buffer, bit);
- 	    for(i = 1  ; i <= N; i++) {
-			simdfastsetd1(0,(__m128i *) buffer, bit, data[i - 1], i - 1);
-			newvalue = simdselectd1(0, (const __m128i *) buffer, bit,i - 1);
-			if( newvalue != data[i-1] ) {
-				printf("bad set-select\n");
-				return -1;
-			}
-		}
-		simdunpackd1(0,(__m128i *) buffer, backdata, bit);
-		for (i = 0; i < N; ++i) {
-			if (data[i] != backdata[i])
-				return -1;
-		}
-	}
-	free(data);
-	free(backdata);
-	free(buffer);
-        free(datazeroes);
-	return 0;
-}
-#endif
-
-int testsetFOR() {
-	int bit;
-	size_t i;
-	uint32_t newvalue;
-	const size_t N = 128;
-	uint32_t * data = malloc(N * sizeof(uint32_t));
-	uint32_t * datazeroes = malloc(N * sizeof(uint32_t));
-
-	uint32_t * backdata = malloc(N * sizeof(uint32_t));
-	uint32_t * buffer = malloc((2 * N + 1024) * sizeof(uint32_t));
-
-	srand(0);
-	for (bit = 0; bit < 32; ++bit) {
-		printf("simple set FOR %d \n",bit);
-		for (i = 0; i < N; ++i) {
-			data[i] = (rand() & ((1 << bit) - 1));
-			datazeroes[i] = 0;
-		}
-		for (i = 0; i < N; ++i) {
-			backdata[i] = 0;
-		}
-		simdpackFOR(0,datazeroes, (__m128i *) buffer, bit);
- 	    for(i = 1  ; i <= N; i++) {
- 	    	simdfastsetFOR(0,(__m128i *) buffer, bit, data[i - 1], i - 1);
-			newvalue = simdselectFOR(0, (const __m128i *) buffer, bit,i - 1);
-			if( newvalue != data[i-1] ) {
-				printf("bad set-select\n");
-				return -1;
-			}
-		}
-		simdunpackFOR(0,(__m128i *) buffer, backdata, bit);
-		for (i = 0; i < N; ++i) {
-			if (data[i] != backdata[i])
-				return -1;
-		}
-	}
-	free(data);
-	free(backdata);
-	free(buffer);
-        free(datazeroes);
-	return 0;
-}
-
-int testshortFORpack() {
-	int bit;
-	size_t i;
-	__m128i * rb;
-	size_t length;
-	uint32_t offset = 7;
-	srand(0);
-	for (bit = 0; bit < 32; ++bit) {
-		const size_t N = 128;
-		uint32_t * data = malloc(N * sizeof(uint32_t));
-		uint32_t * backdata = malloc(N * sizeof(uint32_t));
-		uint32_t * buffer = malloc((2 * N + 1024) * sizeof(uint32_t));
-
-		for (i = 0; i < N; ++i) {
-			data[i] = (rand() & ((1 << bit) - 1)) + offset;
-		}
-		for (length = 0; length <= N; ++length) {
-			for (i = 0; i < N; ++i) {
-				backdata[i] = 0;
-			}
-			rb = simdpackFOR_length(offset,data, length, (__m128i *) buffer,
-					bit);
-		    if(((rb - (__m128i *) buffer)*sizeof(__m128i)) != (unsigned) simdpackFOR_compressedbytes(length,bit)) {
-		      return -1;
-		    }
-			simdunpackFOR_length(offset,(__m128i *) buffer, length,
-					backdata, bit);
-			for (i = 0; i < length; ++i) {
-
-				if (data[i] != backdata[i])
-					return -1;
-			}
-		}
-		free(data);
-		free(backdata);
-		free(buffer);
-	}
-	return 0;
-}
-
-
-#ifdef __AVX2__
-
-int testbabyavx() {
-	int bit;
-	int trial;
-	unsigned int i,j;
-	const size_t N = AVXBlockSize;
-	srand(0);
-	printf("testbabyavx\n");
-	printf("bit = ");
-	for (bit = 0; bit < 32; ++bit) {
-		printf(" %d ",bit);
-		fflush(stdout);
-		for(trial = 0; trial < 100; ++trial) {
-			uint32_t * data = malloc(N * sizeof(uint32_t)+ 64 * sizeof(uint32_t));
-			uint32_t * backdata = malloc(N * sizeof(uint32_t) + 64 * sizeof(uint32_t) );
-			__m256i * buffer = malloc((2 * N + 1024) * sizeof(uint32_t) + 32);
-
-			for (i = 0; i < N; ++i) {
-				data[i] = rand() & ((uint32_t)(1 << bit) - 1);
-			}
-			for (i = 0; i < N; ++i) {
-				backdata[i] = 0;
-			}
-            if(avxmaxbits(data) != maxbits_length(data,N)) {
-            	printf("avxmaxbits is buggy\n");
-				return -1;
-            }
-
-			avxpackwithoutmask(data, buffer, bit);
-			avxunpack(buffer, backdata, bit);
-			for (i = 0; i < AVXBlockSize; ++i) {
-				if (data[i] != backdata[i]) {
-					printf("bug\n");
-					for (j = 0; j < N; ++j) {
-						if (data[j] != backdata[j]) {
-							printf("data[%d]=%d v.s. backdata[%d]=%d\n",j,data[j],j,backdata[j]);
-						} else {
-							printf("data[%d]=%d\n",j,data[j]);
-						}
-					}
-					return -1;
-				}
-			}
-			free(data);
-			free(backdata);
-			free(buffer);
-		}
-	}
-	printf("\n");
-	return 0;
-}
-
-int testavx2() {
-    int N = 5000 * AVXBlockSize, gap;
-    __m256i * buffer = malloc(AVXBlockSize * sizeof(uint32_t));
-    uint32_t * datain = malloc(N * sizeof(uint32_t));
-    uint32_t * backbuffer = malloc(AVXBlockSize * sizeof(uint32_t));
-    for (gap = 1; gap <= 387420489; gap *= 3) {
-        int k;
-        printf(" gap = %u \n", gap);
-        for (k = 0; k < N; ++k)
-            datain[k] = k * gap;
-        for (k = 0; k * AVXBlockSize < N; ++k) {
-            /*
-               First part works for general arrays (sorted or unsorted)
-            */
-            int j;
-       	    /* we compute the bit width */
-            const uint32_t b = avxmaxbits(datain + k * AVXBlockSize);
-            if(avxmaxbits(datain + k * AVXBlockSize) != maxbits_length(datain + k * AVXBlockSize,AVXBlockSize)) {
-            	printf("avxmaxbits is buggy %d %d \n",
-            			avxmaxbits(datain + k * AVXBlockSize),
-						maxbits_length(datain + k * AVXBlockSize,AVXBlockSize));
-				return -1;
-            }
-            printf("bit width = %d\n",b);
-
-
-            /* we read 256 integers at "datain + k * AVXBlockSize" and
-               write b 256-bit vectors at "buffer" */
-            avxpackwithoutmask(datain + k * AVXBlockSize, buffer, b);
-            /* we read back b1 128-bit vectors at "buffer" and write 128 integers at backbuffer */
-			avxunpack(buffer, backbuffer, b);/* uncompressed */
-			for (j = 0; j < AVXBlockSize; ++j) {
-				if (backbuffer[j] != datain[k * AVXBlockSize + j]) {
-					int i;
-					printf("bug in avxpack\n");
-					for(i = 0; i < AVXBlockSize; ++i) {
-						printf("data[%d]=%d got back %d %s\n",i,
-								datain[k * AVXBlockSize + i],backbuffer[i],
-								datain[k * AVXBlockSize + i]!=backbuffer[i]?"bug":"");
-					}
-					return -2;
-				}
-			}
-        }
-    }
-    free(buffer);
-    free(datain);
-    free(backbuffer);
-    printf("Code looks good.\n");
-    return 0;
-}
-#endif /* avx2 */
-
-int test() {
-    int N = 5000 * SIMDBlockSize, gap;
-    __m128i * buffer = malloc(SIMDBlockSize * sizeof(uint32_t));
-    uint32_t * datain = malloc(N * sizeof(uint32_t));
-    uint32_t * backbuffer = malloc(SIMDBlockSize * sizeof(uint32_t));
-    for (gap = 1; gap <= 387420489; gap *= 3) {
-        int k;
-        printf(" gap = %u \n", gap);
-        for (k = 0; k < N; ++k)
-            datain[k] = k * gap;
-        for (k = 0; k * SIMDBlockSize < N; ++k) {
-            /*
-               First part works for general arrays (sorted or unsorted)
-            */
-            int j;
-       	    /* we compute the bit width */
-            const uint32_t b = maxbits(datain + k * SIMDBlockSize);
-            /* we read 128 integers at "datain + k * SIMDBlockSize" and
-               write b 128-bit vectors at "buffer" */
-            simdpackwithoutmask(datain + k * SIMDBlockSize, buffer, b);
-            /* we read back b1 128-bit vectors at "buffer" and write 128 integers at backbuffer */
-            simdunpack(buffer, backbuffer, b);/* uncompressed */
-            for (j = 0; j < SIMDBlockSize; ++j) {
-                if (backbuffer[j] != datain[k * SIMDBlockSize + j]) {
-                    printf("bug in simdpack\n");
-                    return -2;
-                }
-            }
-
-	    {
-                /*
-                 next part assumes that the data is sorted (uses differential coding)
-                */
-                uint32_t offset = 0;
-                /* we compute the bit width */
-                const uint32_t b1 = simdmaxbitsd1(offset,
-                    datain + k * SIMDBlockSize);
-               /* we read 128 integers at "datain + k * SIMDBlockSize" and
-                  write b1 128-bit vectors at "buffer" */
-               simdpackwithoutmaskd1(offset, datain + k * SIMDBlockSize, buffer,
-                    b1);
-               /* we read back b1 128-bit vectors at "buffer" and write 128 integers at backbuffer */
-               simdunpackd1(offset, buffer, backbuffer, b1);
-               for (j = 0; j < SIMDBlockSize; ++j) {
-                   if (backbuffer[j] != datain[k * SIMDBlockSize + j]) {
-                       printf("bug in simdpack d1\n");
-                       return -3;
-                   }
-               }
-               offset = datain[k * SIMDBlockSize + SIMDBlockSize - 1];
-	    }
-        }
-    }
-    free(buffer);
-    free(datain);
-    free(backbuffer);
-    printf("Code looks good.\n");
-    return 0;
-}
-
-#ifdef __SSE4_1__
-int testFOR() {
-    int N = 5000 * SIMDBlockSize, gap;
-    __m128i * buffer = malloc(SIMDBlockSize * sizeof(uint32_t));
-    uint32_t * datain = malloc(N * sizeof(uint32_t));
-    uint32_t * backbuffer = malloc(SIMDBlockSize * sizeof(uint32_t));
-    uint32_t tmax, tmin, tb;
-    for (gap = 1; gap <= 387420489; gap *= 2) {
-        int k;
-        printf(" gap = %u \n", gap);
-        for (k = 0; k < N; ++k)
-            datain[k] = k * gap;
-        for (k = 0; k * SIMDBlockSize < N; ++k) {
-            int j;
-            simdmaxmin_length(datain + k * SIMDBlockSize,SIMDBlockSize,&tmin,&tmax);
-       	    /* we compute the bit width */
-            tb  = bits(tmax - tmin);
-
-
-            /* we read 128 integers at "datain + k * SIMDBlockSize" and
-               write b 128-bit vectors at "buffer" */
-            simdpackFOR(tmin,datain + k * SIMDBlockSize, buffer, tb);
-
-            for (j = 0; j < SIMDBlockSize; ++j) {
-                        uint32_t selectedvalue = simdselectFOR(tmin,buffer,tb,j);
-                    	if (selectedvalue != datain[k * SIMDBlockSize + j]) {
-                            printf("bug in simdselectFOR\n");
-                            return -3;
-                        }
-            }
-            /* we read back b1 128-bit vectors at "buffer" and write 128 integers at backbuffer */
-            simdunpackFOR(tmin,buffer, backbuffer, tb);/* uncompressed */
-            for (j = 0; j < SIMDBlockSize; ++j) {
-            	if (backbuffer[j] != datain[k * SIMDBlockSize + j]) {
-                    printf("bug in simdpackFOR\n");
-                    return -2;
-                }
-            }
-        }
-    }
-    free(buffer);
-    free(datain);
-    free(backbuffer);
-    printf("Code looks good.\n");
-    return 0;
-}
-#endif
-
-#define MAX 300
-int test_simdmaxbitsd1_length() {
-    uint32_t result, buffer[MAX + 1];
-    int i, j;
-
-    memset(&buffer[0], 0xff, sizeof(buffer));
-
-    /* this test creates buffers of different length; each buffer is
-     * initialized to result in the following deltas:
-     * length 1: 2
-     * length 2: 1 2
-     * length 3: 1 1 2
-     * length 4: 1 1 1 2
-     * length 5: 1 1 1 1 2
-     * etc. Each sequence's "maxbits" is 2. */
-    for (i = 0; i < MAX; i++) {
-      for (j = 0; j < i; j++)
-        buffer[j] = j + 1;
-      buffer[i] = i + 2;
-
-      result = simdmaxbitsd1_length(0, &buffer[0], i + 1);
-      if (result != 2) {
-        printf("simdmaxbitsd1_length: unexpected result %u in loop %d\n",
-                result, i);
-        return -1;
-      }
-    }
-    printf("simdmaxbitsd1_length: ok\n");
-    return 0;
-}
-
-int uint32_cmp(const void *a, const void *b)
-{
-    const uint32_t *ia = (const uint32_t *)a;
-    const uint32_t *ib = (const uint32_t *)b;
-    if(*ia < *ib)
-    	return -1;
-    else if (*ia > *ib)
-    	return 1;
-    return 0;
-}
-
-#ifdef __SSE4_1__
-int test_simdpackedsearch() {
-    uint32_t buffer[128];
-    uint32_t result = 0;
-    int b, i;
-    uint32_t init = 0;
-    __m128i initial = _mm_set1_epi32(init);
-
-    /* initialize the buffer */
-    for (i = 0; i < 128; i++)
-        buffer[i] = (uint32_t)(i + 1);
-
-    /* this test creates delta encoded buffers with different bits, then
-     * performs lower bound searches for each key */
-    for (b = 1; b <= 32; b++) {
-        uint32_t out[128];
-        /* delta-encode to 'i' bits */
-        simdpackwithoutmaskd1(init, buffer, (__m128i *)out, b);
-        initial = _mm_setzero_si128();
-        printf("simdsearchd1: %d bits\n", b);
-
-        /* now perform the searches */
-        initial = _mm_set1_epi32(init);
-        assert(simdsearchd1(&initial, (__m128i *)out, b, 0, &result) == 0);
-        assert(result > 0);
-
-        for (i = 1; i <= 128; i++) {
-        	initial = _mm_set1_epi32(init);
-            assert(simdsearchd1(&initial, (__m128i *)out, b,
-                                    (uint32_t)i, &result) == i - 1);
-            assert(result == (unsigned)i);
-        }
-        initial = _mm_set1_epi32(init);
-        assert(simdsearchd1(&initial, (__m128i *)out, b, 200, &result)
-                        == 128);
-        assert(result > 200);
-    }
-    printf("simdsearchd1: ok\n");
-    return 0;
-}
-
-int test_simdpackedsearchFOR() {
-    uint32_t buffer[128];
-    uint32_t result = 0;
-    int b;
-    uint32_t i;
-    uint32_t maxv, tmin, tmax, tb;
-    uint32_t out[128];
-
-    /* this test creates delta encoded buffers with different bits, then
-     * performs lower bound searches for each key */
-    for (b = 1; b <= 32; b++) {
-        /* initialize the buffer */
-    	maxv = (b == 32)
-    			? 0xFFFFFFFF
-    					: ((1U<<b) - 1);
-        for (i = 0; i < 128; i++)
-            buffer[i] = maxv * (i + 1) / 128;
-        simdmaxmin_length(buffer,SIMDBlockSize,&tmin,&tmax);
-   	    /* we compute the bit width */
-        tb  = bits(tmax - tmin);
-        /* delta-encode to 'i' bits */
-        simdpackFOR(tmin, buffer, (__m128i *)out, tb);
-        printf("simdsearchd1: %d bits\n", b);
-
-        /* now perform the searches */
-        for (i = 0; i < 128; i++) {
-        	assert(buffer[i] == simdselectFOR(tmin, (__m128i *)out, tb,i));
-        }
-        for (i = 0; i < 128; i++) {
-            int x = simdsearchwithlengthFOR(tmin, (__m128i *)out, tb,
-                                    128,buffer[i], &result) ;
-            assert(simdselectFOR(tmin, (__m128i *)out, tb,x) == buffer[x]);
-            assert(simdselectFOR(tmin, (__m128i *)out, tb,x) == result);
-            assert(buffer[x] == result);
-            assert(result == buffer[i]);
-            assert(buffer[x] == buffer[i]);
-        }
-    }
-    printf("simdsearchFOR: ok\n");
-    return 0;
-}
-
-int test_simdpackedsearch_advanced() {
-    uint32_t buffer[128];
-    uint32_t backbuffer[128];
-	uint32_t out[128];
-    uint32_t result = 0;
-    uint32_t b, i;
-    uint32_t init = 0;
-    __m128i initial = _mm_set1_epi32(init);
-
-
-    /* this test creates delta encoded buffers with different bits, then
-     * performs lower bound searches for each key */
-    for (b = 0; b <= 32; b++) {
-    	uint32_t prev = init;
-        /* initialize the buffer */
-        for (i = 0; i < 128; i++) {
-            buffer[i] =  ((uint32_t)(1431655765 * i + 0xFFFFFFFF)) ;
-            if(b < 32) buffer[i] %= (1<<b);
-        }
-
-        qsort(buffer,128, sizeof(uint32_t), uint32_cmp);
-
-        for (i = 0; i < 128; i++) {
-           buffer[i] = buffer[i] + prev;
-           prev = buffer[i];
-        }
-        for (i = 1; i < 128; i++) {
-        	if(buffer[i] < buffer[i-1] )
-        		buffer[i] = buffer[i-1];
-        }
-        assert(simdmaxbitsd1(init, buffer)<=b);
-        for (i = 0; i < 128; i++) {
-        	out[i] = 0; /* memset would do too */
-        }
-
-        /* delta-encode to 'i' bits */
-        simdpackwithoutmaskd1(init, buffer, (__m128i *)out, b);
-        simdunpackd1(init,  (__m128i *)out, backbuffer, b);
-
-        for (i = 0; i < 128; i++) {
-        	assert(buffer[i] == backbuffer[i]);
-        }
-
-        printf("advanced simdsearchd1: %d bits\n", b);
-
-        for (i = 0; i < 128; i++) {
-        	int pos;
-            initial = _mm_set1_epi32(init);
-        	pos = simdsearchd1(&initial, (__m128i *)out, b,
-                    buffer[i], &result);
-        	assert(pos == simdsearchwithlengthd1(init, (__m128i *)out, b, 128,
-                    buffer[i], &result));
-        	assert(buffer[pos] == buffer[i]);
-            if(pos > 0)
-            	assert(buffer[pos - 1] < buffer[i]);
-            assert(result == buffer[i]);
-        }
-        for (i = 0; i < 128; i++) {
-        	int pos;
-        	if(buffer[i] == 0) continue;
-        	initial = _mm_set1_epi32(init);
-        	pos = simdsearchd1(&initial, (__m128i *)out, b,
-                    buffer[i] - 1, &result);
-        	assert(pos == simdsearchwithlengthd1(init, (__m128i *)out, b, 128,
-                    buffer[i] - 1, &result));
-        	assert(buffer[pos] >= buffer[i]  - 1);
-            if(pos > 0)
-            	assert(buffer[pos - 1] < buffer[i]  - 1);
-            assert(result == buffer[pos]);
-        }
-		for (i = 0; i < 128; i++) {
-			int pos;
-			if (buffer[i] + 1 == 0)
-				continue;
-			initial = _mm_set1_epi32(init);
-			pos = simdsearchd1(&initial, (__m128i *) out, b,
-					buffer[i] + 1, &result);
-			assert(pos == simdsearchwithlengthd1(init, (__m128i *)out, b, 128,
-                    buffer[i] + 1, &result));
-			if(pos == 128) {
-				assert(buffer[i] == buffer[127]);
-			} else {
-			  assert(buffer[pos] >= buffer[i] + 1);
-			  if (pos > 0)
-				assert(buffer[pos - 1] < buffer[i] + 1);
-			  assert(result == buffer[pos]);
-			}
-		}
-    }
-    printf("advanced simdsearchd1: ok\n");
-    return 0;
-}
-
-int test_simdpackedselect() {
-    uint32_t buffer[128];
-    uint32_t initial = 33;
-    int b, i;
-
-    /* initialize the buffer */
-    for (i = 0; i < 128; i++)
-        buffer[i] = (uint32_t)(initial + i);
-
-    /* this test creates delta encoded buffers with different bits, then
-     * performs lower bound searches for each key */
-    for (b = 1; b <= 32; b++) {
-        uint32_t out[128];
-        /* delta-encode to 'i' bits */
-        simdpackwithoutmaskd1(initial, buffer, (__m128i *)out, b);
-
-        printf("simdselectd1: %d bits\n", b);
-
-        /* now perform the searches */
-        for (i = 0; i < 128; i++) {
-            assert(simdselectd1(initial, (__m128i *)out, b, (uint32_t)i)
-                            == initial + i);
-        }
-    }
-    printf("simdselectd1: ok\n");
-    return 0;
-}
-
-int test_simdpackedselect_advanced() {
-    uint32_t buffer[128];
-    uint32_t initial = 33;
-    uint32_t b;
-    int i;
-
-    /* this test creates delta encoded buffers with different bits, then
-     * performs lower bound searches for each key */
-    for (b = 0; b <= 32; b++) {
-        uint32_t prev = initial;
-    	uint32_t out[128];
-        /* initialize the buffer */
-        for (i = 0; i < 128; i++) {
-            buffer[i] =  ((uint32_t)(165576 * i)) ;
-            if(b < 32) buffer[i] %= (1<<b);
-        }
-        for (i = 0; i < 128; i++) {
-           buffer[i] = buffer[i] + prev;
-           prev = buffer[i];
-        }
-
-        for (i = 1; i < 128; i++) {
-        	if(buffer[i] < buffer[i-1] )
-        		buffer[i] = buffer[i-1];
-        }
-        assert(simdmaxbitsd1(initial, buffer)<=b);
-
-        for (i = 0; i < 128; i++) {
-        	out[i] = 0; /* memset would do too */
-        }
-
-        /* delta-encode to 'i' bits */
-        simdpackwithoutmaskd1(initial, buffer, (__m128i *)out, b);
-
-        printf("simdselectd1: %d bits\n", b);
-
-        /* now perform the searches */
-        for (i = 0; i < 128; i++) {
-        	uint32_t valretrieved = simdselectd1(initial, (__m128i *)out, b, (uint32_t)i);
-            assert(valretrieved == buffer[i]);
-        }
-    }
-    printf("advanced simdselectd1: ok\n");
-    return 0;
-}
-#endif
-
-
-int main() {
-    int r;
-    r =  testsetFOR();
-    if (r) {
-         printf("test failure 1\n");
-         return r;
-    }
-
-#ifdef __SSE4_1__
-    r =  testsetd1();
-    if (r) {
-         printf("test failure 2\n");
-         return r;
-    }
-#endif
-    r =  testset();
-    if (r) {
-         printf("test failure 3\n");
-         return r;
-    }
-
-    r = testshortFORpack();
-    if (r) {
-         printf("test failure 4\n");
-         return r;
-    }
-    r = testshortpack();
-    if (r) {
-         printf("test failure 5\n");
-         return r;
-    }
-    r = testlongpack();
-    if (r) {
-         printf("test failure 6\n");
-         return r;
-    }
-#ifdef __SSE4_1__
-    r = test_simdpackedsearchFOR();
-    if (r) {
-         printf("test failure 7\n");
-         return r;
-    }
-
-    r = testFOR();
-    if (r) {
-         printf("test failure 8\n");
-         return r;
-    }
-#endif
-#ifdef __AVX2__
-    r= testbabyavx();
-    if (r) {
-         printf("test failure baby avx\n");
-         return r;
-    }
-
-    r = testavx2();
-    if (r) {
-         printf("test failure 9 avx\n");
-         return r;
-    }
-#endif
-    r = test();
-    if (r) {
-         printf("test failure 9\n");
-         return r;
-    }
-
-    r = test_simdmaxbitsd1_length();
-    if (r) {
-         printf("test failure 10\n");
-         return r;
-    }
-#ifdef __SSE4_1__
-    r = test_simdpackedsearch();
-    if (r) {
-         printf("test failure 11\n");
-         return r;
-    }
-
-    r = test_simdpackedsearch_advanced();
-    if (r) {
-         printf("test failure 12\n");
-         return r;
-    }
-
-    r = test_simdpackedselect();
-    if (r) {
-         printf("test failure 13\n");
-         return r;
-    }
-
-    r = test_simdpackedselect_advanced();
-    if (r) {
-         printf("test failure 14\n");
-         return r;
-    }
-#endif
-    printf("All tests OK!\n");
-
-
-    return 0;
-}
--- a/cpp/simdcomp/tests/unit_chars.c
+++ b/cpp/simdcomp/tests/unit_chars.c
@@ -1,102 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-#include "simdcomp.h"
-
-
-#define get_random_char() (uint8_t)(rand() % 256);
-
-
-int main() {
-    int N = 5000 * SIMDBlockSize, gap;
-    __m128i * buffer = malloc(SIMDBlockSize * sizeof(uint32_t));
-    uint32_t * datain = malloc(N * sizeof(uint32_t));
-    uint32_t * backbuffer = malloc(SIMDBlockSize * sizeof(uint32_t));
-
-    srand(time(NULL));
-
-    for (gap = 1; gap <= 387420489; gap *= 3) {
-        int k;
-        printf(" gap = %u \n", gap);
-
-    /* simulate some random character string, don't care about endiannes */
-        for (k = 0; k < N; ++k) {
-        uint8_t _tmp[4];
- 
-            _tmp[0] = get_random_char();
-            _tmp[1] = get_random_char();
-            _tmp[2] = get_random_char();
-            _tmp[3] = get_random_char();
-
-            memmove(&datain[k], _tmp, 4);
-        }
-        for (k = 0; k * SIMDBlockSize < N; ++k) {
-            /*
-               First part works for general arrays (sorted or unsorted)
-            */
-            int j;
-               /* we compute the bit width */
-            const uint32_t b = maxbits(datain + k * SIMDBlockSize);
-            /* we read 128 integers at "datain + k * SIMDBlockSize" and
-               write b 128-bit vectors at "buffer" */
-            simdpackwithoutmask(datain + k * SIMDBlockSize, buffer, b);
-            /* we read back b1 128-bit vectors at "buffer" and write 128 integers at backbuffer */
-            simdunpack(buffer, backbuffer, b);/* uncompressed */
-            for (j = 0; j < SIMDBlockSize; ++j) {
-                uint8_t chars_back[4];
-                uint8_t chars_in[4];
-
-                memmove(chars_back, &backbuffer[j], 4);
-                memmove(chars_in, &datain[k * SIMDBlockSize + j], 4);
-
-                if (chars_in[0] != chars_back[0]
-                    || chars_in[1] != chars_back[1]
-                    || chars_in[2] != chars_back[2]
-                    || chars_in[3] != chars_back[3]) {
-                    printf("bug in simdpack\n");
-                    return -2;
-                }
-            }
-
-            {
-                /*
-                 next part assumes that the data is sorted (uses differential coding)
-                */
-                uint32_t offset = 0;
-                /* we compute the bit width */
-                const uint32_t b1 = simdmaxbitsd1(offset,
-                datain + k * SIMDBlockSize);
-                   /* we read 128 integers at "datain + k * SIMDBlockSize" and
-                  write b1 128-bit vectors at "buffer" */
-                   simdpackwithoutmaskd1(offset, datain + k * SIMDBlockSize, buffer,
-                b1);
-                   /* we read back b1 128-bit vectors at "buffer" and write 128 integers at backbuffer */
-                   simdunpackd1(offset, buffer, backbuffer, b1);
-                for (j = 0; j < SIMDBlockSize; ++j) {
-                    uint8_t chars_back[4];
-                    uint8_t chars_in[4];
-
-                    memmove(chars_back, &backbuffer[j], 4);
-                    memmove(chars_in, &datain[k * SIMDBlockSize + j], 4);
-
-                    if (chars_in[0] != chars_back[0]
-                        || chars_in[1] != chars_back[1]
-                        || chars_in[2] != chars_back[2]
-                        || chars_in[3] != chars_back[3]) {
-                        printf("bug in simdpack\n");
-                        return -3;
-                    }
-                }
-                offset = datain[k * SIMDBlockSize + SIMDBlockSize - 1];
-            }
-        }
-    }
-    free(buffer);
-    free(datain);
-    free(backbuffer);
-    printf("Code looks good.\n");
-    return 0;
-}
--- a/cpp/simdcomp_wrapper.c
+++ b/cpp/simdcomp_wrapper.c
@@ -1,42 +0,0 @@
-#include "simdcomp.h"
-#include "simdcomputil.h"
-
-// assumes datain has a size of 128 uint32
-// and that buffer is large enough to host the data.
-size_t compress_sorted(
-        const uint32_t* datain,
-        uint8_t* output,
-        const uint32_t offset) {
-    const uint32_t b = simdmaxbitsd1(offset, datain);
-    *output++ = b;
-    simdpackwithoutmaskd1(offset, datain, (__m128i *) output,  b);
-    return 1 + b * sizeof(__m128i);
-}
-
-// assumes datain has a size of 128 uint32
-// and that buffer is large enough to host the data.
-size_t uncompress_sorted(
-        const uint8_t* compressed_data, 
-        uint32_t* output, 
-        uint32_t offset) {
-    const uint32_t b = *compressed_data++;
-    simdunpackd1(offset, (__m128i *)compressed_data, output, b);
-    return 1 + b * sizeof(__m128i);
-}
-
-size_t compress_unsorted(
-        const uint32_t* datain,
-        uint8_t* output) {
-    const uint32_t b = maxbits(datain);
-    *output++ = b;
-    simdpackwithoutmask(datain, (__m128i *) output,  b);
-    return 1 + b * sizeof(__m128i);
-}
-
-size_t uncompress_unsorted(
-        const uint8_t* compressed_data, 
-        uint32_t* output) {
-    const uint32_t b = *compressed_data++;
-    simdunpack((__m128i *)compressed_data, output, b);
-    return 1 + b * sizeof(__m128i);
-}
--- a/cpp/streamvbyte/.gitignore
+++ b/cpp/streamvbyte/.gitignore
@@ -1,32 +0,0 @@
-# Object files
-*.o
-*.ko
-*.obj
-*.elf
-
-# Precompiled Headers
-*.gch
-*.pch
-
-# Libraries
-*.lib
-*.a
-*.la
-*.lo
-
-# Shared objects (inc. Windows DLLs)
-*.dll
-*.so
-*.so.*
-*.dylib
-
-# Executables
-*.exe
-*.out
-*.app
-*.i*86
-*.x86_64
-*.hex
-
-# Debug files
-*.dSYM/
--- a/cpp/streamvbyte/.travis.yml
+++ b/cpp/streamvbyte/.travis.yml
@@ -1,7 +0,0 @@
-language: c
-sudo: false
-compiler:
-  - gcc
-  - clang
-
-script: make && ./unit
--- a/cpp/streamvbyte/LICENSE
+++ b/cpp/streamvbyte/LICENSE
@@ -1,202 +0,0 @@
-Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "{}"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright {yyyy} {name of copyright owner}
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-
--- a/cpp/streamvbyte/README.md
+++ b/cpp/streamvbyte/README.md
@@ -1,60 +0,0 @@
-streamvbyte
-===========
-[![Build Status](https://travis-ci.org/lemire/streamvbyte.png)](https://travis-ci.org/lemire/streamvbyte)
-
-StreamVByte is a new integer compression technique that applies SIMD instructions (vectorization) to
-Google's Group Varint approach. The net result is faster than other byte-oriented compression
-techniques.
-
-The approach is patent-free, the code is available under the Apache License.
-
-
-It includes fast differential coding.
-
-It assumes a recent Intel processor (e.g., haswell or better) .
-
-The code should build using most standard-compliant C99 compilers. The provided makefile
-expects a Linux-like system.
-
-
-Usage:
-
-      make
-      ./unit
-
-See example.c for an example.
-
-Short code sample:
-```C
-// suppose that datain is an array of uint32_t integers
-size_t compsize = streamvbyte_encode(datain, N, compressedbuffer); // encoding
-// here the result is stored in compressedbuffer using compsize bytes
-streamvbyte_decode(compressedbuffer, recovdata, N); // decoding (fast)
-```
-
-If the values are sorted, then it might be preferable to use differential coding:
-```C
-// suppose that datain is an array of uint32_t integers
-size_t compsize = streamvbyte_delta_encode(datain, N, compressedbuffer,0); // encoding
-// here the result is stored in compressedbuffer using compsize bytes
-streamvbyte_delta_decode(compressedbuffer, recovdata, N,0); // decoding (fast)
-```
-You have to know how many integers were coded when you decompress. You can store this 
-information along with the compressed stream.
-
-See also
--------
-* SIMDCompressionAndIntersection: A C++ library to compress and intersect sorted lists of integers using SIMD instructions https://github.com/lemire/SIMDCompressionAndIntersect
-* The FastPFOR C++ library : Fast integer compression https://github.com/lemire/FastPFor
-* High-performance dictionary coding https://github.com/lemire/dictionary
-* LittleIntPacker: C library to pack and unpack short arrays of integers as fast as possible https://github.com/lemire/LittleIntPacker
-* The SIMDComp library: A simple C library for compressing lists of integers using binary packing https://github.com/lemire/simdcomp
-* MaskedVByte: Fast decoder for VByte-compressed integers https://github.com/lemire/MaskedVByte
-* CSharpFastPFOR: A C#  integer compression library  https://github.com/Genbox/CSharpFastPFOR
-* JavaFastPFOR: A java integer compression library https://github.com/lemire/JavaFastPFOR
-* Encoding: Integer Compression Libraries for Go https://github.com/zhenjl/encoding
-* FrameOfReference is a C++ library dedicated to frame-of-reference (FOR) compression: https://github.com/lemire/FrameOfReference
-* libvbyte: A fast implementation for varbyte 32bit/64bit integer compression https://github.com/cruppstahl/libvbyte
-* TurboPFor is a C library that offers lots of interesting optimizations. Well worth checking! (GPL license) https://github.com/powturbo/TurboPFor
-* Oroch is a C++ library that offers a usable API (MIT license) https://github.com/ademakov/Oroch
-
--- a/cpp/streamvbyte/example.c
+++ b/cpp/streamvbyte/example.c
@@ -1,24 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-
-#include "streamvbyte.h"
-
-int main() {
-	int N = 5000;
-	uint32_t * datain = malloc(N * sizeof(uint32_t));
-	uint8_t * compressedbuffer = malloc(N * sizeof(uint32_t));
-	uint32_t * recovdata = malloc(N * sizeof(uint32_t));
-	for (int k = 0; k < N; ++k)
-		datain[k] = 120;
-	size_t compsize = streamvbyte_encode(datain, N, compressedbuffer); // encoding
-	// here the result is stored in compressedbuffer using compsize bytes
-	size_t compsize2 = streamvbyte_decode(compressedbuffer, recovdata,
-					N); // decoding (fast)
-	assert(compsize == compsize2);
-	free(datain);
-	free(compressedbuffer);
-	free(recovdata);
-	printf("Compressed %d integers down to %d bytes.\n",N,(int) compsize);
-	return 0;
-}
--- a/cpp/streamvbyte/include/streamvbyte.h
+++ b/cpp/streamvbyte/include/streamvbyte.h
@@ -1,19 +0,0 @@
-
-#ifndef VARINTDECODE_H_
-#define VARINTDECODE_H_
-#define __STDC_FORMAT_MACROS
-#include <inttypes.h>
-#include <stdint.h>// please use a C99-compatible compiler
-#include <stddef.h>
-
-
-// Encode an array of a given length read from in to bout in varint format.
-// Returns the number of bytes written.
-size_t streamvbyte_encode(const uint32_t *in, uint32_t length, uint8_t *out);
-
-// Read "length" 32-bit integers in varint format from in, storing the result in out.
-// Returns the number of bytes read.
-size_t streamvbyte_decode(const uint8_t* in, uint32_t* out, uint32_t length);
-
-
-#endif /* VARINTDECODE_H_ */
--- a/cpp/streamvbyte/include/streamvbytedelta.h
+++ b/cpp/streamvbyte/include/streamvbytedelta.h
@@ -1,24 +0,0 @@
-/*
- * streamvbytedelta.h
- *
- *  Created on: Apr 14, 2016
- *      Author: lemire
- */
-
-#ifndef INCLUDE_STREAMVBYTEDELTA_H_
-#define INCLUDE_STREAMVBYTEDELTA_H_
-
-
-// Encode an array of a given length read from in to bout in StreamVByte format.
-// Returns the number of bytes written.
-// this version uses differential coding (coding differences between values) starting at prev (you can often set prev to zero)
-size_t streamvbyte_delta_encode(const uint32_t *in, uint32_t length, uint8_t *out, uint32_t  prev);
-
-// Read "length" 32-bit integers in StreamVByte format from in, storing the result in out.
-// Returns the number of bytes read.
-// this version uses differential coding (coding differences between values) starting at prev (you can often set prev to zero)
-size_t streamvbyte_delta_decode(const uint8_t* in, uint32_t* out, uint32_t length, uint32_t  prev);
-
-
-
-#endif /* INCLUDE_STREAMVBYTEDELTA_H_ */
--- a/cpp/streamvbyte/makefile
+++ b/cpp/streamvbyte/makefile
@@ -1,58 +0,0 @@
-# minimalist makefile
-.SUFFIXES:
-#
-.SUFFIXES: .cpp .o .c .h
-
-CFLAGS = -fPIC -march=native -std=c99 -O3 -Wall -Wextra -pedantic -Wshadow
-LDFLAGS = -shared
-LIBNAME=libstreamvbyte.so.0.0.1
-all:  unit $(LIBNAME)
-test:
-	./unit
-install: $(OBJECTS)
-	cp $(LIBNAME) /usr/local/lib
-	ln -s /usr/local/lib/$(LIBNAME) /usr/local/lib/libstreamvbyte.so
-	ldconfig
-	cp $(HEADERS) /usr/local/include
-
-
-
-HEADERS=./include/streamvbyte.h ./include/streamvbytedelta.h 
-
-uninstall:
-	for h in $(HEADERS) ; do rm  /usr/local/$$h; done
-	rm  /usr/local/lib/$(LIBNAME)
-	rm /usr/local/lib/libstreamvbyte.so
-	ldconfig
-
-
-OBJECTS= streamvbyte.o streamvbytedelta.o
-
-
-
-streamvbytedelta.o: ./src/streamvbytedelta.c $(HEADERS)
-	$(CC) $(CFLAGS) -c ./src/streamvbytedelta.c -Iinclude
-
-
-streamvbyte.o: ./src/streamvbyte.c $(HEADERS)
-	$(CC) $(CFLAGS) -c ./src/streamvbyte.c -Iinclude
-
-
-
-$(LIBNAME): $(OBJECTS)
-	$(CC) $(CFLAGS) -o $(LIBNAME) $(OBJECTS)  $(LDFLAGS)
-
-
-
-
-example: ./example.c    $(HEADERS) $(OBJECTS)
-	$(CC) $(CFLAGS) -o example ./example.c -Iinclude  $(OBJECTS)
-
-unit: ./tests/unit.c    $(HEADERS) $(OBJECTS)
-	$(CC) $(CFLAGS) -o unit ./tests/unit.c -Iinclude  $(OBJECTS)
-
-dynunit: ./tests/unit.c    $(HEADERS) $(LIBNAME)
-	$(CC) $(CFLAGS) -o dynunit ./tests/unit.c -Iinclude  -lstreamvbyte
-
-clean:
-	rm -f unit *.o $(LIBNAME) example
--- a/cpp/streamvbyte/src/streamvbyte.c
+++ b/cpp/streamvbyte/src/streamvbyte.c
@@ -1,495 +0,0 @@
-#include "streamvbyte.h"
-#if defined(_MSC_VER)
-     /* Microsoft C/C++-compatible compiler */
-     #include <intrin.h>
-#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
-     /* GCC-compatible compiler, targeting x86/x86-64 */
-     #include <x86intrin.h>
-#elif defined(__GNUC__) && defined(__ARM_NEON__)
-     /* GCC-compatible compiler, targeting ARM with NEON */
-     #include <arm_neon.h>
-#elif defined(__GNUC__) && defined(__IWMMXT__)
-     /* GCC-compatible compiler, targeting ARM with WMMX */
-     #include <mmintrin.h>
-#elif (defined(__GNUC__) || defined(__xlC__)) && (defined(__VEC__) || defined(__ALTIVEC__))
-     /* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */
-     #include <altivec.h>
-#elif defined(__GNUC__) && defined(__SPE__)
-     /* GCC-compatible compiler, targeting PowerPC with SPE */
-     #include <spe.h>
-#endif
-
-static uint8_t lengthTable[256] = { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9,
-		10, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 6, 7, 8, 9, 7, 8,
-		9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10,
-		11, 12, 10, 11, 12, 13, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10,
-		11, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10,
-		8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11,
-		12, 10, 11, 12, 13, 11, 12, 13, 14, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10,
-		11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12,
-		13, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 9, 10,
-		11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15, 7, 8, 9, 10, 8,
-		9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11, 12,
-		10, 11, 12, 13, 11, 12, 13, 14, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12,
-		13, 14, 12, 13, 14, 15, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15,
-		13, 14, 15, 16 };
-
-static uint8_t shuffleTable[256][16] = { { 0, -1, -1, -1, 1, -1, -1, -1, 2, -1,
-		-1, -1, 3, -1, -1, -1 }, // 1111
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1 },  // 2111
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1 },   // 3111
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1 },    // 4111
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1 },  // 1211
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1 },   // 2211
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1 },    // 3211
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, -1, -1, -1 },     // 4211
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, -1, -1, -1 },   // 1311
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, -1, -1, -1 },    // 2311
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, -1, -1, -1 },     // 3311
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, -1, -1, -1 },      // 4311
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, -1, -1, -1 },    // 1411
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, -1, -1, -1 },     // 2411
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, -1, -1, -1 },      // 3411
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, -1, -1, -1 },       // 4411
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1 },  // 1121
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1 },   // 2121
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1 },    // 3121
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1 },     // 4121
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1 },   // 1221
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1 },    // 2221
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1 },     // 3221
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, -1, -1, -1 },      // 4221
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, -1, -1, -1 },    // 1321
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, -1, -1, -1 },     // 2321
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, -1, -1, -1 },      // 3321
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, -1, -1, -1 },       // 4321
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, -1, -1, -1 },     // 1421
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, -1, -1, -1 },      // 2421
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, -1, -1, -1 },       // 3421
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, -1, -1, -1 },       // 4421
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1 },   // 1131
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1 },    // 2131
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1 },     // 3131
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1 },      // 4131
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1 },    // 1231
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1 },     // 2231
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1 },      // 3231
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, -1, -1, -1 },       // 4231
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, -1, -1, -1 },     // 1331
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, -1, -1, -1 },      // 2331
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, -1, -1, -1 },       // 3331
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, -1, -1, -1 },       // 4331
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, -1, -1 },      // 1431
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, -1, -1, -1 },       // 2431
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, -1, -1, -1 },       // 3431
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, -1, -1, -1 },       // 4431
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1 },    // 1141
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, -1, -1, -1 },     // 2141
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, -1, -1, -1 },      // 3141
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, -1, -1, -1 },       // 4141
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, -1, -1, -1 },     // 1241
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, -1, -1, -1 },      // 2241
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, -1, -1, -1 },       // 3241
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, -1, -1, -1 },       // 4241
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, -1, -1, -1 },      // 1341
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, -1, -1, -1 },       // 2341
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, -1, -1, -1 },       // 3341
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, -1, -1, -1 },       // 4341
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1 },       // 1441
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1, -1 },       // 2441
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, -1 },       // 3441
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1, -1 },       // 4441
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1 },  // 1112
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1 },   // 2112
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1 },    // 3112
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1 },     // 4112
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1 },   // 1212
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1 },    // 2212
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1 },     // 3212
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, -1, -1 },      // 4212
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, -1, -1 },    // 1312
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, -1, -1 },     // 2312
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, -1, -1 },      // 3312
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, -1, -1 },       // 4312
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, -1, -1 },     // 1412
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, -1, -1 },      // 2412
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, -1, -1 },       // 3412
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, -1, -1 },       // 4412
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1 },   // 1122
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1 },    // 2122
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1 },     // 3122
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1 },      // 4122
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1 },    // 1222
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1 },     // 2222
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1 },      // 3222
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, -1, -1 },       // 4222
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, -1, -1 },     // 1322
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, -1, -1 },      // 2322
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, -1, -1 },       // 3322
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, -1, -1 },       // 4322
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, -1, -1 },      // 1422
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, -1, -1 },       // 2422
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, -1, -1 },       // 3422
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, -1, -1 },       // 4422
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1 },    // 1132
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1 },     // 2132
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1 },      // 3132
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1 },       // 4132
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1 },     // 1232
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1 },      // 2232
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1 },       // 3232
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, -1, -1 },       // 4232
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, -1, -1 },      // 1332
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, -1, -1 },       // 2332
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, -1, -1 },       // 3332
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, -1, -1 },       // 4332
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, -1, -1 },       // 1432
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, -1, -1 },       // 2432
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, -1, -1 },       // 3432
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, -1, -1 },       // 4432
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1 },     // 1142
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, -1, -1 },      // 2142
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, -1, -1 },       // 3142
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, -1, -1 },       // 4142
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, -1, -1 },      // 1242
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, -1, -1 },       // 2242
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, -1, -1 },       // 3242
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, -1, -1 },       // 4242
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, -1, -1 },       // 1342
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, -1, -1 },       // 2342
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, -1, -1 },       // 3342
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, -1, -1 },       // 4342
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1 },       // 1442
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1 },       // 2442
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1 },       // 3442
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1 },       // 4442
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1 },   // 1113
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1 },    // 2113
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1 },     // 3113
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1 },      // 4113
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1 },    // 1213
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1 },     // 2213
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1 },      // 3213
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, 9, -1 },       // 4213
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, -1 },     // 1313
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, -1 },      // 2313
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, -1 },       // 3313
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, 10, -1 },       // 4313
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, 8, -1 },      // 1413
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, 9, -1 },       // 2413
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, 10, -1 },       // 3413
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, 11, -1 },       // 4413
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1 },    // 1123
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1 },     // 2123
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1 },      // 3123
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1 },       // 4123
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1 },     // 1223
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1 },      // 2223
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1 },       // 3223
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, 10, -1 },       // 4223
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, -1 },      // 1323
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, -1 },       // 2323
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, -1 },       // 3323
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, 11, -1 },       // 4323
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, 9, -1 },       // 1423
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, 10, -1 },       // 2423
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, 11, -1 },       // 3423
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, 12, -1 },       // 4423
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1 },     // 1133
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1 },      // 2133
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1 },       // 3133
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1 },       // 4133
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1 },      // 1233
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1 },       // 2233
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1 },       // 3233
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, 11, -1 },       // 4233
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, -1 },       // 1333
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, -1 },       // 2333
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1 },       // 3333
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, 12, -1 },       // 4333
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, -1 },       // 1433
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, -1 },       // 2433
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, 12, -1 },       // 3433
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, 13, -1 },       // 4433
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1 },      // 1143
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, -1 },       // 2143
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, 10, -1 },       // 3143
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, 11, -1 },       // 4143
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, 9, -1 },       // 1243
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, 10, -1 },       // 2243
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, 11, -1 },       // 3243
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, 12, -1 },       // 4243
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, 10, -1 },       // 1343
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, 11, -1 },       // 2343
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, -1 },       // 3343
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, 13, -1 },       // 4343
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1 },       // 1443
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1 },       // 2443
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1 },       // 3443
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -1 },       // 4443
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6 },    // 1114
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, 7 },     // 2114
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, 8 },      // 3114
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, 8, 9 },       // 4114
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, 7 },     // 1214
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, 8 },      // 2214
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, 9 },       // 3214
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, 9, 10 },       // 4214
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, 8 },      // 1314
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, 9 },       // 2314
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, 10 },       // 3314
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, 10, 11 },       // 4314
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, 8, 9 },       // 1414
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, 9, 10 },       // 2414
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, 10, 11 },       // 3414
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, 11, 12 },       // 4414
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7 },     // 1124
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, 8 },      // 2124
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, 9 },       // 3124
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, 9, 10 },       // 4124
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, 8 },      // 1224
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, 9 },       // 2224
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, 10 },       // 3224
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, 10, 11 },       // 4224
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, 9 },       // 1324
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, 10 },       // 2324
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, 11 },       // 3324
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, 11, 12 },       // 4324
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, 9, 10 },       // 1424
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, 10, 11 },       // 2424
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, 11, 12 },       // 3424
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, 12, 13 },       // 4424
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8 },      // 1134
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, 9 },       // 2134
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, 10 },       // 3134
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, 10, 11 },       // 4134
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, 9 },       // 1234
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, 10 },       // 2234
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, 11 },       // 3234
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, 11, 12 },       // 4234
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, 10 },       // 1334
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, 11 },       // 2334
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, 12 },       // 3334
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, 12, 13 },       // 4334
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, 11 },       // 1434
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, 12 },       // 2434
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, 12, 13 },       // 3434
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, 13, 14 },       // 4434
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9 },       // 1144
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, 10 },       // 2144
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, 10, 11 },       // 3144
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, 11, 12 },       // 4144
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, 9, 10 },       // 1244
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, 10, 11 },       // 2244
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, 11, 12 },       // 3244
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, 12, 13 },       // 4244
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, 10, 11 },       // 1344
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, 11, 12 },       // 2344
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13 },       // 3344
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, 13, 14 },       // 4344
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 },       // 1444
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 },       // 2444
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 },       // 3444
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }        // 4444
-};
-
-static uint8_t _encode_data(uint32_t val, uint8_t *__restrict__ *dataPtrPtr) {
-	uint8_t *dataPtr = *dataPtrPtr;
-	uint8_t code;
-
-	if (val < (1 << 8)) { // 1 byte
-		*dataPtr = (uint8_t)(val);
-		*dataPtrPtr += 1;
-		code = 0;
-	} else if (val < (1 << 16)) { // 2 bytes
-		*(uint16_t *) dataPtr = (uint16_t)(val);
-		*dataPtrPtr += 2;
-		code = 1;
-	} else if (val < (1 << 24)) { // 3 bytes
-		*(uint16_t *) dataPtr = (uint16_t)(val);
-		*(dataPtr + 2) = (uint8_t)(val >> 16);
-		*dataPtrPtr += 3;
-		code = 2;
-	} else { // 4 bytes
-		*(uint32_t *) dataPtr = val;
-		*dataPtrPtr += 4;
-		code = 3;
-	}
-
-	return code;
-}
-
-static uint8_t *svb_encode_scalar(const uint32_t *in,
-		uint8_t *__restrict__ keyPtr, uint8_t *__restrict__ dataPtr,
-		uint32_t count) {
-	if (count == 0)
-		return dataPtr; // exit immediately if no data
-
-	uint8_t shift = 0; // cycles 0, 2, 4, 6, 0, 2, 4, 6, ...
-	uint8_t key = 0;
-	for (uint32_t c = 0; c < count; c++) {
-		if (shift == 8) {
-			shift = 0;
-			*keyPtr++ = key;
-			key = 0;
-		}
-		uint32_t val = in[c];
-		uint8_t code = _encode_data(val, &dataPtr);
-		key |= code << shift;
-		shift += 2;
-	}
-
-	*keyPtr = key;  // write last key (no increment needed)
-	return dataPtr; // pointer to first unused data byte
-}
-
-// Encode an array of a given length read from in to bout in streamvbyte format.
-// Returns the number of bytes written.
-size_t streamvbyte_encode(const uint32_t *in, uint32_t count, uint8_t *out) {
-	uint8_t *keyPtr = out;
-	uint32_t keyLen = (count + 3) / 4; // 2-bits rounded to full byte
-	uint8_t *dataPtr = keyPtr + keyLen; // variable byte data after all keys
-	return svb_encode_scalar(in, keyPtr, dataPtr, count) - out;
-}
-
-static inline __m128i _decode_avx(uint32_t key,
-		const uint8_t *__restrict__ *dataPtrPtr) {
-	uint8_t len = lengthTable[key];
-	__m128i Data = _mm_loadu_si128((__m128i *) *dataPtrPtr);
-	__m128i Shuf = *(__m128i *) &shuffleTable[key];
-
-	Data = _mm_shuffle_epi8(Data, Shuf);
-	*dataPtrPtr += len;
-	return Data;
-}
-
-static inline void _write_avx(uint32_t *out, __m128i Vec) {
-	_mm_storeu_si128((__m128i *) out, Vec);
-}
-
-static inline uint32_t _decode_data(const uint8_t **dataPtrPtr, uint8_t code) {
-	const uint8_t *dataPtr = *dataPtrPtr;
-	uint32_t val;
-
-	if (code == 0) { // 1 byte
-		val = (uint32_t) * dataPtr;
-		dataPtr += 1;
-	} else if (code == 1) { // 2 bytes
-		val = (uint32_t) * (uint16_t *) dataPtr;
-		dataPtr += 2;
-	} else if (code == 2) { // 3 bytes
-		val = (uint32_t) * (uint16_t *) dataPtr;
-		val |= *(dataPtr + 2) << 16;
-		dataPtr += 3;
-	} else {                      // code == 3
-		val = *(uint32_t *) dataPtr; // 4 bytes
-		dataPtr += 4;
-	}
-
-	*dataPtrPtr = dataPtr;
-	return val;
-}
-static const uint8_t *svb_decode_scalar(uint32_t *outPtr, const uint8_t *keyPtr,
-		const uint8_t *dataPtr, uint32_t count) {
-	if (count == 0)
-		return dataPtr; // no reads or writes if no data
-
-	uint8_t shift = 0;
-	uint32_t key = *keyPtr++;
-	for (uint32_t c = 0; c < count; c++) {
-		if (shift == 8) {
-			shift = 0;
-			key = *keyPtr++;
-		}
-		uint32_t val = _decode_data(&dataPtr, (key >> shift) & 0x3);
-		*outPtr++ = val;
-		shift += 2;
-	}
-
-	return dataPtr; // pointer to first unused byte after end
-}
-
-const uint8_t *svb_decode_avx_simple(uint32_t *out,
-		const uint8_t *__restrict__ keyPtr, const uint8_t *__restrict__ dataPtr,
-		uint64_t count) {
-
-	uint64_t keybytes = count / 4; // number of key bytes
-	__m128i Data;
-	if (keybytes >= 8) {
-
-		int64_t Offset = -(int64_t) keybytes / 8 + 1;
-
-		const uint64_t *keyPtr64 = (const uint64_t *) keyPtr - Offset;
-		uint64_t nextkeys = keyPtr64[Offset];
-		for (; Offset != 0; ++Offset) {
-			uint64_t keys = nextkeys;
-			nextkeys = keyPtr64[Offset + 1];
-
-			Data = _decode_avx((keys & 0xFF), &dataPtr);
-			_write_avx(out, Data);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			_write_avx(out + 4, Data);
-
-			keys >>= 16;
-			Data = _decode_avx((keys & 0xFF), &dataPtr);
-			_write_avx(out + 8, Data);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			_write_avx(out + 12, Data);
-
-			keys >>= 16;
-			Data = _decode_avx((keys & 0xFF), &dataPtr);
-			_write_avx(out + 16, Data);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			_write_avx(out + 20, Data);
-
-			keys >>= 16;
-			Data = _decode_avx((keys & 0xFF), &dataPtr);
-			_write_avx(out + 24, Data);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			_write_avx(out + 28, Data);
-
-			out += 32;
-		}
-		{
-			uint64_t keys = nextkeys;
-
-			Data = _decode_avx((keys & 0xFF), &dataPtr);
-			_write_avx(out, Data);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			_write_avx(out + 4, Data);
-
-			keys >>= 16;
-			Data = _decode_avx((keys & 0xFF), &dataPtr);
-			_write_avx(out + 8, Data);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			_write_avx(out + 12, Data);
-
-			keys >>= 16;
-			Data = _decode_avx((keys & 0xFF), &dataPtr);
-			_write_avx(out + 16, Data);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			_write_avx(out + 20, Data);
-
-			keys >>= 16;
-			Data = _decode_avx((keys & 0xFF), &dataPtr);
-			_write_avx(out + 24, Data);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			_write_avx(out + 28, Data);
-
-			out += 32;
-		}
-	}
-	uint64_t consumedkeys = keybytes - (keybytes & 7);
-	return svb_decode_scalar(out, keyPtr + consumedkeys, dataPtr, count & 31);
-}
-
-// Read count 32-bit integers in maskedvbyte format from in, storing the result in out.  Returns the number of bytes read.
-size_t streamvbyte_decode(const uint8_t* in, uint32_t* out, uint32_t count) {
-	if (count == 0)
-		return 0;
-	const uint8_t *keyPtr = in;            // full list of keys is next
-	uint32_t keyLen = ((count + 3) / 4); // 2-bits per key (rounded up)
-	const uint8_t *dataPtr = keyPtr + keyLen;  // data starts at end of keys
-	return svb_decode_avx_simple(out, keyPtr, dataPtr, count) - in;
-
-}
--- a/cpp/streamvbyte/src/streamvbytedelta.c
+++ b/cpp/streamvbyte/src/streamvbytedelta.c
@@ -1,575 +0,0 @@
-#include "streamvbyte.h"
-#if defined(_MSC_VER)
-     /* Microsoft C/C++-compatible compiler */
-     #include <intrin.h>
-#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
-     /* GCC-compatible compiler, targeting x86/x86-64 */
-     #include <x86intrin.h>
-#elif defined(__GNUC__) && defined(__ARM_NEON__)
-     /* GCC-compatible compiler, targeting ARM with NEON */
-     #include <arm_neon.h>
-#elif defined(__GNUC__) && defined(__IWMMXT__)
-     /* GCC-compatible compiler, targeting ARM with WMMX */
-     #include <mmintrin.h>
-#elif (defined(__GNUC__) || defined(__xlC__)) && (defined(__VEC__) || defined(__ALTIVEC__))
-     /* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */
-     #include <altivec.h>
-#elif defined(__GNUC__) && defined(__SPE__)
-     /* GCC-compatible compiler, targeting PowerPC with SPE */
-     #include <spe.h>
-#endif
-
-static uint8_t lengthTable[256] = { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9,
-		10, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 6, 7, 8, 9, 7, 8,
-		9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10,
-		11, 12, 10, 11, 12, 13, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10,
-		11, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10,
-		8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11,
-		12, 10, 11, 12, 13, 11, 12, 13, 14, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10,
-		11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12,
-		13, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 9, 10,
-		11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15, 7, 8, 9, 10, 8,
-		9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11, 12,
-		10, 11, 12, 13, 11, 12, 13, 14, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12,
-		13, 14, 12, 13, 14, 15, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15,
-		13, 14, 15, 16 };
-
-static uint8_t shuffleTable[256][16] = { { 0, -1, -1, -1, 1, -1, -1, -1, 2, -1,
-		-1, -1, 3, -1, -1, -1 }, // 1111
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1 },  // 2111
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1 },   // 3111
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1 },    // 4111
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1 },  // 1211
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1 },   // 2211
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1 },    // 3211
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, -1, -1, -1 },     // 4211
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, -1, -1, -1 },   // 1311
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, -1, -1, -1 },    // 2311
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, -1, -1, -1 },     // 3311
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, -1, -1, -1 },      // 4311
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, -1, -1, -1 },    // 1411
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, -1, -1, -1 },     // 2411
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, -1, -1, -1 },      // 3411
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, -1, -1, -1 },       // 4411
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1 },  // 1121
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1 },   // 2121
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1 },    // 3121
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1 },     // 4121
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1 },   // 1221
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1 },    // 2221
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1 },     // 3221
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, -1, -1, -1 },      // 4221
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, -1, -1, -1 },    // 1321
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, -1, -1, -1 },     // 2321
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, -1, -1, -1 },      // 3321
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, -1, -1, -1 },       // 4321
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, -1, -1, -1 },     // 1421
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, -1, -1, -1 },      // 2421
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, -1, -1, -1 },       // 3421
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, -1, -1, -1 },       // 4421
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1 },   // 1131
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1 },    // 2131
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1 },     // 3131
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1 },      // 4131
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1 },    // 1231
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1 },     // 2231
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1 },      // 3231
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, -1, -1, -1 },       // 4231
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, -1, -1, -1 },     // 1331
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, -1, -1, -1 },      // 2331
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, -1, -1, -1 },       // 3331
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, -1, -1, -1 },       // 4331
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, -1, -1 },      // 1431
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, -1, -1, -1 },       // 2431
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, -1, -1, -1 },       // 3431
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, -1, -1, -1 },       // 4431
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1 },    // 1141
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, -1, -1, -1 },     // 2141
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, -1, -1, -1 },      // 3141
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, -1, -1, -1 },       // 4141
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, -1, -1, -1 },     // 1241
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, -1, -1, -1 },      // 2241
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, -1, -1, -1 },       // 3241
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, -1, -1, -1 },       // 4241
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, -1, -1, -1 },      // 1341
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, -1, -1, -1 },       // 2341
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, -1, -1, -1 },       // 3341
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, -1, -1, -1 },       // 4341
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1 },       // 1441
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1, -1 },       // 2441
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, -1 },       // 3441
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1, -1 },       // 4441
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1 },  // 1112
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1 },   // 2112
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1 },    // 3112
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1 },     // 4112
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1 },   // 1212
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1 },    // 2212
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1 },     // 3212
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, -1, -1 },      // 4212
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, -1, -1 },    // 1312
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, -1, -1 },     // 2312
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, -1, -1 },      // 3312
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, -1, -1 },       // 4312
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, -1, -1 },     // 1412
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, -1, -1 },      // 2412
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, -1, -1 },       // 3412
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, -1, -1 },       // 4412
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1 },   // 1122
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1 },    // 2122
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1 },     // 3122
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1 },      // 4122
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1 },    // 1222
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1 },     // 2222
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1 },      // 3222
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, -1, -1 },       // 4222
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, -1, -1 },     // 1322
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, -1, -1 },      // 2322
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, -1, -1 },       // 3322
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, -1, -1 },       // 4322
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, -1, -1 },      // 1422
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, -1, -1 },       // 2422
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, -1, -1 },       // 3422
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, -1, -1 },       // 4422
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1 },    // 1132
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1 },     // 2132
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1 },      // 3132
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1 },       // 4132
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1 },     // 1232
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1 },      // 2232
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1 },       // 3232
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, -1, -1 },       // 4232
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, -1, -1 },      // 1332
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, -1, -1 },       // 2332
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, -1, -1 },       // 3332
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, -1, -1 },       // 4332
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, -1, -1 },       // 1432
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, -1, -1 },       // 2432
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, -1, -1 },       // 3432
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, -1, -1 },       // 4432
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1 },     // 1142
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, -1, -1 },      // 2142
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, -1, -1 },       // 3142
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, -1, -1 },       // 4142
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, -1, -1 },      // 1242
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, -1, -1 },       // 2242
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, -1, -1 },       // 3242
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, -1, -1 },       // 4242
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, -1, -1 },       // 1342
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, -1, -1 },       // 2342
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, -1, -1 },       // 3342
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, -1, -1 },       // 4342
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1 },       // 1442
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1 },       // 2442
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1 },       // 3442
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1 },       // 4442
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1 },   // 1113
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1 },    // 2113
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1 },     // 3113
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1 },      // 4113
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1 },    // 1213
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1 },     // 2213
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1 },      // 3213
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, 9, -1 },       // 4213
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, -1 },     // 1313
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, -1 },      // 2313
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, -1 },       // 3313
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, 10, -1 },       // 4313
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, 8, -1 },      // 1413
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, 9, -1 },       // 2413
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, 10, -1 },       // 3413
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, 11, -1 },       // 4413
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1 },    // 1123
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1 },     // 2123
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1 },      // 3123
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1 },       // 4123
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1 },     // 1223
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1 },      // 2223
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1 },       // 3223
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, 10, -1 },       // 4223
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, -1 },      // 1323
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, -1 },       // 2323
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, -1 },       // 3323
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, 11, -1 },       // 4323
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, 9, -1 },       // 1423
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, 10, -1 },       // 2423
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, 11, -1 },       // 3423
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, 12, -1 },       // 4423
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1 },     // 1133
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1 },      // 2133
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1 },       // 3133
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1 },       // 4133
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1 },      // 1233
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1 },       // 2233
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1 },       // 3233
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, 11, -1 },       // 4233
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, -1 },       // 1333
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, -1 },       // 2333
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1 },       // 3333
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, 12, -1 },       // 4333
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, -1 },       // 1433
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, -1 },       // 2433
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, 12, -1 },       // 3433
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, 13, -1 },       // 4433
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1 },      // 1143
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, -1 },       // 2143
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, 10, -1 },       // 3143
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, 11, -1 },       // 4143
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, 9, -1 },       // 1243
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, 10, -1 },       // 2243
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, 11, -1 },       // 3243
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, 12, -1 },       // 4243
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, 10, -1 },       // 1343
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, 11, -1 },       // 2343
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, -1 },       // 3343
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, 13, -1 },       // 4343
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1 },       // 1443
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1 },       // 2443
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1 },       // 3443
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -1 },       // 4443
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6 },    // 1114
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, 7 },     // 2114
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, 8 },      // 3114
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, 8, 9 },       // 4114
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, 7 },     // 1214
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, 8 },      // 2214
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, 9 },       // 3214
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, 9, 10 },       // 4214
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, 8 },      // 1314
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, 9 },       // 2314
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, 10 },       // 3314
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, 10, 11 },       // 4314
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, 8, 9 },       // 1414
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, 9, 10 },       // 2414
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, 10, 11 },       // 3414
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, 11, 12 },       // 4414
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7 },     // 1124
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, 8 },      // 2124
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, 9 },       // 3124
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, 9, 10 },       // 4124
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, 8 },      // 1224
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, 9 },       // 2224
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, 10 },       // 3224
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, 10, 11 },       // 4224
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, 9 },       // 1324
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, 10 },       // 2324
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, 11 },       // 3324
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, 11, 12 },       // 4324
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, 9, 10 },       // 1424
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, 10, 11 },       // 2424
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, 11, 12 },       // 3424
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, 12, 13 },       // 4424
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8 },      // 1134
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, 9 },       // 2134
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, 10 },       // 3134
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, 10, 11 },       // 4134
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, 9 },       // 1234
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, 10 },       // 2234
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, 11 },       // 3234
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, 11, 12 },       // 4234
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, 10 },       // 1334
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, 11 },       // 2334
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, 12 },       // 3334
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, 12, 13 },       // 4334
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, 11 },       // 1434
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, 12 },       // 2434
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, 12, 13 },       // 3434
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, 13, 14 },       // 4434
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9 },       // 1144
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, 10 },       // 2144
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, 10, 11 },       // 3144
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, 11, 12 },       // 4144
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, 9, 10 },       // 1244
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, 10, 11 },       // 2244
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, 11, 12 },       // 3244
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, 12, 13 },       // 4244
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, 10, 11 },       // 1344
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, 11, 12 },       // 2344
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13 },       // 3344
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, 13, 14 },       // 4344
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 },       // 1444
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 },       // 2444
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 },       // 3444
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }        // 4444
-};
-
-static uint8_t _encode_data(uint32_t val, uint8_t *__restrict__ *dataPtrPtr) {
-	uint8_t *dataPtr = *dataPtrPtr;
-	uint8_t code;
-
-	if (val < (1 << 8)) { // 1 byte
-		*dataPtr = (uint8_t)(val);
-		*dataPtrPtr += 1;
-		code = 0;
-	} else if (val < (1 << 16)) { // 2 bytes
-		*(uint16_t *) dataPtr = (uint16_t)(val);
-		*dataPtrPtr += 2;
-		code = 1;
-	} else if (val < (1 << 24)) { // 3 bytes
-		*(uint16_t *) dataPtr = (uint16_t)(val);
-		*(dataPtr + 2) = (uint8_t)(val >> 16);
-		*dataPtrPtr += 3;
-		code = 2;
-	} else { // 4 bytes
-		*(uint32_t *) dataPtr = val;
-		*dataPtrPtr += 4;
-		code = 3;
-	}
-
-	return code;
-}
-
-static uint8_t *svb_encode_scalar_d1_init(const uint32_t *in,
-		uint8_t *__restrict__ keyPtr, uint8_t *__restrict__ dataPtr,
-		uint32_t count, uint32_t prev) {
-	if (count == 0)
-		return dataPtr; // exit immediately if no data
-
-	uint8_t shift = 0; // cycles 0, 2, 4, 6, 0, 2, 4, 6, ...
-	uint8_t key = 0;
-	for (uint32_t c = 0; c < count; c++) {
-		if (shift == 8) {
-			shift = 0;
-			*keyPtr++ = key;
-			key = 0;
-		}
-		uint32_t val = in[c] - prev;
-		prev = in[c];
-		uint8_t code = _encode_data(val, &dataPtr);
-		key |= code << shift;
-		shift += 2;
-	}
-
-	*keyPtr = key;  // write last key (no increment needed)
-	return dataPtr; // pointer to first unused data byte
-}
-
-size_t streamvbyte_delta_encode(const uint32_t *in, uint32_t count, uint8_t *out,
-		uint32_t prev) {
-	uint8_t *keyPtr = out;         // keys come immediately after 32-bit count
-	uint32_t keyLen = (count + 3) / 4; // 2-bits rounded to full byte
-	uint8_t *dataPtr = keyPtr + keyLen; // variable byte data after all keys
-
-	return svb_encode_scalar_d1_init(in, keyPtr, dataPtr, count, prev) - out;
-
-}
-
-static inline __m128i _decode_avx(uint32_t key, const uint8_t *__restrict__ *dataPtrPtr) {
-	uint8_t len = lengthTable[key];
-	__m128i Data = _mm_loadu_si128((__m128i *) *dataPtrPtr);
-	__m128i Shuf = *(__m128i *) &shuffleTable[key];
-
-	Data = _mm_shuffle_epi8(Data, Shuf);
-	*dataPtrPtr += len;
-
-	return Data;
-}
-#define BroadcastLastXMM 0xFF // bits 0-7 all set to choose highest element
-
-
-
-static inline void _write_avx(uint32_t *out, __m128i Vec) {
-	_mm_storeu_si128((__m128i *) out, Vec);
-}
-
-static __m128i _write_avx_d1(uint32_t *out, __m128i Vec, __m128i Prev) {
-	__m128i Add = _mm_slli_si128(Vec, 4); // Cycle 1: [- A B C] (already done)
-	Prev = _mm_shuffle_epi32(Prev, BroadcastLastXMM); // Cycle 2: [P P P P]
-	Vec = _mm_add_epi32(Vec, Add);                    // Cycle 2: [A AB BC CD]
-	Add = _mm_slli_si128(Vec, 8);                     // Cycle 3: [- - A AB]
-	Vec = _mm_add_epi32(Vec, Prev);                 // Cycle 3: [PA PAB PBC PCD]
-	Vec = _mm_add_epi32(Vec, Add); // Cycle 4: [PA PAB PABC PABCD]
-
-	_write_avx(out, Vec);
-	return Vec;
-}
-
-#ifndef _MSC_VER
-static __m128i High16To32 = {0xFFFF0B0AFFFF0908, 0xFFFF0F0EFFFF0D0C};
-#else
-static __m128i High16To32 = {8,  9,  -1, -1, 10, 11, -1, -1,
-                           12, 13, -1, -1, 14, 15, -1, -1};
-#endif
-
-static inline __m128i _write_16bit_avx_d1(uint32_t *out, __m128i Vec, __m128i Prev) {
-  // vec == [A B C D E F G H] (16 bit values)
-  __m128i Add = _mm_slli_si128(Vec, 2);               // [- A B C D E F G]
-  Prev = _mm_shuffle_epi32(Prev, BroadcastLastXMM); // [P P P P] (32-bit)
-  Vec = _mm_add_epi32(Vec, Add);                    // [A AB BC CD DE FG GH]
-  Add = _mm_slli_si128(Vec, 4);                     // [- - A AB BC CD DE EF]
-  Vec = _mm_add_epi32(Vec, Add);      // [A AB ABC ABCD BCDE CDEF DEFG EFGH]
-  __m128i V1 = _mm_cvtepu16_epi32(Vec); // [A AB ABC ABCD] (32-bit)
-  V1 = _mm_add_epi32(V1, Prev);       // [PA PAB PABC PABCD] (32-bit)
-  __m128i V2 =
-      _mm_shuffle_epi8(Vec, High16To32); // [BCDE CDEF DEFG EFGH] (32-bit)
-  V2 = _mm_add_epi32(V1, V2); // [PABCDE PABCDEF PABCDEFG PABCDEFGH] (32-bit)
-  _write_avx(out, V1);
-  _write_avx(out + 4, V2);
-  return V2;
-}
-
-static inline uint32_t _decode_data(const uint8_t **dataPtrPtr, uint8_t code) {
-	const uint8_t *dataPtr = *dataPtrPtr;
-	uint32_t val;
-
-	if (code == 0) { // 1 byte
-		val = (uint32_t) * dataPtr;
-		dataPtr += 1;
-	} else if (code == 1) { // 2 bytes
-		val = (uint32_t) * (uint16_t *) dataPtr;
-		dataPtr += 2;
-	} else if (code == 2) { // 3 bytes
-		val = (uint32_t) * (uint16_t *) dataPtr;
-		val |= *(dataPtr + 2) << 16;
-		dataPtr += 3;
-	} else {                      // code == 3
-		val = *(uint32_t *) dataPtr; // 4 bytes
-		dataPtr += 4;
-	}
-
-	*dataPtrPtr = dataPtr;
-	return val;
-}
-
-const uint8_t *svb_decode_scalar_d1_init(uint32_t *outPtr, const uint8_t *keyPtr,
-		const uint8_t *dataPtr, uint32_t count,
-                                   uint32_t prev) {
-  if (count == 0)
-    return dataPtr; // no reads or writes if no data
-
-  uint8_t shift = 0;
-  uint32_t key = *keyPtr++;
-
-  for (uint32_t c = 0; c < count; c++) {
-    if (shift == 8) {
-      shift = 0;
-      key = *keyPtr++;
-    }
-    uint32_t val = _decode_data(&dataPtr, (key >> shift) & 0x3);
-    val += prev;
-    *outPtr++ = val;
-    prev = val;
-    shift += 2;
-  }
-
-  return dataPtr; // pointer to first unused byte after end
-}
-
-const uint8_t *svb_decode_avx_d1_init(uint32_t *out, const uint8_t *__restrict__ keyPtr,
-		const uint8_t *__restrict__ dataPtr, uint64_t count, uint32_t prev) {
-	uint64_t keybytes = count / 4; // number of key bytes
-	if (keybytes >= 8) {
-		__m128i Prev = _mm_set1_epi32(prev);
-		__m128i Data;
-
-		int64_t Offset = -(int64_t) keybytes / 8 + 1;
-
-		const uint64_t *keyPtr64 = (const uint64_t *) keyPtr - Offset;
-		uint64_t nextkeys = keyPtr64[Offset];
-		for (; Offset != 0; ++Offset) {
-			uint64_t keys = nextkeys;
-			nextkeys = keyPtr64[Offset + 1];
-			// faster 16-bit delta since we only have 8-bit values
-			if (!keys) { // 32 1-byte ints in a row
-
-				Data = _mm_cvtepu8_epi16(_mm_lddqu_si128((__m128i *) (dataPtr)));
-				Prev = _write_16bit_avx_d1(out, Data, Prev);
-				Data = _mm_cvtepu8_epi16(
-						_mm_lddqu_si128((__m128i *) (dataPtr + 8)));
-				Prev = _write_16bit_avx_d1(out + 8, Data, Prev);
-				Data = _mm_cvtepu8_epi16(
-						_mm_lddqu_si128((__m128i *) (dataPtr + 16)));
-				Prev = _write_16bit_avx_d1(out + 16, Data, Prev);
-				Data = _mm_cvtepu8_epi16(
-						_mm_lddqu_si128((__m128i *) (dataPtr + 24)));
-				Prev = _write_16bit_avx_d1(out + 24, Data, Prev);
-				out += 32;
-				dataPtr += 32;
-				continue;
-			}
-
-			Data = _decode_avx(keys & 0x00FF, &dataPtr);
-			Prev = _write_avx_d1(out, Data, Prev);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			Prev = _write_avx_d1(out + 4, Data, Prev);
-
-			keys >>= 16;
-			Data = _decode_avx((keys & 0x00FF), &dataPtr);
-			Prev = _write_avx_d1(out + 8, Data, Prev);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			Prev = _write_avx_d1(out + 12, Data, Prev);
-
-			keys >>= 16;
-			Data = _decode_avx((keys & 0x00FF), &dataPtr);
-			Prev = _write_avx_d1(out + 16, Data, Prev);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			Prev = _write_avx_d1(out + 20, Data, Prev);
-
-			keys >>= 16;
-			Data = _decode_avx((keys & 0x00FF), &dataPtr);
-			Prev = _write_avx_d1(out + 24, Data, Prev);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			Prev = _write_avx_d1(out + 28, Data, Prev);
-
-			out += 32;
-		}
-		{
-			uint64_t keys = nextkeys;
-			// faster 16-bit delta since we only have 8-bit values
-			if (!keys) { // 32 1-byte ints in a row
-				Data = _mm_cvtepu8_epi16(_mm_lddqu_si128((__m128i *) (dataPtr)));
-				Prev = _write_16bit_avx_d1(out, Data, Prev);
-				Data = _mm_cvtepu8_epi16(
-						_mm_lddqu_si128((__m128i *) (dataPtr + 8)));
-				Prev = _write_16bit_avx_d1(out + 8, Data, Prev);
-				Data = _mm_cvtepu8_epi16(
-						_mm_lddqu_si128((__m128i *) (dataPtr + 16)));
-				Prev = _write_16bit_avx_d1(out + 16, Data, Prev);
-				Data = _mm_cvtepu8_epi16(
-						_mm_loadl_epi64((__m128i *) (dataPtr + 24)));
-				Prev = _write_16bit_avx_d1(out + 24, Data, Prev);
-				out += 32;
-				dataPtr += 32;
-
-			} else {
-
-				Data = _decode_avx(keys & 0x00FF, &dataPtr);
-				Prev = _write_avx_d1(out, Data, Prev);
-				Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-				Prev = _write_avx_d1(out + 4, Data, Prev);
-
-				keys >>= 16;
-				Data = _decode_avx((keys & 0x00FF), &dataPtr);
-				Prev = _write_avx_d1(out + 8, Data, Prev);
-				Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-				Prev = _write_avx_d1(out + 12, Data, Prev);
-
-				keys >>= 16;
-				Data = _decode_avx((keys & 0x00FF), &dataPtr);
-				Prev = _write_avx_d1(out + 16, Data, Prev);
-				Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-				Prev = _write_avx_d1(out + 20, Data, Prev);
-
-				keys >>= 16;
-				Data = _decode_avx((keys & 0x00FF), &dataPtr);
-				Prev = _write_avx_d1(out + 24, Data, Prev);
-				Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-				Prev = _write_avx_d1(out + 28, Data, Prev);
-
-				out += 32;
-			}
-		}
-		prev = out[-1];
-	}
-	uint64_t consumedkeys = keybytes - (keybytes & 7);
-	return svb_decode_scalar_d1_init(out, keyPtr + consumedkeys, dataPtr,
-			count & 31, prev);
-}
-
-size_t streamvbyte_delta_decode(const uint8_t* in, uint32_t* out,
-		uint32_t count, uint32_t prev) {
-	uint32_t keyLen = ((count + 3) / 4); // 2-bits per key (rounded up)
-	const uint8_t *keyPtr = in;
-	const uint8_t *dataPtr = keyPtr + keyLen;  // data starts at end of keys
-	return svb_decode_avx_d1_init(out, keyPtr, dataPtr, count, prev) - in;
-}
--- a/cpp/streamvbyte/tests/unit.c
+++ b/cpp/streamvbyte/tests/unit.c
@@ -1,73 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "streamvbyte.h"
-#include "streamvbytedelta.h"
-
-int main() {
-	int N = 4096;
-	uint32_t * datain = malloc(N * sizeof(uint32_t));
-	uint8_t * compressedbuffer = malloc(2 * N * sizeof(uint32_t));
-	uint32_t * recovdata = malloc(N * sizeof(uint32_t));
-
-	for (int length = 0; length <= N;) {
-		printf("length = %d \n", length);
-		for (uint32_t gap = 1; gap <= 387420489; gap *= 3) {
-			for (int k = 0; k < length; ++k)
-				datain[k] = gap;
-			size_t compsize = streamvbyte_encode(datain, length,
-					compressedbuffer);
-			size_t usedbytes = streamvbyte_decode(compressedbuffer, recovdata,
-					length);
-			if (compsize != usedbytes) {
-				printf(
-						"[streamvbyte_decode] code is buggy gap = %d, size mismatch %d %d \n",
-						(int) gap, (int) compsize, (int) usedbytes);
-				return -1;
-			}
-			for (int k = 0; k < length; ++k) {
-				if (recovdata[k] != datain[k]) {
-					printf("[streamvbyte_decode] code is buggy gap = %d\n",
-							(int) gap);
-					return -1;
-				}
-			}
-		}
-
-		printf("Delta \n");
-		for (size_t gap = 1; gap <= 531441; gap *= 3) {
-			for (int k = 0; k < length; ++k)
-				datain[k] = gap * k;
-			size_t compsize = streamvbyte_delta_encode(datain, length,
-					compressedbuffer, 0);
-			size_t usedbytes = streamvbyte_delta_decode(compressedbuffer,
-					recovdata, length, 0);
-			if (compsize != usedbytes) {
-				printf(
-						"[streamvbyte_delta_decode] code is buggy gap = %d, size mismatch %d %d \n",
-						(int) gap, (int) compsize, (int) usedbytes);
-				return -1;
-			}
-			for (int k = 0; k < length; ++k) {
-				if (recovdata[k] != datain[k]) {
-					printf(
-							"[streamvbyte_delta_decode] code is buggy gap = %d\n",
-							(int) gap);
-					return -1;
-				}
-			}
-
-		}
-
-		if (length < 128)
-			++length;
-		else {
-			length *= 2;
-		}
-	}
-	free(datain);
-	free(compressedbuffer);
-	free(recovdata);
-	printf("Code looks good.\n");
-	return 0;
-}
--- a/examples/custom_tokenizer.rs
+++ b/examples/custom_tokenizer.rs
@@ -0,0 +1,226 @@
+extern crate tantivy;
+extern crate tempdir;
+
+#[macro_use]
+extern crate serde_json;
+
+use std::path::Path;
+use tantivy::collector::TopCollector;
+use tantivy::query::QueryParser;
+use tantivy::schema::*;
+use tantivy::tokenizer::NgramTokenizer;
+use tantivy::Index;
+use tempdir::TempDir;
+
+fn main() {
+    // Let's create a temporary directory for the
+    // sake of this example
+    if let Ok(dir) = TempDir::new("tantivy_token_example_dir") {
+        run_example(dir.path()).unwrap();
+        dir.close().unwrap();
+    }
+}
+
+fn run_example(index_path: &Path) -> tantivy::Result<()> {
+    // # Defining the schema
+    //
+    // The Tantivy index requires a very strict schema.
+    // The schema declares which fields are in the index,
+    // and for each field, its type and "the way it should
+    // be indexed".
+
+    // first we need to define a schema ...
+    let mut schema_builder = SchemaBuilder::default();
+
+    // Our first field is title.
+    // In this example we want to use NGram searching.
+    // We will set the gram size to 3 characters, so any three
+    // consecutive characters in the title should be findable.
+    let text_field_indexing = TextFieldIndexing::default()
+        .set_tokenizer("ngram3")
+        .set_index_option(IndexRecordOption::WithFreqsAndPositions);
+    let text_options = TextOptions::default()
+        .set_indexing_options(text_field_indexing)
+        .set_stored();
+    schema_builder.add_text_field("title", text_options);
+
+    // Our second field is body.
+    // We want full-text search for it, but we do not
+    // need to be able to retrieve it
+    // for our application.
+    //
+    // We can make our index lighter
+    // by omitting the `STORED` flag.
+    schema_builder.add_text_field("body", TEXT);
+
+    let schema = schema_builder.build();
+
+    // # Indexing documents
+    //
+    // Let's create a brand new index.
+    //
+    // This will actually just save a meta.json
+    // with our schema in the directory.
+    let index = Index::create_in_dir(index_path, schema.clone())?;
+
+    // here we are registering our custom tokenizer
+    // this will store tokens of 3 characters each
+    index
+        .tokenizers()
+        .register("ngram3", NgramTokenizer::new(3, 3, false));
+
+    // To insert document we need an index writer.
+    // There must be only one writer at a time.
+    // This single `IndexWriter` is already
+    // multithreaded.
+    //
+    // Here we use a buffer of 50MB per thread. Using a bigger
+    // heap for the indexer can increase its throughput.
+    let mut index_writer = index.writer(50_000_000)?;
+
+    // Let's index our documents!
+    // We first need a handle on the title and the body field.
+
+    // ### Create a document "manually".
+    //
+    // We can create a document manually, by setting the fields
+    // one by one in a Document object.
+    let title = schema.get_field("title").unwrap();
+    let body = schema.get_field("body").unwrap();
+
+    let mut old_man_doc = Document::default();
+    old_man_doc.add_text(title, "The Old Man and the Sea");
+    old_man_doc.add_text(
+        body,
+        "He was an old man who fished alone in a skiff in the Gulf Stream and \
+         he had gone eighty-four days now without taking a fish.",
+    );
+
+    // ... and add it to the `IndexWriter`.
+    index_writer.add_document(old_man_doc);
+
+    // ### Create a document directly from json.
+    //
+    // Alternatively, we can use our schema to parse a
+    // document object directly from json.
+    // The document is a string, but we use the `json` macro
+    // from `serde_json` for the convenience of multi-line support.
+    let json = json!({
+       "title": "Of Mice and Men",
+       "body": "A few miles south of Soledad, the Salinas River drops in close to the hillside \
+                bank and runs deep and green. The water is warm too, for it has slipped twinkling \
+                over the yellow sands in the sunlight before reaching the narrow pool. On one \
+                side of the river the golden foothill slopes curve up to the strong and rocky \
+                Gabilan Mountains, but on the valley side the water is lined with trees—willows \
+                fresh and green with every spring, carrying in their lower leaf junctures the \
+                debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
+                limbs and branches that arch over the pool"
+    });
+    let mice_and_men_doc = schema.parse_document(&json.to_string())?;
+
+    index_writer.add_document(mice_and_men_doc);
+
+    // Multi-valued fields are allowed, they are
+    // expressed in JSON by an array.
+    // The following document has two titles.
+    let json = json!({
+       "title": ["Frankenstein", "The Modern Prometheus"],
+       "body": "You will rejoice to hear that no disaster has accompanied the commencement of an \
+                enterprise which you have regarded with such evil forebodings.  I arrived here \
+                yesterday, and my first task is to assure my dear sister of my welfare and \
+                increasing confidence in the success of my undertaking."
+    });
+    let frankenstein_doc = schema.parse_document(&json.to_string())?;
+
+    index_writer.add_document(frankenstein_doc);
+
+    // This is an example, so we will only index 3 documents
+    // here. You can check out tantivy's tutorial to index
+    // the English wikipedia. Tantivy's indexing is rather fast.
+    // Indexing 5 million articles of the English wikipedia takes
+    // around 4 minutes on my computer!
+
+    // ### Committing
+    //
+    // At this point our documents are not searchable.
+    //
+    //
+    // We need to call .commit() explicitly to force the
+    // index_writer to finish processing the documents in the queue,
+    // flush the current index to the disk, and advertise
+    // the existence of new documents.
+    //
+    // This call is blocking.
+    index_writer.commit()?;
+
+    // If `.commit()` returns correctly, then all of the
+    // documents that have been added are guaranteed to be
+    // persistently indexed.
+    //
+    // In the scenario of a crash or a power failure,
+    // tantivy behaves as if it has rolled back to its last
+    // commit.
+
+    // # Searching
+    //
+    // Let's search our index. Start by reloading
+    // searchers in the index. This should be done
+    // after every commit().
+    index.load_searchers()?;
+
+    // Afterwards create one (or more) searchers.
+    //
+    // You should create a searcher
+    // every time you start a "search query".
+    let searcher = index.searcher();
+
+    // The query parser can interpret human queries.
+    // Here, if the user does not specify which
+    // field they want to search, tantivy will search
+    // in both title and body.
+    let query_parser = QueryParser::for_index(&index, vec![title, body]);
+
+    // here we want to get a hit on the 'ken' in Frankenstein
+    let query = query_parser.parse_query("ken")?;
+
+    // A query defines a set of documents, as
+    // well as the way they should be scored.
+    //
+    // A query created by the query parser is scored according
+    // to a metric called Tf-Idf, and will consider
+    // any document matching at least one of our terms.
+
+    // ### Collectors
+    //
+    // We are not interested in all of the documents but
+    // only in the top 10. Keeping track of our top 10 best documents
+    // is the role of the TopCollector.
+    let mut top_collector = TopCollector::with_limit(10);
+
+    // We can now perform our query.
+    searcher.search(&*query, &mut top_collector)?;
+
+    // Our top collector now contains the 10
+    // most relevant doc ids...
+    let doc_addresses = top_collector.docs();
+
+    // The actual documents still need to be
+    // retrieved from Tantivy's store.
+    //
+    // Since the body field was not configured as stored,
+    // the document returned will only contain
+    // a title.
+
+    for doc_address in doc_addresses {
+        let retrieved_doc = searcher.doc(&doc_address)?;
+        println!("{}", schema.to_json(&retrieved_doc));
+    }
+
+    // Wait for indexing and merging threads to shut down.
+    // Usually this isn't needed, but in `main` we try to
+    // delete the temporary directory and that fails on
+    // Windows if the files are still open.
+    index_writer.wait_merging_threads()?;
+
+    Ok(())
+}
--- a/examples/simple_search.rs
+++ b/examples/simple_search.rs
@@ -5,11 +5,11 @@ extern crate tempdir;
 extern crate serde_json;

 use std::path::Path;
-use tempdir::TempDir;
-use tantivy::Index;
-use tantivy::schema::*;
 use tantivy::collector::TopCollector;
 use tantivy::query::QueryParser;
+use tantivy::schema::*;
+use tantivy::Index;
+use tempdir::TempDir;

 fn main() {
    // Let's create a temporary directory for the
@@ -20,10 +20,7 @@ fn main() {
    }
 }

-
 fn run_example(index_path: &Path) -> tantivy::Result<()> {
-
-
    // # Defining the schema
    //
    // The Tantivy index requires a very strict schema.
@@ -31,7 +28,6 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
    // and for each field, its type and "the way it should
    // be indexed".

-
    // first we need to define a schema ...
    let mut schema_builder = SchemaBuilder::default();

@@ -62,16 +58,13 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {

    let schema = schema_builder.build();

-
-
    // # Indexing documents
    //
    // Let's create a brand new index.
    //
    // This will actually just save a meta.json
    // with our schema in the directory.
-    let index = Index::create(index_path, schema.clone())?;
-
+    let index = Index::create_in_dir(index_path, schema.clone())?;

    // To insert document we need an index writer.
    // There must be only one writer at a time.
@@ -85,7 +78,6 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
    // Let's index our documents!
    // We first need a handle on the title and the body field.

-
    // ### Create a document "manually".
    //
    // We can create a document manually, by setting the fields
@@ -98,7 +90,7 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
    old_man_doc.add_text(
        body,
        "He was an old man who fished alone in a skiff in the Gulf Stream and \
-                          he had gone eighty-four days now without taking a fish.",
+         he had gone eighty-four days now without taking a fish.",
    );

    // ... and add it to the `IndexWriter`.
@@ -145,7 +137,6 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
    // Indexing 5 million articles of the English wikipedia takes
    // around 4 minutes on my computer!

-
    // ### Committing
    //
    // At this point our documents are not searchable.
@@ -167,7 +158,6 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
    // tantivy behaves as if has rolled back to its last
    // commit.

-
    // # Searching
    //
    // Let's search our index. Start by reloading
@@ -192,7 +182,6 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
    // A ticket has been opened regarding this problem.
    let query = query_parser.parse_query("sea whale")?;

-
    // A query defines a set of documents, as
    // well as the way they should be scored.
    //
--- a/rustfmt.toml
+++ b/rustfmt.toml
@@ -0,0 +1 @@
+use_try_shorthand = true
--- a/script/build-doc.sh
+++ b/script/build-doc.sh
@@ -1,10 +0,0 @@
-#!/bin/bash
-DEST=target/doc/tantivy/docs/
-mkdir -p $DEST
-
-for f in $(ls docs/*.md)
-do
-    rustdoc $f -o $DEST --markdown-css ../../rustdoc.css --markdown-css style.css
-done
-
-cp docs/*.css $DEST
--- a/script/profile.sh
+++ b/script/profile.sh
@@ -1,5 +0,0 @@
-#/bin/bash
-valgrind --tool=cachegrind target/release/tantivy-bench -i /data/wiki-index -q ./queries.txt -n 3
-valgrind --tool=callgrind target/release/tantivy-bench -i /data/wiki-index -q ./queries.txt -n 3
-
-
--- a/src/collector/chained_collector.rs
+++ b/src/collector/chained_collector.rs
@@ -1,9 +1,9 @@
-use Result;
 use collector::Collector;
+use DocId;
+use Result;
+use Score;
 use SegmentLocalId;
 use SegmentReader;
-use DocId;
-use Score;

 /// Collector that does nothing.
 /// This is used in the chain Collector and will hopefully
@@ -16,11 +16,66 @@ impl Collector for DoNothingCollector {
    }
    #[inline]
    fn collect(&mut self, _doc: DocId, _score: Score) {}
+    #[inline]
+    fn requires_scoring(&self) -> bool {
+        false
+    }
 }

 /// Zero-cost abstraction used to collect on multiple collectors.
 /// This contraption is only usable if the type of your collectors
 /// are known at compile time.
+///
+/// ```rust
+/// #[macro_use]
+/// extern crate tantivy;
+/// use tantivy::schema::{SchemaBuilder, TEXT};
+/// use tantivy::{Index, Result};
+/// use tantivy::collector::{CountCollector, TopCollector, chain};
+/// use tantivy::query::QueryParser;
+///
+/// # fn main() { example().unwrap(); }
+/// fn example() -> Result<()> {
+///     let mut schema_builder = SchemaBuilder::new();
+///     let title = schema_builder.add_text_field("title", TEXT);
+///     let schema = schema_builder.build();
+///     let index = Index::create_in_ram(schema);
+///     {
+///         let mut index_writer = index.writer(3_000_000)?;
+///         index_writer.add_document(doc!(
+///             title => "The Name of the Wind",
+///         ));
+///         index_writer.add_document(doc!(
+///             title => "The Diary of Muadib",
+///         ));
+///         index_writer.add_document(doc!(
+///             title => "A Dairy Cow",
+///         ));
+///         index_writer.add_document(doc!(
+///             title => "The Diary of a Young Girl",
+///         ));
+///         index_writer.commit().unwrap();
+///     }
+///
+///     index.load_searchers()?;
+///     let searcher = index.searcher();
+///
+///     {
+///         let mut top_collector = TopCollector::with_limit(2);
+///         let mut count_collector = CountCollector::default();
+///         {
+///             let mut collectors = chain().push(&mut top_collector).push(&mut count_collector);
+///             let query_parser = QueryParser::for_index(&index, vec![title]);
+///             let query = query_parser.parse_query("diary")?;
+///             searcher.search(&*query, &mut collectors).unwrap();
+///         }
+///         assert_eq!(count_collector.count(), 2);
+///         assert!(top_collector.at_capacity());
+///     }
+///
+///     Ok(())
+/// }
+/// ```
 pub struct ChainedCollector<Left: Collector, Right: Collector> {
    left: Left,
    right: Right,
@@ -42,8 +97,8 @@ impl<Left: Collector, Right: Collector> Collector for ChainedCollector<Left, Rig
        segment_local_id: SegmentLocalId,
        segment: &SegmentReader,
    ) -> Result<()> {
-        try!(self.left.set_segment(segment_local_id, segment));
-        try!(self.right.set_segment(segment_local_id, segment));
+        self.left.set_segment(segment_local_id, segment)?;
+        self.right.set_segment(segment_local_id, segment)?;
        Ok(())
    }

@@ -51,6 +106,10 @@ impl<Left: Collector, Right: Collector> Collector for ChainedCollector<Left, Rig
        self.left.collect(doc, score);
        self.right.collect(doc, score);
    }
+
+    fn requires_scoring(&self) -> bool {
+        self.left.requires_scoring() || self.right.requires_scoring()
+    }
 }

 /// Creates a `ChainedCollector`
--- a/src/collector/count_collector.rs
+++ b/src/collector/count_collector.rs
@@ -1,12 +1,60 @@
 use super::Collector;
 use DocId;
-use Score;
 use Result;
-use SegmentReader;
+use Score;
 use SegmentLocalId;
+use SegmentReader;

 /// `CountCollector` collector only counts how many
 /// documents match the query.
+///
+/// ```rust
+/// #[macro_use]
+/// extern crate tantivy;
+/// use tantivy::schema::{SchemaBuilder, TEXT};
+/// use tantivy::{Index, Result};
+/// use tantivy::collector::CountCollector;
+/// use tantivy::query::QueryParser;
+///
+/// # fn main() { example().unwrap(); }
+/// fn example() -> Result<()> {
+///     let mut schema_builder = SchemaBuilder::new();
+///     let title = schema_builder.add_text_field("title", TEXT);
+///     let schema = schema_builder.build();
+///     let index = Index::create_in_ram(schema);
+///     {
+///         let mut index_writer = index.writer(3_000_000)?;
+///         index_writer.add_document(doc!(
+///             title => "The Name of the Wind",
+///         ));
+///         index_writer.add_document(doc!(
+///             title => "The Diary of Muadib",
+///         ));
+///         index_writer.add_document(doc!(
+///             title => "A Dairy Cow",
+///         ));
+///         index_writer.add_document(doc!(
+///             title => "The Diary of a Young Girl",
+///         ));
+///         index_writer.commit().unwrap();
+///     }
+///
+///     index.load_searchers()?;
+///     let searcher = index.searcher();
+///
+///     {
+///         let mut count_collector = CountCollector::default();
+///         let query_parser = QueryParser::for_index(&index, vec![title]);
+///         let query = query_parser.parse_query("diary")?;
+///         searcher.search(&*query, &mut count_collector).unwrap();
+///
+///         assert_eq!(count_collector.count(), 2);
+///     }
+///
+///     Ok(())
+/// }
+/// ```
+#[derive(Default)]
 pub struct CountCollector {
    count: usize,
 }
@@ -19,12 +67,6 @@ impl CountCollector {
    }
 }

-impl Default for CountCollector {
-    fn default() -> CountCollector {
-        CountCollector { count: 0 }
-    }
-}
-
 impl Collector for CountCollector {
    fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> Result<()> {
        Ok(())
@@ -33,23 +75,27 @@ impl Collector for CountCollector {
    fn collect(&mut self, _: DocId, _: Score) {
        self.count += 1;
    }
+
+    fn requires_scoring(&self) -> bool {
+        false
+    }
 }

 #[cfg(test)]
 mod tests {

-    use super::*;
-    use test::Bencher;
-    use collector::Collector;
+    use collector::{Collector, CountCollector};

-    #[bench]
-    fn build_collector(b: &mut Bencher) {
-        b.iter(|| {
-            let mut count_collector = CountCollector::default();
-            for doc in 0..1_000_000 {
-                count_collector.collect(doc, 1f32);
-            }
-            count_collector.count()
-        });
+    #[test]
+    fn test_count_collector() {
+        let mut count_collector = CountCollector::default();
+        assert_eq!(count_collector.count(), 0);
+        count_collector.collect(0u32, 1f32);
+        assert_eq!(count_collector.count(), 1);
+        assert_eq!(count_collector.count(), 1);
+        count_collector.collect(1u32, 1f32);
+        assert_eq!(count_collector.count(), 2);
+        assert!(!count_collector.requires_scoring());
    }
+
 }
--- a/src/collector/facet_collector.rs
+++ b/src/collector/facet_collector.rs
@@ -1,113 +1,664 @@
-use std::cmp::Eq;
-use std::collections::HashMap;
-use std::hash::Hash;
-
 use collector::Collector;
-use fastfield::FastFieldReader;
+use docset::SkipResult;
+use fastfield::FacetReader;
+use schema::Facet;
 use schema::Field;
+use std::cell::UnsafeCell;
+use std::collections::btree_map;
+use std::collections::BTreeMap;
+use std::collections::BTreeSet;
+use std::collections::BinaryHeap;
+use std::collections::Bound;
+use std::iter::Peekable;
+use std::mem;
+use std::{u64, usize};
+use termdict::TermMerger;

+use std::cmp::Ordering;
 use DocId;
 use Result;
 use Score;
-use SegmentReader;
 use SegmentLocalId;
+use SegmentReader;

-/// Facet collector  for i64/u64 fast field
-pub struct FacetCollector<T>
-where
-    T: FastFieldReader,
-    T::ValueType: Eq + Hash,
-{
-    counters: HashMap<T::ValueType, u64>,
-    field: Field,
-    ff_reader: Option<T>,
+struct Hit<'a> {
+    count: u64,
+    facet: &'a Facet,
 }

-impl<T> FacetCollector<T>
-where
-    T: FastFieldReader,
-    T::ValueType: Eq + Hash,
-{
-    /// Creates a new facet collector for aggregating a given field.
-    pub fn new(field: Field) -> FacetCollector<T> {
-        FacetCollector {
-            counters: HashMap::new(),
-            field: field,
-            ff_reader: None,
-        }
+impl<'a> Eq for Hit<'a> {}
+
+impl<'a> PartialEq<Hit<'a>> for Hit<'a> {
+    fn eq(&self, other: &Hit) -> bool {
+        self.count == other.count
    }
 }

-impl<T> Collector for FacetCollector<T>
-where
-    T: FastFieldReader,
-    T::ValueType: Eq + Hash,
-{
+impl<'a> PartialOrd<Hit<'a>> for Hit<'a> {
+    fn partial_cmp(&self, other: &Hit) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl<'a> Ord for Hit<'a> {
+    fn cmp(&self, other: &Self) -> Ordering {
+        other.count.cmp(&self.count)
+    }
+}
+
+struct SegmentFacetCounter {
+    pub facet_reader: FacetReader,
+    pub facet_ords: Vec<u64>,
+    pub facet_counts: Vec<u64>,
+}
+
+fn facet_depth(facet_bytes: &[u8]) -> usize {
+    if facet_bytes.is_empty() {
+        0
+    } else {
+        facet_bytes.iter().cloned().filter(|b| *b == 0u8).count() + 1
+    }
+}
+
+/// Collector for faceting
+///
+/// The collector collects all facets. You need to configure it
+/// beforehand with the facet you want to extract.
+///
+/// This is done by calling `.add_facet(...)` with the root of the
+/// facet you want to extract as argument.
+///
+/// Facet counts will only be computed for the facets that are direct children
+/// of such a root facet.
+///
+/// For instance, if your index represents books, your hierarchy of facets
+/// may contain `category`, `language`.
+///
+/// The category facet may include `subcategories`. For instance, a book
+/// could belong to `/category/fiction/fantasy`.
+///
+/// If you request the facet counts for `/category`, the result will be
+/// the breakdown of counts for the direct children of `/category`
+/// (e.g. `/category/fiction`, `/category/biography`, `/category/personal_development`).
+///
+/// Once collection is finished, you can harvest its results in the form
+/// of a `FacetCounts` object, and extract your facet counts from it.
+///
+/// This implementation assumes you are working with a number of facets that
+/// is hundreds of times lower than your number of documents.
+///
+///
+/// ```rust
+/// #[macro_use]
+/// extern crate tantivy;
+/// use tantivy::schema::{Facet, SchemaBuilder, TEXT};
+/// use tantivy::{Index, Result};
+/// use tantivy::collector::FacetCollector;
+/// use tantivy::query::AllQuery;
+///
+/// # fn main() { example().unwrap(); }
+/// fn example() -> Result<()> {
+///     let mut schema_builder = SchemaBuilder::new();
+///
+///     // Facets have their own specific type.
+///     // It is not a bad practise to put all of your
+///     // facet information in the same field.
+///     let facet = schema_builder.add_facet_field("facet");
+///     let title = schema_builder.add_text_field("title", TEXT);
+///     let schema = schema_builder.build();
+///     let index = Index::create_in_ram(schema);
+///     {
+///         let mut index_writer = index.writer(3_000_000)?;
+///         // a document can be associated to any number of facets
+///         index_writer.add_document(doc!(
+///             title => "The Name of the Wind",
+///             facet => Facet::from("/lang/en"),
+///             facet => Facet::from("/category/fiction/fantasy")
+///         ));
+///         index_writer.add_document(doc!(
+///             title => "Dune",
+///             facet => Facet::from("/lang/en"),
+///             facet => Facet::from("/category/fiction/sci-fi")
+///         ));
+///         index_writer.add_document(doc!(
+///             title => "La Vénus d'Ille",
+///             facet => Facet::from("/lang/fr"),
+///             facet => Facet::from("/category/fiction/fantasy"),
+///             facet => Facet::from("/category/fiction/horror")
+///         ));
+///         index_writer.add_document(doc!(
+///             title => "The Diary of a Young Girl",
+///             facet => Facet::from("/lang/en"),
+///             facet => Facet::from("/category/biography")
+///         ));
+///         index_writer.commit().unwrap();
+///     }
+///
+///     index.load_searchers()?;
+///     let searcher = index.searcher();
+///
+///     {
+///         let mut facet_collector = FacetCollector::for_field(facet);
+///         facet_collector.add_facet("/lang");
+///         facet_collector.add_facet("/category");
+///         searcher.search(&AllQuery, &mut facet_collector).unwrap();
+///
+///         // this object contains count aggregate for all of the facets.
+///         let counts = facet_collector.harvest();
+///
+///         // This lists all of the facet counts
+///         let facets: Vec<(&Facet, u64)> = counts
+///             .get("/category")
+///             .collect();
+///         assert_eq!(facets, vec![
+///             (&Facet::from("/category/biography"), 1),
+///             (&Facet::from("/category/fiction"), 3)
+///         ]);
+///     }
+///
+///     {
+///         let mut facet_collector = FacetCollector::for_field(facet);
+///         facet_collector.add_facet("/category/fiction");
+///         searcher.search(&AllQuery, &mut facet_collector).unwrap();
+///
+///         // this object contains count aggregate for all of the facets.
+///         let counts = facet_collector.harvest();
+///
+///         // This lists all of the facet counts
+///         let facets: Vec<(&Facet, u64)> = counts
+///             .get("/category/fiction")
+///             .collect();
+///         assert_eq!(facets, vec![
+///             (&Facet::from("/category/fiction/fantasy"), 2),
+///             (&Facet::from("/category/fiction/horror"), 1),
+///             (&Facet::from("/category/fiction/sci-fi"), 1)
+///         ]);
+///     }
+///
+///     {
+///         let mut facet_collector = FacetCollector::for_field(facet);
+///         facet_collector.add_facet("/category/fiction");
+///         searcher.search(&AllQuery, &mut facet_collector).unwrap();
+///
+///         // this object contains count aggregate for all of the facets.
+///         let counts = facet_collector.harvest();
+///
+///         // This lists all of the facet counts
+///         let facets: Vec<(&Facet, u64)> = counts.top_k("/category/fiction", 1);
+///         assert_eq!(facets, vec![
+///             (&Facet::from("/category/fiction/fantasy"), 2)
+///         ]);
+///     }
+///
+///     Ok(())
+/// }
+/// ```
+pub struct FacetCollector {
+    facet_ords: Vec<u64>,
+    field: Field,
+    ff_reader: Option<UnsafeCell<FacetReader>>,
+    segment_counters: Vec<SegmentFacetCounter>,
+
+    // facet_ord -> collapse facet_id
+    current_segment_collapse_mapping: Vec<usize>,
+    // collapse facet_id -> count
+    current_segment_counts: Vec<u64>,
+    // collapse facet_id -> facet_ord
+    current_collapse_facet_ords: Vec<u64>,
+
+    facets: BTreeSet<Facet>,
+}
+
+fn skip<'a, I: Iterator<Item = &'a Facet>>(
+    target: &[u8],
+    collapse_it: &mut Peekable<I>,
+) -> SkipResult {
+    loop {
+        match collapse_it.peek() {
+            Some(facet_bytes) => match facet_bytes.encoded_bytes().cmp(target) {
+                Ordering::Less => {}
+                Ordering::Greater => {
+                    return SkipResult::OverStep;
+                }
+                Ordering::Equal => {
+                    return SkipResult::Reached;
+                }
+            },
+            None => {
+                return SkipResult::End;
+            }
+        }
+        collapse_it.next();
+    }
+}
+
+impl FacetCollector {
+    /// Create a facet collector to collect the facets
+    /// from a specific facet `Field`.
+    ///
+    /// This function does not check whether the field
+    /// is of the proper type.
+    pub fn for_field(field: Field) -> FacetCollector {
+        FacetCollector {
+            facet_ords: Vec::with_capacity(255),
+            segment_counters: Vec::new(),
+            field,
+            ff_reader: None,
+            facets: BTreeSet::new(),
+
+            current_segment_collapse_mapping: Vec::new(),
+            current_collapse_facet_ords: Vec::new(),
+            current_segment_counts: Vec::new(),
+        }
+    }
+
+    /// Adds a facet for which we want to record counts.
+    ///
+    /// Adding facet `Facet::from("/country")` for instance,
+    /// will record the counts of all of the direct children of the facet country
+    /// (e.g. `/country/FR`, `/country/UK`).
+    ///
+    /// Adding two facets within which one is the prefix of the other is forbidden.
+    /// If you need the correct number of unique documents for two such facets,
+    /// just add them in separate `FacetCollector`.
+    pub fn add_facet<T>(&mut self, facet_from: T)
+    where
+        Facet: From<T>,
+    {
+        let facet = Facet::from(facet_from);
+        for old_facet in &self.facets {
+            assert!(
+                !old_facet.is_prefix_of(&facet),
+                "Tried to add a facet which is a descendant of an already added facet."
+            );
+            assert!(
+                !facet.is_prefix_of(old_facet),
+                "Tried to add a facet which is an ancestor of an already added facet."
+            );
+        }
+        self.facets.insert(facet);
+    }
+
+    fn set_collapse_mapping(&mut self, facet_reader: &FacetReader) {
+        self.current_segment_collapse_mapping.clear();
+        self.current_collapse_facet_ords.clear();
+        self.current_segment_counts.clear();
+        let mut collapse_facet_it = self.facets.iter().peekable();
+        self.current_collapse_facet_ords.push(0);
+        let mut facet_streamer = facet_reader.facet_dict().range().into_stream();
+        if !facet_streamer.advance() {
+            return;
+        }
+        'outer: loop {
+            // at the beginning of this loop, facet_streamer
+            // is positioned on a term that has not been processed yet.
+            let skip_result = skip(facet_streamer.key(), &mut collapse_facet_it);
+            match skip_result {
+                SkipResult::Reached => {
+                    // we reach a facet we decided to collapse.
+                    let collapse_depth = facet_depth(facet_streamer.key());
+                    let mut collapsed_id = 0;
+                    self.current_segment_collapse_mapping.push(0);
+                    while facet_streamer.advance() {
+                        let depth = facet_depth(facet_streamer.key());
+                        if depth <= collapse_depth {
+                            continue 'outer;
+                        }
+                        if depth == collapse_depth + 1 {
+                            collapsed_id = self.current_collapse_facet_ords.len();
+                            self.current_collapse_facet_ords
+                                .push(facet_streamer.term_ord());
+                            self.current_segment_collapse_mapping.push(collapsed_id);
+                        } else {
+                            self.current_segment_collapse_mapping.push(collapsed_id);
+                        }
+                    }
+                    break;
+                }
+                SkipResult::End | SkipResult::OverStep => {
+                    self.current_segment_collapse_mapping.push(0);
+                    if !facet_streamer.advance() {
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    /// Archives the segment currently being collected, if any.
+    ///
+    /// When a facet reader is set, it is taken out of `ff_reader` and pushed
+    /// onto `segment_counters` together with the collapsed facet ordinals and
+    /// the counts gathered so far; both buffers are left empty afterwards.
+    /// When no reader is set, this is a no-op.
+    fn finalize_segment(&mut self) {
+        if let Some(reader_cell) = self.ff_reader.take() {
+            let facet_reader = reader_cell.into_inner();
+            let facet_ords = mem::replace(&mut self.current_collapse_facet_ords, Vec::new());
+            let facet_counts = mem::replace(&mut self.current_segment_counts, Vec::new());
+            self.segment_counters.push(SegmentFacetCounter {
+                facet_reader,
+                facet_ords,
+                facet_counts,
+            });
+        }
+    }
+
+    /// Consumes the collector and returns the collected facet counts.
+    ///
+    /// The segment still in progress is finalized first. The facet
+    /// dictionaries of all segments are then merged, and for every facet the
+    /// counts recorded by each segment (looked up through that segment's
+    /// collapsed facet ordinals) are summed. Facets whose total is zero are
+    /// left out of the result.
+    pub fn harvest(mut self) -> FacetCounts {
+        self.finalize_segment();
+
+        let segment_counters = &self.segment_counters;
+        let ords_per_segment: Vec<&[u64]> = segment_counters
+            .iter()
+            .map(|counter| &counter.facet_ords[..])
+            .collect();
+        let counts_per_segment: Vec<&[u64]> = segment_counters
+            .iter()
+            .map(|counter| &counter.facet_counts[..])
+            .collect();
+        let facet_streams: Vec<_> = segment_counters
+            .iter()
+            .map(|counter| counter.facet_reader.facet_dict().range().into_stream())
+            .collect();
+
+        let mut facet_merger = TermMerger::new(facet_streams);
+        let mut facet_counts = BTreeMap::new();
+
+        while facet_merger.advance() {
+            let total: u64 = facet_merger
+                .current_kvs()
+                .iter()
+                .map(|heap_item| {
+                    let seg_ord = heap_item.segment_ord;
+                    let term_ord = heap_item.streamer.term_ord();
+                    // A term contributes only if it is one of the segment's
+                    // collapsed ordinals; position 0 never contributes.
+                    match ords_per_segment[seg_ord].binary_search(&term_ord) {
+                        Ok(0) | Err(_) => 0,
+                        Ok(collapsed_term_id) => counts_per_segment[seg_ord][collapsed_term_id],
+                    }
+                })
+                .sum();
+            if total == 0u64 {
+                continue;
+            }
+            let encoded: Vec<u8> = facet_merger.key().to_owned();
+            // May create a corrupted facet if the term dictionary is corrupted.
+            let facet = unsafe { Facet::from_encoded(encoded) };
+            facet_counts.insert(facet, total);
+        }
+        FacetCounts { facet_counts }
+    }
+}
+
+impl Collector for FacetCollector {
+    /// Archives the previous segment (if any), then prepares the collapse
+    /// mapping, a zeroed count buffer and the facet reader for `reader`.
    fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
-        self.ff_reader = Some(reader.get_fast_field_reader(self.field)?);
+        self.finalize_segment();
+        let segment_facet_reader = reader.facet_reader(self.field)?;
+        self.set_collapse_mapping(&segment_facet_reader);
+        // One counter per collapsed facet ordinal of this segment.
+        let num_collapsed_ords = self.current_collapse_facet_ords.len();
+        self.current_segment_counts.resize(num_collapsed_ords, 0);
+        self.ff_reader = Some(UnsafeCell::new(segment_facet_reader));
        Ok(())
    }

    fn collect(&mut self, doc: DocId, _: Score) {
-        let val = self.ff_reader
-            .as_ref()
-            .expect("collect() was called before set_segment. This should never happen.")
-            .get(doc);
-        *(self.counters.entry(val).or_insert(0)) += 1;
+        let reader_cell = self
+            .ff_reader
+            .as_ref()
+            .expect("collect() was called before set_segment. This should never happen.");
+        // The reader is kept in an `UnsafeCell` so that a mutable reference
+        // can be obtained while `ff_reader` is only borrowed immutably.
+        let facet_reader: &mut FacetReader = unsafe { &mut *reader_cell.get() };
+        facet_reader.facet_ords(doc, &mut self.facet_ords);
+
+        // Consecutive ordinals that map to the same collapsed ordinal are
+        // counted only once for this document.
+        let mut last_collapsed_ord: usize = usize::MAX;
+        for &facet_ord in &self.facet_ords {
+            let collapsed_ord = self.current_segment_collapse_mapping[facet_ord as usize];
+            let increment = if collapsed_ord == last_collapsed_ord {
+                0
+            } else {
+                1
+            };
+            self.current_segment_counts[collapsed_ord] += increment;
+            last_collapsed_ord = collapsed_ord;
+        }
+    }
+
+    /// Facet counting ignores the score passed to `collect`.
+    fn requires_scoring(&self) -> bool {
+        false
+    }
+}
+
+/// Intermediary result of the `FacetCollector` that stores
+/// the facet counts for all the segments.
+pub struct FacetCounts {
+    facet_counts: BTreeMap<Facet, u64>,
+}
+
+pub struct FacetChildIterator<'a> {
+    underlying: btree_map::Range<'a, Facet, u64>,
+}
+
+impl<'a> Iterator for FacetChildIterator<'a> {
+    type Item = (&'a Facet, u64);
+
+    /// Yields the next `(facet, count)` entry of the underlying range,
+    /// copying the count out of the map.
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.underlying.next() {
+            Some((facet, count)) => Some((facet, *count)),
+            None => None,
+        }
+    }
+}
+
+impl FacetCounts {
+    /// Returns an iterator over the `(facet, count)` entries located under
+    /// `facet_from`.
+    ///
+    /// The entries returned are those whose key sorts strictly after
+    /// `facet_from` and, unless `facet_from` is the root, strictly before the
+    /// encoded bytes of `facet_from` followed by the byte `1`. For the root,
+    /// every remaining entry is returned.
+    // NOTE(review): the `1u8` upper bound presumably relies on the facet
+    // encoding using a smaller byte as path separator, so that the range
+    // covers exactly the descendants of `facet_from` -- confirm against `Facet`.
+    pub fn get<T>(&self, facet_from: T) -> FacetChildIterator
+    where
+        Facet: From<T>,
+    {
+        let facet = Facet::from(facet_from);
+        let left_bound = Bound::Excluded(facet.clone());
+        let right_bound = if facet.is_root() {
+            Bound::Unbounded
+        } else {
+            let mut facet_after_bytes: Vec<u8> = facet.encoded_bytes().to_owned();
+            facet_after_bytes.push(1u8);
+            let facet_after = unsafe { Facet::from_encoded(facet_after_bytes) }; // ok logic
+            Bound::Excluded(facet_after)
+        };
+        let underlying: btree_map::Range<_, _> = self.facet_counts.range((left_bound, right_bound));
+        FacetChildIterator { underlying }
+    }
+
+    /// Returns at most `k` of the entries yielded by `get(facet)`, keeping
+    /// those with the highest counts, highest count first.
+    ///
+    /// Returns an empty vector when `k` is 0 or when there is no entry under
+    /// `facet`.
+    pub fn top_k<T>(&self, facet: T, k: usize) -> Vec<(&Facet, u64)>
+    where
+        Facet: From<T>,
+    {
+        let mut heap = BinaryHeap::with_capacity(k);
+        let mut it = self.get(facet);
+
+        // Seed the heap with the first `k` entries.
+        for (facet, count) in (&mut it).take(k) {
+            heap.push(Hit { count, facet });
+        }
+
+        // The head of the heap is the retained hit with the lowest count.
+        let mut lowest_count: u64 = heap.peek().map(|hit| hit.count).unwrap_or(u64::MIN);
+        for (facet, count) in it {
+            if count > lowest_count {
+                if let Some(mut head) = heap.peek_mut() {
+                    *head = Hit { count, facet };
+                }
+                // The heap is re-ordered once the `PeekMut` guard is dropped:
+                // the new head, not necessarily the hit just inserted, holds
+                // the threshold that later entries must beat.
+                if let Some(head) = heap.peek() {
+                    lowest_count = head.count;
+                }
+            }
+        }
+        heap.into_sorted_vec()
+            .into_iter()
+            .map(|hit| (hit.facet, hit.count))
+            .collect::<Vec<_>>()
    }
 }

 #[cfg(test)]
 mod tests {
-
-    use collector::{chain, FacetCollector};
-    use query::QueryParser;
-    use fastfield::{I64FastFieldReader, U64FastFieldReader};
-    use schema::{self, FAST, STRING};
-    use Index;
+    use super::{FacetCollector, FacetCounts};
+    use core::Index;
+    use query::AllQuery;
+    use rand::{thread_rng, Rng};
+    use schema::Field;
+    use schema::{Document, Facet, SchemaBuilder};
+    use std::iter;

    #[test]
-    // create 10 documents, set num field value to 0 or 1 for even/odd ones
-    // make sure we have facet counters correctly filled
-    fn test_facet_collector_results() {
-        let mut schema_builder = schema::SchemaBuilder::new();
-        let num_field_i64 = schema_builder.add_i64_field("num_i64", FAST);
-        let num_field_u64 = schema_builder.add_u64_field("num_u64", FAST);
-        let text_field = schema_builder.add_text_field("text", STRING);
+    fn test_facet_collector_drilldown() {
+        let mut schema_builder = SchemaBuilder::new();
+        let facet_field = schema_builder.add_facet_field("facet");
        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);

-        let index = Index::create_in_ram(schema.clone());
-
-        {
-            let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
-            {
-                for i in 0u64..10u64 {
-                    index_writer.add_document(doc!(
-                        num_field_i64 => ((i as i64) % 3i64) as i64,
-                        num_field_u64 => (i % 2u64) as u64,
-                        text_field => "text"
-                    ));
-                }
-            }
-            assert_eq!(index_writer.commit().unwrap(), 10u64);
+        let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
+        let num_facets: usize = 3 * 4 * 5;
+        let facets: Vec<Facet> = (0..num_facets)
+            .map(|mut n| {
+                let top = n % 3;
+                n /= 3;
+                let mid = n % 4;
+                n /= 4;
+                let leaf = n % 5;
+                Facet::from(&format!("/top{}/mid{}/leaf{}", top, mid, leaf))
+            })
+            .collect();
+        for i in 0..num_facets * 10 {
+            let mut doc = Document::new();
+            doc.add_facet(facet_field, facets[i % num_facets].clone());
+            index_writer.add_document(doc);
        }
-
+        index_writer.commit().unwrap();
        index.load_searchers().unwrap();
        let searcher = index.searcher();
-        let mut ffvf_i64: FacetCollector<I64FastFieldReader> = FacetCollector::new(num_field_i64);
-        let mut ffvf_u64: FacetCollector<U64FastFieldReader> = FacetCollector::new(num_field_u64);

+        let mut facet_collector = FacetCollector::for_field(facet_field);
+        facet_collector.add_facet(Facet::from("/top1"));
+        searcher.search(&AllQuery, &mut facet_collector).unwrap();
+
+        let counts: FacetCounts = facet_collector.harvest();
        {
-            // perform the query
-            let mut facet_collectors = chain().push(&mut ffvf_i64).push(&mut ffvf_u64);
-            let query_parser = QueryParser::for_index(&index, vec![text_field]);
-            let query = query_parser.parse_query("text:text").unwrap();
-            query.search(&searcher, &mut facet_collectors).unwrap();
+            let facets: Vec<(String, u64)> = counts
+                .get("/top1")
+                .map(|(facet, count)| (facet.to_string(), count))
+                .collect();
+            assert_eq!(
+                facets,
+                [
+                    ("/top1/mid0", 50),
+                    ("/top1/mid1", 50),
+                    ("/top1/mid2", 50),
+                    ("/top1/mid3", 50),
+                ].iter()
+                    .map(|&(facet_str, count)| (String::from(facet_str), count))
+                    .collect::<Vec<_>>()
+            );
        }
+    }

-        assert_eq!(ffvf_u64.counters[&0], 5);
-        assert_eq!(ffvf_u64.counters[&1], 5);
-        assert_eq!(ffvf_i64.counters[&0], 4);
-        assert_eq!(ffvf_i64.counters[&1], 3);
+    #[test]
+    #[should_panic(
+        expected = "Tried to add a facet which is a descendant of \
+                    an already added facet."
+    )]
+    fn test_misused_facet_collector() {
+        let mut facet_collector = FacetCollector::for_field(Field(0));
+        facet_collector.add_facet(Facet::from("/country"));
+        facet_collector.add_facet(Facet::from("/country/europe"));
+    }
+
+    #[test]
+    fn test_non_used_facet_collector() {
+        let mut facet_collector = FacetCollector::for_field(Field(0));
+        facet_collector.add_facet(Facet::from("/country"));
+        facet_collector.add_facet(Facet::from("/countryeurope"));
+    }
+
+    #[test]
+    fn test_facet_collector_topk() {
+        let mut schema_builder = SchemaBuilder::new();
+        let facet_field = schema_builder.add_facet_field("facet");
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+
+        let mut docs: Vec<Document> = vec![("a", 10), ("b", 100), ("c", 7), ("d", 12), ("e", 21)]
+            .into_iter()
+            .flat_map(|(c, count)| {
+                let facet = Facet::from(&format!("/facet_{}", c));
+                let doc = doc!(facet_field => facet);
+                iter::repeat(doc).take(count)
+            })
+            .collect();
+        thread_rng().shuffle(&mut docs[..]);
+
+        let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
+        for doc in docs {
+            index_writer.add_document(doc);
+        }
+        index_writer.commit().unwrap();
+        index.load_searchers().unwrap();
+
+        let searcher = index.searcher();
+
+        let mut facet_collector = FacetCollector::for_field(facet_field);
+        facet_collector.add_facet("/");
+        searcher.search(&AllQuery, &mut facet_collector).unwrap();
+
+        let counts: FacetCounts = facet_collector.harvest();
+        {
+            let facets: Vec<(&Facet, u64)> = counts.top_k("/", 3);
+            assert_eq!(
+                facets,
+                vec![
+                    (&Facet::from("/facet_b"), 100),
+                    (&Facet::from("/facet_e"), 21),
+                    (&Facet::from("/facet_d"), 12),
+                ]
+            );
+        }
+    }
+
+}
+
+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+
+    use collector::FacetCollector;
+    use query::AllQuery;
+    use rand::{thread_rng, Rng};
+    use schema::Facet;
+    use schema::SchemaBuilder;
+    use test::Bencher;
+    use Index;
+
+    /// Benchmarks an `AllQuery` search feeding a `FacetCollector` over an
+    /// in-RAM index of shuffled documents carrying one facet each.
+    #[bench]
+    fn bench_facet_collector(b: &mut Bencher) {
+        let mut schema_builder = SchemaBuilder::new();
+        let facet_field = schema_builder.add_facet_field("facet");
+        let index = Index::create_in_ram(schema_builder.build());
+
+        // Facet `/facet_{val}` is attached to `val * val` documents,
+        // which adds up to 40425 docs.
+        let mut docs = Vec::new();
+        for val in 0..50 {
+            let facet = Facet::from(&format!("/facet_{}", val));
+            docs.extend((0..val * val).map(|_| doc!(facet_field=>facet.clone())));
+        }
+        thread_rng().shuffle(&mut docs[..]);
+
+        let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
+        for shuffled_doc in docs {
+            index_writer.add_document(shuffled_doc);
+        }
+        index_writer.commit().unwrap();
+        index.load_searchers().unwrap();
+
+        b.iter(|| {
+            let searcher = index.searcher();
+            let mut collector = FacetCollector::for_field(facet_field);
+            searcher.search(&AllQuery, &mut collector).unwrap();
+        });
    }
 }
--- a/src/collector/int_facet_collector.rs
+++ b/src/collector/int_facet_collector.rs
@@ -0,0 +1,123 @@
+use std::cmp::Eq;
+use std::collections::HashMap;
+use std::hash::Hash;
+
+use collector::Collector;
+use fastfield::FastFieldReader;
+use schema::Field;
+
+use DocId;
+use Result;
+use Score;
+use SegmentReader;
+use SegmentLocalId;
+
+
+/// Facet collector for i64/u64 fast fields.
+pub struct IntFacetCollector<T>
+where
+    T: FastFieldReader,
+    T::ValueType: Eq + Hash,
+{
+    counters: HashMap<T::ValueType, u64>,
+    field: Field,
+    ff_reader: Option<T>,
+}
+
+
+impl<T> IntFacetCollector<T>
+where
+    T: FastFieldReader,
+    T::ValueType: Eq + Hash,
+{
+    /// Builds a collector counting the values of `field`.
+    ///
+    /// The collector starts with no counters and no fast field reader; the
+    /// reader is set by `set_segment`.
+    pub fn new(field: Field) -> IntFacetCollector<T> {
+        let counters = HashMap::new();
+        IntFacetCollector {
+            counters,
+            field,
+            ff_reader: None,
+        }
+    }
+}
+
+
+impl<T> Collector for IntFacetCollector<T>
+where
+    T: FastFieldReader,
+    T::ValueType: Eq + Hash,
+{
+    /// Loads the fast field reader of `self.field` for the new segment.
+    ///
+    /// Counters are not reset: they accumulate across segments.
+    // NOTE(review): other collectors in this change call
+    // `reader.fast_field_reader(..)`; confirm `get_fast_field_reader` still
+    // exists on `SegmentReader`.
+    fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
+        self.ff_reader = Some(reader.get_fast_field_reader(self.field)?);
+        Ok(())
+    }
+
+    /// Increments the counter associated with the fast field value of `doc`.
+    ///
+    /// Panics if called before `set_segment`.
+    fn collect(&mut self, doc: DocId, _: Score) {
+        let val = self.ff_reader
+            .as_ref()
+            .expect(
+                "collect() was called before set_segment. \
+                This should never happen.",
+            )
+            .get(doc);
+        *(self.counters.entry(val).or_insert(0)) += 1;
+    }
+
+    /// The score passed to `collect` is ignored, so scoring is not needed.
+    ///
+    /// `Collector::requires_scoring` has no default implementation, so every
+    /// implementor has to provide it.
+    fn requires_scoring(&self) -> bool {
+        false
+    }
+}
+
+
+
+#[cfg(test)]
+mod tests {
+
+    use collector::{chain, IntFacetCollector};
+    use query::QueryParser;
+    use fastfield::{I64FastFieldReader, U64FastFieldReader};
+    use schema::{self, FAST, STRING};
+    use Index;
+
+    #[test]
+    // Indexes 10 documents where the u64 field holds `i % 2` and the i64
+    // field holds `i % 3`, then checks the counters gathered for each value.
+    fn test_facet_collector_results() {
+        let mut schema_builder = schema::SchemaBuilder::new();
+        let num_field_i64 = schema_builder.add_i64_field("num_i64", FAST);
+        let num_field_u64 = schema_builder.add_u64_field("num_u64", FAST);
+        let text_field = schema_builder.add_text_field("text", STRING);
+        let schema = schema_builder.build();
+
+        let index = Index::create_in_ram(schema.clone());
+
+        {
+            // The writer is scoped so that it is dropped before searchers load.
+            let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
+            for i in 0u64..10u64 {
+                let i64_val = (i as i64) % 3i64;
+                let u64_val = i % 2u64;
+                index_writer.add_document(doc!(
+                    num_field_i64 => i64_val,
+                    num_field_u64 => u64_val,
+                    text_field => "text"
+                ));
+            }
+            assert_eq!(index_writer.commit().unwrap(), 10u64);
+        }
+
+        index.load_searchers().unwrap();
+        let searcher = index.searcher();
+        let mut ffvf_i64 = IntFacetCollector::<I64FastFieldReader>::new(num_field_i64);
+        let mut ffvf_u64 = IntFacetCollector::<U64FastFieldReader>::new(num_field_u64);
+
+        {
+            // Run a single query feeding both collectors.
+            let mut facet_collectors = chain().push(&mut ffvf_i64).push(&mut ffvf_u64);
+            let mut query_parser = QueryParser::for_index(index, vec![text_field]);
+            let query = query_parser.parse_query("text:text").unwrap();
+            query.search(&searcher, &mut facet_collectors).unwrap();
+        }
+
+        assert_eq!(ffvf_u64.counters[&0], 5);
+        assert_eq!(ffvf_u64.counters[&1], 5);
+        assert_eq!(ffvf_i64.counters[&0], 4);
+        assert_eq!(ffvf_i64.counters[&1], 3);
+    }
+}
--- a/src/collector/mod.rs
+++ b/src/collector/mod.rs
@@ -2,11 +2,11 @@
 Defines how the documents matching a search query should be processed.
 */

-use SegmentReader;
-use SegmentLocalId;
 use DocId;
-use Score;
 use Result;
+use Score;
+use SegmentLocalId;
+use SegmentReader;

 mod count_collector;
 pub use self::count_collector::CountCollector;
@@ -21,7 +21,7 @@ mod facet_collector;
 pub use self::facet_collector::FacetCollector;

 mod chained_collector;
-pub use self::chained_collector::chain;
+pub use self::chained_collector::{chain, ChainedCollector};

 /// Collectors are in charge of collecting and retaining relevant
 /// information from the document found and scored by the query.
@@ -62,6 +62,9 @@ pub trait Collector {
    ) -> Result<()>;
    /// The query pushes the scored document to the collector via this method.
    fn collect(&mut self, doc: DocId, score: Score);
+
+    /// Returns true iff the collector requires to compute scores for documents.
+    fn requires_scoring(&self) -> bool;
 }

 impl<'a, C: Collector> Collector for &'a mut C {
@@ -74,7 +77,11 @@ impl<'a, C: Collector> Collector for &'a mut C {
    }
    /// The query pushes the scored document to the collector via this method.
    fn collect(&mut self, doc: DocId, score: Score) {
-        (*self).collect(doc, score);
+        C::collect(self, doc, score)
+    }
+
+    fn requires_scoring(&self) -> bool {
+        C::requires_scoring(self)
    }
 }

@@ -82,14 +89,13 @@ impl<'a, C: Collector> Collector for &'a mut C {
 pub mod tests {

    use super::*;
-    use test::Bencher;
-    use DocId;
-    use Score;
    use core::SegmentReader;
-    use SegmentLocalId;
-    use fastfield::U64FastFieldReader;
+    use fastfield::BytesFastFieldReader;
    use fastfield::FastFieldReader;
    use schema::Field;
+    use DocId;
+    use Score;
+    use SegmentLocalId;

    /// Stores all of the doc ids.
    /// This collector is only used for tests.
@@ -99,6 +105,7 @@ pub mod tests {
        offset: DocId,
        segment_max_doc: DocId,
        docs: Vec<DocId>,
+        scores: Vec<Score>,
    }

    impl TestCollector {
@@ -106,14 +113,19 @@ pub mod tests {
        pub fn docs(self) -> Vec<DocId> {
            self.docs
        }
+
+        pub fn scores(self) -> Vec<Score> {
+            self.scores
+        }
    }

    impl Default for TestCollector {
        fn default() -> TestCollector {
            TestCollector {
-                docs: Vec::new(),
                offset: 0,
                segment_max_doc: 0,
+                docs: Vec::new(),
+                scores: Vec::new(),
            }
        }
    }
@@ -125,8 +137,13 @@ pub mod tests {
            Ok(())
        }

-        fn collect(&mut self, doc: DocId, _score: Score) {
+        fn collect(&mut self, doc: DocId, score: Score) {
            self.docs.push(doc + self.offset);
+            self.scores.push(score);
+        }
+
+        fn requires_scoring(&self) -> bool {
+            true
        }
    }

@@ -137,14 +154,14 @@ pub mod tests {
    pub struct FastFieldTestCollector {
        vals: Vec<u64>,
        field: Field,
-        ff_reader: Option<U64FastFieldReader>,
+        ff_reader: Option<FastFieldReader<u64>>,
    }

    impl FastFieldTestCollector {
        pub fn for_field(field: Field) -> FastFieldTestCollector {
            FastFieldTestCollector {
                vals: Vec::new(),
-                field: field,
+                field,
                ff_reader: None,
            }
        }
@@ -156,7 +173,7 @@ pub mod tests {

    impl Collector for FastFieldTestCollector {
        fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
-            self.ff_reader = Some(reader.get_fast_field_reader(self.field)?);
+            self.ff_reader = Some(reader.fast_field_reader(self.field)?);
            Ok(())
        }

@@ -164,8 +181,57 @@ pub mod tests {
            let val = self.ff_reader.as_ref().unwrap().get(doc);
            self.vals.push(val);
        }
+        fn requires_scoring(&self) -> bool {
+            false
+        }
    }

+    /// Collects in order all of the fast field bytes for all of the
+    /// docs in the `DocSet`
+    ///
+    /// This collector is mainly useful for tests.
+    pub struct BytesFastFieldTestCollector {
+        vals: Vec<u8>,
+        field: Field,
+        ff_reader: Option<BytesFastFieldReader>,
+    }
+
+    impl BytesFastFieldTestCollector {
+        pub fn for_field(field: Field) -> BytesFastFieldTestCollector {
+            BytesFastFieldTestCollector {
+                vals: Vec::new(),
+                field,
+                ff_reader: None,
+            }
+        }
+
+        pub fn vals(self) -> Vec<u8> {
+            self.vals
+        }
+    }
+
+    impl Collector for BytesFastFieldTestCollector {
+        fn set_segment(&mut self, _segment_local_id: u32, segment: &SegmentReader) -> Result<()> {
+            self.ff_reader = Some(segment.bytes_fast_field_reader(self.field)?);
+            Ok(())
+        }
+
+        fn collect(&mut self, doc: u32, _score: f32) {
+            let val = self.ff_reader.as_ref().unwrap().get_val(doc);
+            self.vals.extend(val);
+        }
+
+        fn requires_scoring(&self) -> bool {
+            false
+        }
+    }
+}
+
+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+    use collector::{Collector, CountCollector};
+    use test::Bencher;
+
    #[bench]
    fn build_collector(b: &mut Bencher) {
        b.iter(|| {
--- a/src/collector/multi_collector.rs
+++ b/src/collector/multi_collector.rs
@@ -1,14 +1,66 @@
 use super::Collector;
 use DocId;
-use Score;
 use Result;
-use SegmentReader;
+use Score;
 use SegmentLocalId;
+use SegmentReader;

 /// Multicollector makes it possible to collect on more than one collector.
 /// It should only be used for use cases where the Collector types is unknown
 /// at compile time.
 /// If the type of the collectors is known, you should prefer to use `ChainedCollector`.
+///
+/// ```rust
+/// #[macro_use]
+/// extern crate tantivy;
+/// use tantivy::schema::{SchemaBuilder, TEXT};
+/// use tantivy::{Index, Result};
+/// use tantivy::collector::{CountCollector, TopCollector, MultiCollector};
+/// use tantivy::query::QueryParser;
+///
+/// # fn main() { example().unwrap(); }
+/// fn example() -> Result<()> {
+///     let mut schema_builder = SchemaBuilder::new();
+///     let title = schema_builder.add_text_field("title", TEXT);
+///     let schema = schema_builder.build();
+///     let index = Index::create_in_ram(schema);
+///     {
+///         let mut index_writer = index.writer(3_000_000)?;
+///         index_writer.add_document(doc!(
+///             title => "The Name of the Wind",
+///         ));
+///         index_writer.add_document(doc!(
+///             title => "The Diary of Muadib",
+///         ));
+///         index_writer.add_document(doc!(
+///             title => "A Dairy Cow",
+///         ));
+///         index_writer.add_document(doc!(
+///             title => "The Diary of a Young Girl",
+///         ));
+///         index_writer.commit().unwrap();
+///     }
+///
+///     index.load_searchers()?;
+///     let searcher = index.searcher();
+///
+///     {
+///         let mut top_collector = TopCollector::with_limit(2);
+///         let mut count_collector = CountCollector::default();
+///         {
+///             let mut collectors =
+///                 MultiCollector::from(vec![&mut top_collector, &mut count_collector]);
+///             let query_parser = QueryParser::for_index(&index, vec![title]);
+///             let query = query_parser.parse_query("diary")?;
+///             searcher.search(&*query, &mut collectors).unwrap();
+///         }
+///         assert_eq!(count_collector.count(), 2);
+///         assert!(top_collector.at_capacity());
+///     }
+///
+///     Ok(())
+/// }
+/// ```
 pub struct MultiCollector<'a> {
    collectors: Vec<&'a mut Collector>,
 }
@@ -16,9 +68,7 @@ pub struct MultiCollector<'a> {
 impl<'a> MultiCollector<'a> {
    /// Constructor
    pub fn from(collectors: Vec<&'a mut Collector>) -> MultiCollector {
-        MultiCollector {
-            collectors: collectors,
-        }
+        MultiCollector { collectors }
    }
 }

@@ -29,7 +79,7 @@ impl<'a> Collector for MultiCollector<'a> {
        segment: &SegmentReader,
    ) -> Result<()> {
        for collector in &mut self.collectors {
-            try!(collector.set_segment(segment_local_id, segment));
+            collector.set_segment(segment_local_id, segment)?;
        }
        Ok(())
    }
@@ -39,6 +89,11 @@ impl<'a> Collector for MultiCollector<'a> {
            collector.collect(doc, score);
        }
    }
+    fn requires_scoring(&self) -> bool {
+        self.collectors
+            .iter()
+            .any(|collector| collector.requires_scoring())
+    }
 }

 #[cfg(test)]
--- a/src/collector/top_collector.rs
+++ b/src/collector/top_collector.rs
@@ -1,12 +1,12 @@
 use super::Collector;
-use SegmentReader;
-use SegmentLocalId;
-use DocAddress;
-use Result;
-use std::collections::BinaryHeap;
 use std::cmp::Ordering;
+use std::collections::BinaryHeap;
+use DocAddress;
 use DocId;
+use Result;
 use Score;
+use SegmentLocalId;
+use SegmentReader;

 // Rust heap is a max-heap and we need a min heap.
 #[derive(Clone, Copy)]
@@ -43,7 +43,61 @@ impl Eq for GlobalScoredDoc {}
 /// with the best scores.
 ///
 /// The implementation is based on a `BinaryHeap`.
-/// The theorical complexity is `O(n log K)`.
+/// The theoretical complexity for collecting the top `K` out of `n` documents
+/// is `O(n log K)`.
+///
+/// ```rust
+/// #[macro_use]
+/// extern crate tantivy;
+/// use tantivy::schema::{SchemaBuilder, TEXT};
+/// use tantivy::{Index, Result, DocId, Score};
+/// use tantivy::collector::TopCollector;
+/// use tantivy::query::QueryParser;
+///
+/// # fn main() { example().unwrap(); }
+/// fn example() -> Result<()> {
+///     let mut schema_builder = SchemaBuilder::new();
+///     let title = schema_builder.add_text_field("title", TEXT);
+///     let schema = schema_builder.build();
+///     let index = Index::create_in_ram(schema);
+///     {
+///         let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
+///         index_writer.add_document(doc!(
+///             title => "The Name of the Wind",
+///         ));
+///         index_writer.add_document(doc!(
+///             title => "The Diary of Muadib",
+///         ));
+///         index_writer.add_document(doc!(
+///             title => "A Dairy Cow",
+///         ));
+///         index_writer.add_document(doc!(
+///             title => "The Diary of a Young Girl",
+///         ));
+///         index_writer.commit().unwrap();
+///     }
+///
+///     index.load_searchers()?;
+///     let searcher = index.searcher();
+///
+///     {
+///         let mut top_collector = TopCollector::with_limit(2);
+///         let query_parser = QueryParser::for_index(&index, vec![title]);
+///         let query = query_parser.parse_query("diary")?;
+///         searcher.search(&*query, &mut top_collector).unwrap();
+///
+///         let score_docs: Vec<(Score, DocId)> = top_collector
+///           .score_docs()
+///           .into_iter()
+///           .map(|(score, doc_address)| (score, doc_address.doc()))
+///           .collect();
+///
+///         assert_eq!(score_docs, vec![(0.7261542, 1), (0.6099695, 3)]);
+///     }
+///
+///     Ok(())
+/// }
+/// ```
 pub struct TopCollector {
    limit: usize,
    heap: BinaryHeap<GlobalScoredDoc>,
@@ -60,7 +114,7 @@ impl TopCollector {
            panic!("Limit must be strictly greater than 0.");
        }
        TopCollector {
-            limit: limit,
+            limit,
            heap: BinaryHeap::with_capacity(limit),
            segment_id: 0,
        }
@@ -107,11 +161,13 @@ impl Collector for TopCollector {
    fn collect(&mut self, doc: DocId, score: Score) {
        if self.at_capacity() {
            // It's ok to unwrap as long as a limit of 0 is forbidden.
-            let limit_doc: GlobalScoredDoc = *self.heap
+            let limit_doc: GlobalScoredDoc = *self
+                .heap
                .peek()
                .expect("Top collector with size 0 is forbidden");
            if limit_doc.score < score {
-                let mut mut_head = self.heap
+                let mut mut_head = self
+                    .heap
                    .peek_mut()
                    .expect("Top collector with size 0 is forbidden");
                mut_head.score = score;
@@ -119,21 +175,25 @@ impl Collector for TopCollector {
            }
        } else {
            let wrapped_doc = GlobalScoredDoc {
-                score: score,
+                score,
                doc_address: DocAddress(self.segment_id, doc),
            };
            self.heap.push(wrapped_doc);
        }
    }
+
+    fn requires_scoring(&self) -> bool {
+        true
+    }
 }

 #[cfg(test)]
 mod tests {

    use super::*;
+    use collector::Collector;
    use DocId;
    use Score;
-    use collector::Collector;

    #[test]
    fn test_top_collector_not_at_capacity() {
@@ -182,4 +242,5 @@ mod tests {
    fn test_top_0() {
        TopCollector::with_limit(0);
    }
+
 }
--- a/src/common/bitpacker.rs
+++ b/src/common/bitpacker.rs
@@ -1,67 +1,39 @@
-use std::io::Write;
-use std::io;
 use common::serialize::BinarySerializable;
+use std::io;
+use std::io::Write;
 use std::mem;
 use std::ops::Deref;
+use std::ptr;

-/// Computes the number of bits that will be used for bitpacking.
-///
-/// In general the target is the minimum number of bits
-/// required to express the amplitude given in argument.
-///
-/// e.g. If the amplitude is 10, we can store all ints on simply 4bits.
-///
-/// The logic is slightly more convoluted here as for optimization
-/// reasons, we want to ensure that a value spawns over at most 8 bytes
-/// of aligns bytes.
-///
-/// Spanning over 9 bytes is possible for instance, if we do
-/// bitpacking with an amplitude of 63 bits.
-/// In this case, the second int will start on bit
-/// 63 (which belongs to byte 7) and ends at byte 15;
-/// Hence 9 bytes (from byte 7 to byte 15 included).
-///
-/// To avoid this, we force the number of bits to 64bits
-/// when the result is greater than `64-8 = 56 bits`.
-///
-/// Note that this only affects rare use cases spawning over
-/// a very large range of values. Even in this case, it results
-/// in an extra cost of at most 12% compared to the optimal
-/// number of bits.
-pub fn compute_num_bits(amplitude: u64) -> u8 {
-    let amplitude = (64u32 - amplitude.leading_zeros()) as u8;
-    if amplitude <= 64 - 8 {
-        amplitude
-    } else {
-        64
-    }
-}
-
-pub struct BitPacker {
+pub(crate) struct BitPacker {
    mini_buffer: u64,
    mini_buffer_written: usize,
-    num_bits: usize,
 }

 impl BitPacker {
-    pub fn new(num_bits: usize) -> BitPacker {
+    pub fn new() -> BitPacker {
        BitPacker {
            mini_buffer: 0u64,
            mini_buffer_written: 0,
-            num_bits,
        }
    }

-    pub fn write<TWrite: Write>(&mut self, val: u64, output: &mut TWrite) -> io::Result<()> {
+    pub fn write<TWrite: Write>(
+        &mut self,
+        val: u64,
+        num_bits: u8,
+        output: &mut TWrite,
+    ) -> io::Result<()> {
        let val_u64 = val as u64;
-        if self.mini_buffer_written + self.num_bits > 64 {
+        let num_bits = num_bits as usize;
+        if self.mini_buffer_written + num_bits > 64 {
            self.mini_buffer |= val_u64.wrapping_shl(self.mini_buffer_written as u32);
            self.mini_buffer.serialize(output)?;
            self.mini_buffer = val_u64.wrapping_shr((64 - self.mini_buffer_written) as u32);
-            self.mini_buffer_written = self.mini_buffer_written + (self.num_bits as usize) - 64;
+            self.mini_buffer_written = self.mini_buffer_written + num_bits - 64;
        } else {
            self.mini_buffer |= val_u64 << self.mini_buffer_written;
-            self.mini_buffer_written += self.num_bits;
+            self.mini_buffer_written += num_bits;
            if self.mini_buffer_written == 64 {
                self.mini_buffer.serialize(output)?;
                self.mini_buffer_written = 0;
@@ -71,10 +43,10 @@ impl BitPacker {
        Ok(())
    }

-    pub(crate) fn flush<TWrite: Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
+    pub fn flush<TWrite: Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
        if self.mini_buffer_written > 0 {
            let num_bytes = (self.mini_buffer_written + 7) / 8;
-            let arr: [u8; 8] = unsafe { mem::transmute::<u64, [u8; 8]>(self.mini_buffer) };
+            let arr: [u8; 8] = unsafe { mem::transmute::<u64, [u8; 8]>(self.mini_buffer.to_le()) };
            output.write_all(&arr[..num_bytes])?;
            self.mini_buffer_written = 0;
        }
@@ -89,6 +61,7 @@ impl BitPacker {
    }
 }

+#[derive(Clone)]
 pub struct BitUnpacker<Data>
 where
    Data: Deref<Target = [u8]>,
@@ -102,14 +75,14 @@ impl<Data> BitUnpacker<Data>
 where
    Data: Deref<Target = [u8]>,
 {
-    pub fn new(data: Data, num_bits: usize) -> BitUnpacker<Data> {
+    pub fn new(data: Data, num_bits: u8) -> BitUnpacker<Data> {
        let mask: u64 = if num_bits == 64 {
            !0u64
        } else {
            (1u64 << num_bits) - 1u64
        };
        BitUnpacker {
-            num_bits,
+            num_bits: num_bits as usize,
            mask,
            data,
        }
@@ -117,7 +90,7 @@ where

    pub fn get(&self, idx: usize) -> u64 {
        if self.num_bits == 0 {
-            return 0;
+            return 0u64;
        }
        let data: &[u8] = &*self.data;
        let num_bits = self.num_bits;
@@ -125,37 +98,24 @@ where
        let addr_in_bits = idx * num_bits;
        let addr = addr_in_bits >> 3;
        let bit_shift = addr_in_bits & 7;
-        if cfg!(feature = "simdcompression") {
-            // for simdcompression,
-            // the bitpacker is only used for fastfields,
-            // and we expect them to be always padded.
-            debug_assert!(
-                addr + 8 <= data.len(),
-                "The fast field field should have been padded with 7 bytes."
-            );
-            let val_unshifted_unmasked: u64 =
-                unsafe { *(data[addr..].as_ptr() as *const u64) };
-            let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
-            (val_shifted & mask)
-        } else {
-            let val_unshifted_unmasked: u64 = if addr + 8 <= data.len() {
-                unsafe { *(data[addr..].as_ptr() as *const u64) }
-            } else {
-                let mut buffer = [0u8; 8];
-                for i in addr..data.len() {
-                    buffer[i - addr] += data[i];
-                }
-                unsafe { *(buffer[..].as_ptr() as *const u64) }
-            };
-            let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
-            (val_shifted & mask)
-        }
+        debug_assert!(
+            addr + 8 <= data.len(),
+            "The fast field field should have been padded with 7 bytes."
+        );
+        let val_unshifted_unmasked: u64 =
+            u64::from_le(unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) });
+        let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
+        val_shifted & mask
    }

+    /// Reads a range of values from the fast field.
+    ///
+    /// The range of values read is from
+    /// `[start..start + output.len()[`
    pub fn get_range(&self, start: u32, output: &mut [u64]) {
        if self.num_bits == 0 {
            for val in output.iter_mut() {
-                *val = 0;
+                *val = 0u64;
            }
        } else {
            let data: &[u8] = &*self.data;
@@ -166,7 +126,7 @@ where
                let addr = addr_in_bits >> 3;
                let bit_shift = addr_in_bits & 7;
                let val_unshifted_unmasked: u64 =
-                    unsafe { *(data[addr..].as_ptr() as *const u64) };
+                    u64::from_le(unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) });
                let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
                *output_val = val_shifted & mask;
                addr_in_bits += num_bits;
@@ -177,37 +137,25 @@ where

 #[cfg(test)]
 mod test {
-    use super::{compute_num_bits, BitPacker, BitUnpacker};
+    use super::{BitPacker, BitUnpacker};

-    #[test]
-    fn test_compute_num_bits() {
-        assert_eq!(compute_num_bits(1), 1u8);
-        assert_eq!(compute_num_bits(0), 0u8);
-        assert_eq!(compute_num_bits(2), 2u8);
-        assert_eq!(compute_num_bits(3), 2u8);
-        assert_eq!(compute_num_bits(4), 3u8);
-        assert_eq!(compute_num_bits(255), 8u8);
-        assert_eq!(compute_num_bits(256), 9u8);
-        assert_eq!(compute_num_bits(5_000_000_000), 33u8);
-    }
-
-    fn create_fastfield_bitpacker(len: usize, num_bits: usize) -> (BitUnpacker<Vec<u8>>, Vec<u64>) {
+    fn create_fastfield_bitpacker(len: usize, num_bits: u8) -> (BitUnpacker<Vec<u8>>, Vec<u64>) {
        let mut data = Vec::new();
-        let mut bitpacker = BitPacker::new(num_bits);
-        let max_val: u64 = (1 << num_bits) - 1;
+        let mut bitpacker = BitPacker::new();
+        let max_val: u64 = (1u64 << num_bits as u64) - 1u64;
        let vals: Vec<u64> = (0u64..len as u64)
            .map(|i| if max_val == 0 { 0 } else { i % max_val })
            .collect();
        for &val in &vals {
-            bitpacker.write(val, &mut data).unwrap();
+            bitpacker.write(val, num_bits, &mut data).unwrap();
        }
        bitpacker.close(&mut data).unwrap();
-        assert_eq!(data.len(), (num_bits * len + 7) / 8 + 7);
+        assert_eq!(data.len(), ((num_bits as usize) * len + 7) / 8 + 7);
        let bitunpacker = BitUnpacker::new(data, num_bits);
        (bitunpacker, vals)
    }

-    fn test_bitpacker_util(len: usize, num_bits: usize) {
+    fn test_bitpacker_util(len: usize, num_bits: u8) {
        let (bitunpacker, vals) = create_fastfield_bitpacker(len, num_bits);
        for (i, val) in vals.iter().enumerate() {
            assert_eq!(bitunpacker.get(i), *val);
--- a/src/common/bitset.rs
+++ b/src/common/bitset.rs
@@ -0,0 +1,395 @@
+use std::fmt;
+use std::u64;
+
+#[derive(Clone, Copy, Eq, PartialEq)]
+pub(crate) struct TinySet(u64);
+
+impl fmt::Debug for TinySet {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        self.into_iter().collect::<Vec<u32>>().fmt(f)
+    }
+}
+
+pub struct TinySetIterator(TinySet);
+impl Iterator for TinySetIterator {
+    type Item = u32;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0.pop_lowest()
+    }
+}
+
+impl IntoIterator for TinySet {
+    type Item = u32;
+    type IntoIter = TinySetIterator;
+    fn into_iter(self) -> Self::IntoIter {
+        TinySetIterator(self)
+    }
+}
+
+impl TinySet {
+    /// Returns an empty `TinySet`.
+    pub fn empty() -> TinySet {
+        TinySet(0u64)
+    }
+
+    /// Returns the complement of the set in `[0, 64[`.
+    fn complement(&self) -> TinySet {
+        TinySet(!self.0)
+    }
+
+    /// Returns true iff the `TinySet` contains the element `el`.
+    pub fn contains(&self, el: u32) -> bool {
+        !self.intersect(TinySet::singleton(el)).is_empty()
+    }
+
+    /// Returns the intersection of `self` and `other`
+    pub fn intersect(&self, other: TinySet) -> TinySet {
+        TinySet(self.0 & other.0)
+    }
+
+    /// Creates a new `TinySet` containing only one element
+    /// within `[0; 64[`
+    #[inline(always)]
+    pub fn singleton(el: u32) -> TinySet {
+        TinySet(1u64 << u64::from(el))
+    }
+
+    /// Insert a new element within [0..64[
+    #[inline(always)]
+    pub fn insert(self, el: u32) -> TinySet {
+        self.union(TinySet::singleton(el))
+    }
+
+    /// Inserts `el` (within [0..64[) in place; returns true iff the set changed.
+    #[inline(always)]
+    pub fn insert_mut(&mut self, el: u32) -> bool {
+        let old = *self;
+        *self = old.insert(el);
+        old != *self
+    }
+
+    /// Returns the union of two tinysets
+    #[inline(always)]
+    pub fn union(self, other: TinySet) -> TinySet {
+        TinySet(self.0 | other.0)
+    }
+
+    /// Returns true iff the `TinySet` is empty.
+    #[inline(always)]
+    pub fn is_empty(&self) -> bool {
+        self.0 == 0u64
+    }
+
+    /// Removes the smallest element of the `TinySet` and returns it,
+    /// or returns `None` if the set is empty.
+    #[inline(always)]
+    pub fn pop_lowest(&mut self) -> Option<u32> {
+        if self.0 == 0u64 {
+            return None;
+        }
+        // The index of the lowest set bit is the smallest element.
+        let smallest: u32 = self.0.trailing_zeros();
+        self.0 &= !(1u64 << u64::from(smallest));
+        Some(smallest)
+    }
+
+    /// Returns a `TinySet` that contains all values from 0 up
+    /// to `upper_bound`, excluded.
+    ///
+    /// The limit is assumed to be strictly lower than 64.
+    pub fn range_lower(upper_bound: u32) -> TinySet {
+        TinySet((1u64 << u64::from(upper_bound % 64u32)) - 1u64)
+    }
+
+    /// Returns a `TinySet` that contains all values greater
+    /// or equal to the given limit, included. (and up to 63)
+    ///
+    /// The limit is assumed to be strictly lower than 64.
+    pub fn range_greater_or_equal(from_included: u32) -> TinySet {
+        TinySet::range_lower(from_included).complement()
+    }
+
+    pub fn clear(&mut self) {
+        self.0 = 0u64;
+    }
+
+    pub fn len(&self) -> u32 {
+        self.0.count_ones()
+    }
+}
+
+#[derive(Clone)]
+pub struct BitSet {
+    tinysets: Box<[TinySet]>,
+    len: usize, //< Technically it should be u32, but we
+    // count multiple inserts.
+    // `usize` guards us from overflow.
+    max_value: u32,
+}
+
+fn num_buckets(max_val: u32) -> u32 {
+    max_val / 64u32 + (max_val % 64u32 != 0) as u32 // ceil(max_val / 64), overflow-free
+}
+
+impl BitSet {
+    /// Create a new `BitSet` that may contain elements
+    /// within `[0, max_val[`.
+    pub fn with_max_value(max_value: u32) -> BitSet {
+        let num_buckets = num_buckets(max_value);
+        let tinybisets = vec![TinySet::empty(); num_buckets as usize].into_boxed_slice();
+        BitSet {
+            tinysets: tinybisets,
+            len: 0,
+            max_value,
+        }
+    }
+
+    /// Removes all elements from the `BitSet`.
+    pub fn clear(&mut self) {
+        self.tinysets.iter_mut().for_each(TinySet::clear);
+        // Reset the element count too, otherwise `len()` would stay stale.
+        self.len = 0;
+    }
+
+    /// Returns the number of elements in the `BitSet`.
+    pub fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Inserts `el` in the `BitSet`.
+    ///
+    /// `len` is only incremented when `el` was not already present.
+    pub fn insert(&mut self, el: u32) {
+        // `el` is not checked against `max_value`.
+        let bucket = (el / 64u32) as usize;
+        let newly_inserted = self.tinysets[bucket].insert_mut(el % 64u32);
+        if newly_inserted {
+            self.len += 1;
+        }
+    }
+
+    /// Returns true iff the elements is in the `BitSet`.
+    pub fn contains(&self, el: u32) -> bool {
+        self.tinyset(el / 64u32).contains(el % 64)
+    }
+
+    /// Returns the index of the first non-empty bucket that is
+    /// greater than or equal to `bucket`, or `None` if there is none.
+    ///
+    /// Reminder: the tiny set with the bucket `bucket`, represents the
+    /// elements from `bucket * 64` to `(bucket+1) * 64`.
+    pub(crate) fn first_non_empty_bucket(&self, bucket: u32) -> Option<u32> {
+        self.tinysets[bucket as usize..]
+            .iter()
+            .cloned()
+            .position(|tinyset| !tinyset.is_empty())
+            .map(|delta_bucket| bucket + delta_bucket as u32)
+    }
+
+    pub fn max_value(&self) -> u32 {
+        self.max_value
+    }
+
+    /// Returns the tiny bitset representing
+    /// the set restricted to the number range from
+    /// `bucket * 64` to `(bucket + 1) * 64`.
+    pub(crate) fn tinyset(&self, bucket: u32) -> TinySet {
+        self.tinysets[bucket as usize]
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    use super::BitSet;
+    use super::TinySet;
+    use docset::DocSet;
+    use query::BitSetDocSet;
+    use std::collections::BTreeSet;
+    use std::collections::HashSet;
+    use tests;
+    use tests::generate_nonunique_unsorted;
+
+    #[test]
+    fn test_tiny_set() {
+        assert!(TinySet::empty().is_empty());
+        {
+            let mut u = TinySet::empty().insert(1u32);
+            assert_eq!(u.pop_lowest(), Some(1u32));
+            assert!(u.pop_lowest().is_none())
+        }
+        {
+            let mut u = TinySet::empty().insert(1u32).insert(1u32);
+            assert_eq!(u.pop_lowest(), Some(1u32));
+            assert!(u.pop_lowest().is_none())
+        }
+        {
+            let mut u = TinySet::empty().insert(2u32);
+            assert_eq!(u.pop_lowest(), Some(2u32));
+            u.insert_mut(1u32);
+            assert_eq!(u.pop_lowest(), Some(1u32));
+            assert!(u.pop_lowest().is_none());
+        }
+        {
+            let mut u = TinySet::empty().insert(63u32);
+            assert_eq!(u.pop_lowest(), Some(63u32));
+            assert!(u.pop_lowest().is_none());
+        }
+    }
+
+    #[test]
+    fn test_bitset() {
+        let test_against_hashset = |els: &[u32], max_value: u32| {
+            let mut hashset: HashSet<u32> = HashSet::new();
+            let mut bitset = BitSet::with_max_value(max_value);
+            for &el in els {
+                assert!(el < max_value);
+                hashset.insert(el);
+                bitset.insert(el);
+            }
+            for el in 0..max_value {
+                assert_eq!(hashset.contains(&el), bitset.contains(el));
+            }
+            assert_eq!(bitset.max_value(), max_value);
+        };
+
+        test_against_hashset(&[], 0);
+        test_against_hashset(&[], 1);
+        test_against_hashset(&[0u32], 1);
+        test_against_hashset(&[0u32], 100);
+        test_against_hashset(&[1u32, 2u32], 4);
+        test_against_hashset(&[99u32], 100);
+        test_against_hashset(&[63u32], 64);
+        test_against_hashset(&[62u32, 63u32], 64);
+    }
+
+    #[test]
+    fn test_bitset_large() {
+        let arr = generate_nonunique_unsorted(1_000_000, 50_000);
+        let mut btreeset: BTreeSet<u32> = BTreeSet::new();
+        let mut bitset = BitSet::with_max_value(1_000_000);
+        for el in arr {
+            btreeset.insert(el);
+            bitset.insert(el);
+        }
+        for i in 0..1_000_000 {
+            assert_eq!(btreeset.contains(&i), bitset.contains(i));
+        }
+        assert_eq!(btreeset.len(), bitset.len());
+        let mut bitset_docset = BitSetDocSet::from(bitset);
+        for el in btreeset.into_iter() {
+            bitset_docset.advance();
+            assert_eq!(bitset_docset.doc(), el);
+        }
+        assert!(!bitset_docset.advance());
+    }
+
+    #[test]
+    fn test_bitset_num_buckets() {
+        use super::num_buckets;
+        assert_eq!(num_buckets(0u32), 0);
+        assert_eq!(num_buckets(1u32), 1);
+        assert_eq!(num_buckets(64u32), 1);
+        assert_eq!(num_buckets(65u32), 2);
+        assert_eq!(num_buckets(128u32), 2);
+        assert_eq!(num_buckets(129u32), 3);
+    }
+
+    #[test]
+    fn test_tinyset_range() {
+        assert_eq!(
+            TinySet::range_lower(3).into_iter().collect::<Vec<u32>>(),
+            [0, 1, 2]
+        );
+        assert!(TinySet::range_lower(0).is_empty());
+        assert_eq!(
+            TinySet::range_lower(63).into_iter().collect::<Vec<u32>>(),
+            (0u32..63u32).collect::<Vec<_>>()
+        );
+        assert_eq!(
+            TinySet::range_lower(1).into_iter().collect::<Vec<u32>>(),
+            [0]
+        );
+        assert_eq!(
+            TinySet::range_lower(2).into_iter().collect::<Vec<u32>>(),
+            [0, 1]
+        );
+        assert_eq!(
+            TinySet::range_greater_or_equal(3)
+                .into_iter()
+                .collect::<Vec<u32>>(),
+            (3u32..64u32).collect::<Vec<_>>()
+        );
+    }
+
+    #[test]
+    fn test_bitset_len() {
+        let mut bitset = BitSet::with_max_value(1_000);
+        assert_eq!(bitset.len(), 0);
+        bitset.insert(3u32);
+        assert_eq!(bitset.len(), 1);
+        bitset.insert(103u32);
+        assert_eq!(bitset.len(), 2);
+        bitset.insert(3u32);
+        assert_eq!(bitset.len(), 2);
+        bitset.insert(103u32);
+        assert_eq!(bitset.len(), 2);
+        bitset.insert(104u32);
+        assert_eq!(bitset.len(), 3);
+    }
+
+    #[test]
+    fn test_bitset_clear() {
+        let mut bitset = BitSet::with_max_value(1_000);
+        let els = tests::sample(1_000, 0.01f32);
+        for &el in &els {
+            bitset.insert(el);
+        }
+        assert!(els.iter().all(|el| bitset.contains(*el)));
+        bitset.clear();
+        for el in 0u32..1000u32 {
+            assert!(!bitset.contains(el));
+        }
+    }
+}
+
+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+
+    use super::BitSet;
+    use super::TinySet;
+    use test;
+
+    #[bench]
+    fn bench_tinyset_pop(b: &mut test::Bencher) {
+        b.iter(|| {
+            let mut tinyset = TinySet::singleton(test::black_box(31u32));
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+        });
+    }
+
+    #[bench]
+    fn bench_tinyset_sum(b: &mut test::Bencher) {
+        let tiny_set = TinySet::empty().insert(10u32).insert(14u32).insert(21u32);
+        b.iter(|| {
+            assert_eq!(test::black_box(tiny_set).into_iter().sum::<u32>(), 45u32);
+        });
+    }
+
+    #[bench]
+    fn bench_tinyarr_sum(b: &mut test::Bencher) {
+        let v = [10u32, 14u32, 21u32];
+        b.iter(|| test::black_box(v).iter().cloned().sum::<u32>());
+    }
+
+    #[bench]
+    fn bench_bitset_initialize(b: &mut test::Bencher) {
+        b.iter(|| BitSet::with_max_value(1_000_000));
+    }
+}
--- a/src/common/composite_file.rs
+++ b/src/common/composite_file.rs
@@ -1,17 +1,43 @@
-use std::io::Write;
-use common::CountingWriter;
-use std::collections::HashMap;
-use schema::Field;
-use common::VInt;
-use directory::WritePtr;
-use std::io;
-use directory::ReadOnlySource;
 use common::BinarySerializable;
+use common::CountingWriter;
+use common::VInt;
+use directory::ReadOnlySource;
+use directory::WritePtr;
+use schema::Field;
+use std::collections::HashMap;
+use std::io::Write;
+use std::io::{self, Read};
+
+#[derive(Eq, PartialEq, Hash, Copy, Ord, PartialOrd, Clone, Debug)]
+pub struct FileAddr {
+    field: Field,
+    idx: usize,
+}
+
+impl FileAddr {
+    fn new(field: Field, idx: usize) -> FileAddr {
+        FileAddr { field, idx }
+    }
+}
+
+impl BinarySerializable for FileAddr {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
+        self.field.serialize(writer)?;
+        VInt(self.idx as u64).serialize(writer)?;
+        Ok(())
+    }
+
+    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
+        let field = Field::deserialize(reader)?;
+        let idx = VInt::deserialize(reader)?.0 as usize;
+        Ok(FileAddr { field, idx })
+    }
+}

 /// A `CompositeWrite` is used to write a `CompositeFile`.
 pub struct CompositeWrite<W = WritePtr> {
    write: CountingWriter<W>,
-    offsets: HashMap<Field, usize>,
+    offsets: HashMap<FileAddr, usize>,
 }

 impl<W: Write> CompositeWrite<W> {
@@ -26,9 +52,15 @@ impl<W: Write> CompositeWrite<W> {

    /// Start writing a new field.
    pub fn for_field(&mut self, field: Field) -> &mut CountingWriter<W> {
+        self.for_field_with_idx(field, 0)
+    }
+
+    /// Start writing the file of a field identified by the index `idx`.
+    pub fn for_field_with_idx(&mut self, field: Field, idx: usize) -> &mut CountingWriter<W> {
        let offset = self.write.written_bytes();
-        assert!(!self.offsets.contains_key(&field));
-        self.offsets.insert(field, offset);
+        let file_addr = FileAddr::new(field, idx);
+        assert!(!self.offsets.contains_key(&file_addr));
+        self.offsets.insert(file_addr, offset);
        &mut self.write
    }

@@ -40,18 +72,19 @@ impl<W: Write> CompositeWrite<W> {
        let footer_offset = self.write.written_bytes();
        VInt(self.offsets.len() as u64).serialize(&mut self.write)?;

-        let mut offset_fields: Vec<_> = self.offsets
+        let mut offset_fields: Vec<_> = self
+            .offsets
            .iter()
-            .map(|(field, offset)| (offset, field))
+            .map(|(file_addr, offset)| (*offset, *file_addr))
            .collect();

        offset_fields.sort();

        let mut prev_offset = 0;
-        for (offset, field) in offset_fields {
+        for (offset, file_addr) in offset_fields {
            VInt((offset - prev_offset) as u64).serialize(&mut self.write)?;
-            field.serialize(&mut self.write)?;
-            prev_offset = *offset;
+            file_addr.serialize(&mut self.write)?;
+            prev_offset = offset;
        }

        let footer_len = (self.write.written_bytes() - footer_offset) as u32;
@@ -70,7 +103,7 @@ impl<W: Write> CompositeWrite<W> {
 #[derive(Clone)]
 pub struct CompositeFile {
    data: ReadOnlySource,
-    offsets_index: HashMap<Field, (usize, usize)>,
+    offsets_index: HashMap<FileAddr, (usize, usize)>,
 }

 impl CompositeFile {
@@ -86,7 +119,7 @@ impl CompositeFile {
        let mut footer_buffer = footer_data.as_slice();
        let num_fields = VInt::deserialize(&mut footer_buffer)?.0 as usize;

-        let mut fields = vec![];
+        let mut file_addrs = vec![];
        let mut offsets = vec![];

        let mut field_index = HashMap::new();
@@ -94,16 +127,16 @@ impl CompositeFile {
        let mut offset = 0;
        for _ in 0..num_fields {
            offset += VInt::deserialize(&mut footer_buffer)?.0 as usize;
-            let field = Field::deserialize(&mut footer_buffer)?;
+            let file_addr = FileAddr::deserialize(&mut footer_buffer)?;
            offsets.push(offset);
-            fields.push(field);
+            file_addrs.push(file_addr);
        }
        offsets.push(footer_start);
        for i in 0..num_fields {
-            let field = fields[i];
+            let file_addr = file_addrs[i];
            let start_offset = offsets[i];
            let end_offset = offsets[i + 1];
-            field_index.insert(field, (start_offset, end_offset));
+            field_index.insert(file_addr, (start_offset, end_offset));
        }

        Ok(CompositeFile {
@@ -124,8 +157,14 @@ impl CompositeFile {
    /// Returns the `ReadOnlySource` associated
    /// to a given `Field` and stored in a `CompositeFile`.
    pub fn open_read(&self, field: Field) -> Option<ReadOnlySource> {
+        self.open_read_with_idx(field, 0)
+    }
+
+    /// Returns the `ReadOnlySource` associated to a given `Field` and
+    /// index `idx` in the `CompositeFile`, or `None` if it is not registered.
+    pub fn open_read_with_idx(&self, field: Field, idx: usize) -> Option<ReadOnlySource> {
        self.offsets_index
-            .get(&field)
+            .get(&FileAddr { field, idx })
            .map(|&(from, to)| self.data.slice(from, to))
    }
 }
@@ -133,12 +172,12 @@ impl CompositeFile {
 #[cfg(test)]
 mod test {

-    use std::io::Write;
    use super::{CompositeFile, CompositeWrite};
+    use common::BinarySerializable;
+    use common::VInt;
    use directory::{Directory, RAMDirectory};
    use schema::Field;
-    use common::VInt;
-    use common::BinarySerializable;
+    use std::io::Write;
    use std::path::Path;

    #[test]
--- a/src/common/counting_writer.rs
+++ b/src/common/counting_writer.rs
@@ -1,5 +1,5 @@
-use std::io::Write;
 use std::io;
+use std::io::Write;

 pub struct CountingWriter<W> {
    underlying: W,
--- a/src/common/mod.rs
+++ b/src/common/mod.rs
@@ -1,22 +1,59 @@
-mod serialize;
-mod timer;
-mod vint;
-mod counting_writer;
-mod composite_file;
 pub mod bitpacker;
+mod bitset;
+mod composite_file;
+mod counting_writer;
+mod serialize;
+mod vint;

+pub use self::bitset::BitSet;
+pub(crate) use self::bitset::TinySet;
 pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
-pub use self::serialize::BinarySerializable;
-pub use self::timer::Timing;
-pub use self::timer::TimerTree;
-pub use self::timer::OpenTimer;
-pub use self::vint::VInt;
 pub use self::counting_writer::CountingWriter;
+pub use self::serialize::{BinarySerializable, FixedSize};
+pub use self::vint::VInt;
+pub use byteorder::LittleEndian as Endianness;

 use std::io;

+/// Computes the number of bits that will be used for bitpacking.
+///
+/// In general the target is the minimum number of bits
+/// required to express the amplitude given in argument.
+///
+/// e.g. If the amplitude is 10, we can store all ints on just 4 bits.
+///
+/// The logic is slightly more convoluted here as for optimization
+/// reasons, we want to ensure that a value spans over at most
+/// 8 contiguous bytes.
+///
+/// Spanning over 9 bytes is possible for instance, if we do
+/// bitpacking with an amplitude of 63 bits.
+/// In this case, the second int will start on bit
+/// 63 (which belongs to byte 7) and ends at byte 15;
+/// Hence 9 bytes (from byte 7 to byte 15 included).
+///
+/// To avoid this, we force the number of bits to 64 bits
+/// when the result is greater than `64-8 = 56 bits`.
+///
+/// Note that this only affects rare use cases spanning over
+/// a very large range of values. Even in this case, it results
+/// in an extra cost of at most 12% compared to the optimal
+/// number of bits.
+pub(crate) fn compute_num_bits(n: u64) -> u8 {
+    let amplitude = (64u32 - n.leading_zeros()) as u8;
+    if amplitude <= 64 - 8 {
+        amplitude
+    } else {
+        64
+    }
+}
+
+pub(crate) fn is_power_of_2(n: usize) -> bool {
+    n.is_power_of_two() // std helper; `false` for 0, like the manual bit trick
+}
+
 /// Create a default io error given a string.
-pub fn make_io_err(msg: String) -> io::Error {
+pub(crate) fn make_io_err(msg: String) -> io::Error {
    io::Error::new(io::ErrorKind::Other, msg)
 }

@@ -65,9 +102,10 @@ pub fn u64_to_i64(val: u64) -> i64 {
 }

 #[cfg(test)]
-mod test {
+pub(crate) mod test {

-    use super::{i64_to_u64, u64_to_i64};
+    pub use super::serialize::test::fixed_size_test;
+    use super::{compute_num_bits, i64_to_u64, u64_to_i64};

    fn test_i64_converter_helper(val: i64) {
        assert_eq!(u64_to_i64(i64_to_u64(val)), val);
@@ -84,4 +122,16 @@ mod test {
            test_i64_converter_helper(i);
        }
    }
+
+    #[test]
+    fn test_compute_num_bits() {
+        assert_eq!(compute_num_bits(1), 1u8);
+        assert_eq!(compute_num_bits(0), 0u8);
+        assert_eq!(compute_num_bits(2), 2u8);
+        assert_eq!(compute_num_bits(3), 2u8);
+        assert_eq!(compute_num_bits(4), 3u8);
+        assert_eq!(compute_num_bits(255), 8u8);
+        assert_eq!(compute_num_bits(256), 9u8);
+        assert_eq!(compute_num_bits(5_000_000_000), 33u8);
+    }
 }
--- a/src/common/serialize.rs
+++ b/src/common/serialize.rs
@@ -1,16 +1,25 @@
 use byteorder::{ReadBytesExt, WriteBytesExt};
-use byteorder::LittleEndian as Endianness;
-use std::fmt;
-use std::io::Write;
-use std::io::Read;
-use std::io;
+use common::Endianness;
 use common::VInt;
+use std::fmt;
+use std::io;
+use std::io::Read;
+use std::io::Write;

+/// Trait for a simple binary serialization.
 pub trait BinarySerializable: fmt::Debug + Sized {
+    /// Serialize
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()>;
+    /// Deserialize
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self>;
 }

+/// `FixedSize` marks a `BinarySerializable` as
+/// always serializing to the same size.
+pub trait FixedSize: BinarySerializable {
+    const SIZE_IN_BYTES: usize;
+}
+
 impl BinarySerializable for () {
    fn serialize<W: Write>(&self, _: &mut W) -> io::Result<()> {
        Ok(())
@@ -20,6 +29,10 @@ impl BinarySerializable for () {
    }
 }

+impl FixedSize for () {
+    const SIZE_IN_BYTES: usize = 0;
+}
+
 impl<T: BinarySerializable> BinarySerializable for Vec<T> {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        VInt(self.len() as u64).serialize(writer)?;
@@ -59,6 +72,10 @@ impl BinarySerializable for u32 {
    }
 }

+impl FixedSize for u32 {
+    const SIZE_IN_BYTES: usize = 4;
+}
+
 impl BinarySerializable for u64 {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u64::<Endianness>(*self)
@@ -68,6 +85,10 @@ impl BinarySerializable for u64 {
    }
 }

+impl FixedSize for u64 {
+    const SIZE_IN_BYTES: usize = 8;
+}
+
 impl BinarySerializable for i64 {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_i64::<Endianness>(*self)
@@ -77,6 +98,10 @@ impl BinarySerializable for i64 {
    }
 }

+impl FixedSize for i64 {
+    const SIZE_IN_BYTES: usize = 8;
+}
+
 impl BinarySerializable for u8 {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u8(*self)
@@ -86,6 +111,10 @@ impl BinarySerializable for u8 {
    }
 }

+impl FixedSize for u8 {
+    const SIZE_IN_BYTES: usize = 1;
+}
+
 impl BinarySerializable for String {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        let data: &[u8] = self.as_bytes();
@@ -104,63 +133,78 @@ impl BinarySerializable for String {
 }

 #[cfg(test)]
-mod test {
+pub mod test {

-    use common::VInt;
    use super::*;
+    use common::VInt;

-    fn serialize_test<T: BinarySerializable + Eq>(v: T, num_bytes: usize) {
+    /// Checks that serializing `O::default()` produces exactly
+    /// `O::SIZE_IN_BYTES` bytes.
+    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
+        let mut serialized: Vec<u8> = Vec::new();
+        let value = O::default();
+        value.serialize(&mut serialized).unwrap();
+        assert_eq!(serialized.len(), O::SIZE_IN_BYTES);
+    }
+
+    fn serialize_test<T: BinarySerializable + Eq>(v: T) -> usize {
        let mut buffer: Vec<u8> = Vec::new();
-        if num_bytes != 0 {
-            v.serialize(&mut buffer).unwrap();
-            assert_eq!(buffer.len(), num_bytes);
-        } else {
-            v.serialize(&mut buffer).unwrap();
-        }
+        v.serialize(&mut buffer).unwrap();
+        let num_bytes = buffer.len();
        let mut cursor = &buffer[..];
        let deser = T::deserialize(&mut cursor).unwrap();
        assert_eq!(deser, v);
+        num_bytes
    }

    #[test]
    fn test_serialize_u8() {
-        serialize_test(3u8, 1);
-        serialize_test(5u8, 1);
+        fixed_size_test::<u8>();
    }

    #[test]
    fn test_serialize_u32() {
-        serialize_test(3u32, 4);
-        serialize_test(5u32, 4);
-        serialize_test(u32::max_value(), 4);
+        fixed_size_test::<u32>();
+        assert_eq!(4, serialize_test(3u32));
+        assert_eq!(4, serialize_test(5u32));
+        assert_eq!(4, serialize_test(u32::max_value()));
+    }
+
+    #[test]
+    fn test_serialize_i64() {
+        fixed_size_test::<i64>();
+    }
+
+    #[test]
+    fn test_serialize_u64() {
+        fixed_size_test::<u64>();
    }

    #[test]
    fn test_serialize_string() {
-        serialize_test(String::from(""), 1);
-        serialize_test(String::from("ぽよぽよ"), 1 + 3 * 4);
-        serialize_test(String::from("富士さん見える。"), 1 + 3 * 8);
+        assert_eq!(serialize_test(String::from("")), 1);
+        assert_eq!(serialize_test(String::from("ぽよぽよ")), 1 + 3 * 4);
+        assert_eq!(
+            serialize_test(String::from("富士さん見える。")),
+            1 + 3 * 8
+        );
    }

    #[test]
    fn test_serialize_vec() {
-        let v: Vec<u8> = Vec::new();
-        serialize_test(v, 1);
-        serialize_test(vec![1u32, 3u32], 1 + 4 * 2);
+        assert_eq!(serialize_test(Vec::<u8>::new()), 1);
+        assert_eq!(serialize_test(vec![1u32, 3u32]), 1 + 4 * 2);
    }

    #[test]
    fn test_serialize_vint() {
        for i in 0..10_000 {
-            serialize_test(VInt(i as u64), 0);
+            serialize_test(VInt(i as u64));
        }
-        serialize_test(VInt(7u64), 1);
-        serialize_test(VInt(127u64), 1);
-        serialize_test(VInt(128u64), 2);
-        serialize_test(VInt(129u64), 2);
-        serialize_test(VInt(1234u64), 2);
-        serialize_test(VInt(16_383), 2);
-        serialize_test(VInt(16_384), 3);
-        serialize_test(VInt(u64::max_value()), 10);
+        assert_eq!(serialize_test(VInt(7u64)), 1);
+        assert_eq!(serialize_test(VInt(127u64)), 1);
+        assert_eq!(serialize_test(VInt(128u64)), 2);
+        assert_eq!(serialize_test(VInt(129u64)), 2);
+        assert_eq!(serialize_test(VInt(1234u64)), 2);
+        assert_eq!(serialize_test(VInt(16_383u64)), 2);
+        assert_eq!(serialize_test(VInt(16_384u64)), 3);
+        assert_eq!(serialize_test(VInt(u64::max_value())), 10);
    }
 }
--- a/src/common/timer.rs
+++ b/src/common/timer.rs
@@ -1,99 +0,0 @@
-use time::PreciseTime;
-
-pub struct OpenTimer<'a> {
-    name: &'static str,
-    timer_tree: &'a mut TimerTree,
-    start: PreciseTime,
-    depth: u32,
-}
-
-impl<'a> OpenTimer<'a> {
-    /// Starts timing a new named subtask
-    ///
-    /// The timer is stopped automatically
-    /// when the `OpenTimer` is dropped.
-    pub fn open(&mut self, name: &'static str) -> OpenTimer {
-        OpenTimer {
-            name: name,
-            timer_tree: self.timer_tree,
-            start: PreciseTime::now(),
-            depth: self.depth + 1,
-        }
-    }
-}
-
-impl<'a> Drop for OpenTimer<'a> {
-    fn drop(&mut self) {
-        self.timer_tree.timings.push(Timing {
-            name: self.name,
-            duration: self.start
-                .to(PreciseTime::now())
-                .num_microseconds()
-                .unwrap(),
-            depth: self.depth,
-        });
-    }
-}
-
-/// Timing recording
-#[derive(Debug, Serialize)]
-pub struct Timing {
-    name: &'static str,
-    duration: i64,
-    depth: u32,
-}
-
-/// Timer tree
-#[derive(Debug, Serialize)]
-pub struct TimerTree {
-    timings: Vec<Timing>,
-}
-
-impl TimerTree {
-    /// Returns the total time elapsed in microseconds
-    pub fn total_time(&self) -> i64 {
-        self.timings.last().unwrap().duration
-    }
-
-    /// Open a new named subtask
-    pub fn open(&mut self, name: &'static str) -> OpenTimer {
-        OpenTimer {
-            name: name,
-            timer_tree: self,
-            start: PreciseTime::now(),
-            depth: 0,
-        }
-    }
-}
-
-impl Default for TimerTree {
-    fn default() -> TimerTree {
-        TimerTree {
-            timings: Vec::new(),
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-
-    use super::*;
-
-    #[test]
-    fn test_timer() {
-        let mut timer_tree = TimerTree::default();
-        {
-            let mut a = timer_tree.open("a");
-            {
-                let mut ab = a.open("b");
-                {
-                    let _abc = ab.open("c");
-                }
-                {
-                    let _abd = ab.open("d");
-                }
-            }
-        }
-        assert_eq!(timer_tree.timings.len(), 4);
-    }
-}
--- a/src/common/vint.rs
+++ b/src/common/vint.rs
@@ -1,7 +1,7 @@
 use super::BinarySerializable;
 use std::io;
-use std::io::Write;
 use std::io::Read;
+use std::io::Write;

 ///   Wrapper over a `u64` that serializes as a variable int.
 #[derive(Debug, Eq, PartialEq)]
@@ -11,6 +11,10 @@ impl VInt {
    pub fn val(&self) -> u64 {
        self.0
    }
+
+    /// Reads a variable int from `reader` and returns the wrapped
+    /// `u64` directly, without exposing the `VInt` wrapper.
+    pub fn deserialize_u64<R: Read>(reader: &mut R) -> io::Result<u64> {
+        let VInt(val) = VInt::deserialize(reader)?;
+        Ok(val)
+    }
 }

 impl BinarySerializable for VInt {
--- a/src/compression/mod.rs
+++ b/src/compression/mod.rs
@@ -3,39 +3,106 @@

 mod stream;

+pub const COMPRESSION_BLOCK_SIZE: usize = 128;
+const COMPRESSED_BLOCK_MAX_SIZE: usize = COMPRESSION_BLOCK_SIZE * 4 + 1;
+
 pub use self::stream::CompressedIntStream;

-#[cfg(not(feature = "simdcompression"))]
-mod pack {
-    mod compression_pack_nosimd;
-    pub use self::compression_pack_nosimd::{BlockDecoder, BlockEncoder};
-}
-
-#[cfg(feature = "simdcompression")]
-mod pack {
-    mod compression_pack_simd;
-    pub use self::compression_pack_simd::{BlockDecoder, BlockEncoder};
-}
-
-pub use self::pack::{BlockDecoder, BlockEncoder};
-
-#[cfg(any(not(feature = "simdcompression"), target_env = "msvc"))]
-mod vint {
-    mod compression_vint_nosimd;
-    pub(crate) use self::compression_vint_nosimd::*;
-}
-
-#[cfg(all(feature = "simdcompression", not(target_env = "msvc")))]
-mod vint {
-    mod compression_vint_simd;
-    pub(crate) use self::compression_vint_simd::*;
-}
+use bitpacking::{BitPacker, BitPacker4x};

 /// Returns the size in bytes of a compressed block, given `num_bits`.
+///
+/// The leading `1` accounts for the header byte holding `num_bits`;
+/// the remainder is `COMPRESSION_BLOCK_SIZE` values of `num_bits` bits each.
 pub fn compressed_block_size(num_bits: u8) -> usize {
-    1 + (num_bits as usize) * 16
+    1 + (num_bits as usize) * COMPRESSION_BLOCK_SIZE / 8
 }

+/// Compresses blocks of `u32` with a `BitPacker4x` into an internal buffer.
+pub struct BlockEncoder {
+    bitpacker: BitPacker4x,
+    /// Scratch buffer receiving the compressed bytes; the block
+    /// compressors store `num_bits` in byte 0.
+    pub output: [u8; COMPRESSED_BLOCK_MAX_SIZE],
+    /// NOTE(review): initialised to 0 and not written by the block
+    /// compressors visible here — confirm who maintains it.
+    pub output_len: usize,
+}
+
+impl BlockEncoder {
+    /// Creates an encoder with a zeroed output buffer.
+    pub fn new() -> BlockEncoder {
+        let bitpacker = BitPacker4x::new();
+        BlockEncoder {
+            bitpacker,
+            output: [0u8; COMPRESSED_BLOCK_MAX_SIZE],
+            output_len: 0,
+        }
+    }
+
+    /// Compresses `block` using the bitpacker's sorted mode, with `offset`
+    /// forwarded as the initial value.
+    ///
+    /// Returns the compressed bytes: one header byte holding the bit
+    /// width, followed by the packed payload.
+    pub fn compress_block_sorted(&mut self, block: &[u32], offset: u32) -> &[u8] {
+        let num_bits = self.bitpacker.num_bits_sorted(offset, block);
+        self.output[0] = num_bits;
+        let payload_len =
+            self.bitpacker
+                .compress_sorted(offset, block, &mut self.output[1..], num_bits);
+        let total_len = 1 + payload_len;
+        &self.output[..total_len]
+    }
+
+    /// Compresses `block` without assuming any ordering of its values.
+    ///
+    /// Returns the compressed bytes: one header byte holding the bit
+    /// width, followed by the packed payload.
+    pub fn compress_block_unsorted(&mut self, block: &[u32]) -> &[u8] {
+        let num_bits = self.bitpacker.num_bits(block);
+        self.output[0] = num_bits;
+        let payload_len = self
+            .bitpacker
+            .compress(block, &mut self.output[1..], num_bits);
+        let total_len = 1 + payload_len;
+        &self.output[..total_len]
+    }
+}
+
+/// Decompresses bit-packed blocks into an internal `u32` buffer.
+pub struct BlockDecoder {
+    bitpacker: BitPacker4x,
+    /// Decoded values; sized `COMPRESSION_BLOCK_SIZE + 1`, and `with_val`
+    /// forces the extra last slot to `0`.
+    pub output: [u32; COMPRESSION_BLOCK_SIZE + 1],
+    /// Number of valid entries at the start of `output`, as exposed by
+    /// `output_array`.
+    pub output_len: usize,
+}
+
+impl BlockDecoder {
+    /// Creates a decoder whose output buffer is zero-filled.
+    pub fn new() -> BlockDecoder {
+        BlockDecoder::with_val(0u32)
+    }
+
+    /// Creates a decoder whose first `COMPRESSION_BLOCK_SIZE` output
+    /// slots hold `val`; the extra trailing slot is always `0`.
+    pub fn with_val(val: u32) -> BlockDecoder {
+        let mut decoder = BlockDecoder {
+            bitpacker: BitPacker4x::new(),
+            output: [val; COMPRESSION_BLOCK_SIZE + 1],
+            output_len: 0,
+        };
+        decoder.output[COMPRESSION_BLOCK_SIZE] = 0u32;
+        decoder
+    }
+
+    /// Decodes one block compressed in sorted mode, `offset` being
+    /// forwarded to the bitpacker as the initial value.
+    ///
+    /// Returns the number of bytes consumed from `compressed_data`,
+    /// header byte included.
+    pub fn uncompress_block_sorted(&mut self, compressed_data: &[u8], offset: u32) -> usize {
+        // byte 0 is the bit width, the payload follows it.
+        let num_bits = compressed_data[0];
+        self.output_len = COMPRESSION_BLOCK_SIZE;
+        let payload = &compressed_data[1..];
+        let payload_len =
+            self.bitpacker
+                .decompress_sorted(offset, payload, &mut self.output, num_bits);
+        1 + payload_len
+    }
+
+    /// Decodes one block compressed in unsorted mode.
+    ///
+    /// Returns the number of bytes consumed from `compressed_data`,
+    /// header byte included.
+    pub fn uncompress_block_unsorted<'a>(&mut self, compressed_data: &'a [u8]) -> usize {
+        // byte 0 is the bit width, the payload follows it.
+        let num_bits = compressed_data[0];
+        self.output_len = COMPRESSION_BLOCK_SIZE;
+        let payload = &compressed_data[1..];
+        let payload_len = self
+            .bitpacker
+            .decompress(payload, &mut self.output, num_bits);
+        1 + payload_len
+    }
+
+    /// Returns the decoded values, i.e. the first `output_len` entries.
+    #[inline]
+    pub fn output_array(&self) -> &[u32] {
+        let decoded_len = self.output_len;
+        &self.output[..decoded_len]
+    }
+
+    /// Returns the raw output slot at `idx`, regardless of `output_len`.
+    #[inline]
+    pub fn output(&self, idx: usize) -> u32 {
+        let slots = &self.output;
+        slots[idx]
+    }
+}
+
+mod vint;
+
 pub trait VIntEncoder {
    /// Compresses an array of `u32` integers,
    /// using [delta-encoding](https://en.wikipedia.org/wiki/Delta_encoding)
@@ -112,14 +179,10 @@ impl VIntDecoder for BlockDecoder {
    }
 }

-pub const COMPRESSION_BLOCK_SIZE: usize = 128;
-
 #[cfg(test)]
 pub mod tests {

    use super::*;
-    use tests;
-    use test::Bencher;

    #[test]
    fn test_encode_sorted_block() {
@@ -208,11 +271,34 @@ pub mod tests {
            }
        }
    }
+}
+
+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+
+    use super::*;
+    use rand::Rng;
+    use rand::SeedableRng;
+    use rand::XorShiftRng;
+    use test::Bencher;
+
+    /// Builds an increasing array of at most `n` integers.
+    ///
+    /// Candidates are walked from `0` upwards; each one is kept when the
+    /// next `f32` drawn from the seeded generator is below `ratio`.
+    fn generate_array_with_seed(n: usize, ratio: f32, seed_val: u32) -> Vec<u32> {
+        let seed: [u32; 4] = [1, 2, 3, seed_val];
+        let mut rng: XorShiftRng = XorShiftRng::from_seed(seed);
+        let mut picked: Vec<u32> = Vec::new();
+        for candidate in 0..u32::max_value() {
+            if picked.len() == n {
+                break;
+            }
+            if rng.next_f32() < ratio {
+                picked.push(candidate);
+            }
+        }
+        picked
+    }
+
+    /// Same as `generate_array_with_seed`, with the seed value fixed to `4`.
+    pub fn generate_array(n: usize, ratio: f32) -> Vec<u32> {
+        let default_seed: u32 = 4;
+        generate_array_with_seed(n, ratio, default_seed)
+    }

    #[bench]
    fn bench_compress(b: &mut Bencher) {
        let mut encoder = BlockEncoder::new();
-        let data = tests::generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
+        let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
        b.iter(|| {
            encoder.compress_block_sorted(&data, 0u32);
        });
@@ -221,7 +307,7 @@ pub mod tests {
    #[bench]
    fn bench_uncompress(b: &mut Bencher) {
        let mut encoder = BlockEncoder::new();
-        let data = tests::generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
+        let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
        let compressed = encoder.compress_block_sorted(&data, 0u32);
        let mut decoder = BlockDecoder::new();
        b.iter(|| {
@@ -248,7 +334,7 @@ pub mod tests {
    #[bench]
    fn bench_compress_vint(b: &mut Bencher) {
        let mut encoder = BlockEncoder::new();
-        let data = tests::generate_array(NUM_INTS_BENCH_VINT, 0.001);
+        let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
        b.iter(|| {
            encoder.compress_vint_sorted(&data, 0u32);
        });
@@ -257,12 +343,11 @@ pub mod tests {
    #[bench]
    fn bench_uncompress_vint(b: &mut Bencher) {
        let mut encoder = BlockEncoder::new();
-        let data = tests::generate_array(NUM_INTS_BENCH_VINT, 0.001);
+        let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
        let compressed = encoder.compress_vint_sorted(&data, 0u32);
        let mut decoder = BlockDecoder::new();
        b.iter(|| {
            decoder.uncompress_vint_sorted(compressed, 0u32, NUM_INTS_BENCH_VINT);
        });
    }
-
 }
--- a/src/compression/pack/compression_pack_nosimd.rs
+++ b/src/compression/pack/compression_pack_nosimd.rs
@@ -1,142 +0,0 @@
-use common::bitpacker::compute_num_bits;
-use common::bitpacker::{BitPacker, BitUnpacker};
-use common::CountingWriter;
-use std::cmp;
-use std::io::Write;
-use super::super::COMPRESSION_BLOCK_SIZE;
-
-const COMPRESSED_BLOCK_MAX_SIZE: usize = COMPRESSION_BLOCK_SIZE * 4 + 1;
-
-pub fn compress_sorted(vals: &mut [u32], output: &mut [u8], offset: u32) -> usize {
-    let mut max_delta = 0;
-    {
-        let mut local_offset = offset;
-        for i in 0..COMPRESSION_BLOCK_SIZE {
-            let val = vals[i];
-            let delta = val - local_offset;
-            max_delta = cmp::max(max_delta, delta);
-            vals[i] = delta;
-            local_offset = val;
-        }
-    }
-    let mut counting_writer = CountingWriter::wrap(output);
-    let num_bits = compute_num_bits(max_delta as u64);
-    counting_writer.write_all(&[num_bits]).unwrap();
-
-    let mut bit_packer = BitPacker::new(num_bits as usize);
-    for val in vals {
-        bit_packer.write(*val as u64, &mut counting_writer).unwrap();
-    }
-    counting_writer.written_bytes()
-}
-
-pub struct BlockEncoder {
-    pub output: [u8; COMPRESSED_BLOCK_MAX_SIZE],
-    pub output_len: usize,
-    input_buffer: [u32; COMPRESSION_BLOCK_SIZE],
-}
-
-impl BlockEncoder {
-    pub fn new() -> BlockEncoder {
-        BlockEncoder {
-            output: [0u8; COMPRESSED_BLOCK_MAX_SIZE],
-            output_len: 0,
-            input_buffer: [0u32; COMPRESSION_BLOCK_SIZE],
-        }
-    }
-
-    pub fn compress_block_sorted(&mut self, vals: &[u32], offset: u32) -> &[u8] {
-        self.input_buffer.clone_from_slice(vals);
-        let compressed_size = compress_sorted(&mut self.input_buffer, &mut self.output, offset);
-        &self.output[..compressed_size]
-    }
-
-    pub fn compress_block_unsorted(&mut self, vals: &[u32]) -> &[u8] {
-        let compressed_size = {
-            let output: &mut [u8] = &mut self.output;
-            let max = vals.iter()
-                .cloned()
-                .max()
-                .expect("compress unsorted called with an empty array");
-            let num_bits = compute_num_bits(max as u64);
-            let mut counting_writer = CountingWriter::wrap(output);
-            counting_writer.write_all(&[num_bits]).unwrap();
-            let mut bit_packer = BitPacker::new(num_bits as usize);
-            for val in vals {
-                bit_packer.write(*val as u64, &mut counting_writer).unwrap();
-            }
-            for _ in vals.len()..COMPRESSION_BLOCK_SIZE {
-                bit_packer
-                    .write(vals[0] as u64, &mut counting_writer)
-                    .unwrap();
-            }
-            bit_packer.flush(&mut counting_writer).expect(
-                "Flushing the bitpacking \
-                 in an in RAM buffer should never fail",
-            );
-            // we avoid writing "closing", because we
-            // do not want 7 bytes of padding here.
-            counting_writer.written_bytes()
-        };
-        &self.output[..compressed_size]
-    }
-}
-
-pub struct BlockDecoder {
-    pub output: [u32; COMPRESSED_BLOCK_MAX_SIZE],
-    pub output_len: usize,
-}
-
-impl BlockDecoder {
-    pub fn new() -> BlockDecoder {
-        BlockDecoder::with_val(0u32)
-    }
-
-    pub fn with_val(val: u32) -> BlockDecoder {
-        BlockDecoder {
-            output: [val; COMPRESSED_BLOCK_MAX_SIZE],
-            output_len: 0,
-        }
-    }
-
-    pub fn uncompress_block_sorted<'a>(
-        &mut self,
-        compressed_data: &'a [u8],
-        mut offset: u32,
-    ) -> usize {
-        let consumed_size = {
-            let num_bits = compressed_data[0];
-            let bit_unpacker = BitUnpacker::new(&compressed_data[1..], num_bits as usize);
-            for i in 0..COMPRESSION_BLOCK_SIZE {
-                let delta = bit_unpacker.get(i);
-                let val = offset + delta as u32;
-                self.output[i] = val;
-                offset = val;
-            }
-            1 + (num_bits as usize * COMPRESSION_BLOCK_SIZE + 7) / 8
-        };
-        self.output_len = COMPRESSION_BLOCK_SIZE;
-        consumed_size
-    }
-
-    pub fn uncompress_block_unsorted<'a>(&mut self, compressed_data: &'a [u8]) -> usize {
-        let num_bits = compressed_data[0];
-        let bit_unpacker = BitUnpacker::new(&compressed_data[1..], num_bits as usize);
-        for i in 0..COMPRESSION_BLOCK_SIZE {
-            self.output[i] = bit_unpacker.get(i) as u32;
-        }
-        let consumed_size = 1 + (num_bits as usize * COMPRESSION_BLOCK_SIZE + 7) / 8;
-        self.output_len = COMPRESSION_BLOCK_SIZE;
-        consumed_size
-    }
-
-    #[inline]
-    pub fn output_array(&self) -> &[u32] {
-        &self.output[..self.output_len]
-    }
-
-    #[inline]
-    pub fn output(&self, idx: usize) -> u32 {
-        self.output[idx]
-    }
-}
--- a/src/compression/pack/compression_pack_simd.rs
+++ b/src/compression/pack/compression_pack_simd.rs
@@ -1,118 +0,0 @@
-use super::super::COMPRESSION_BLOCK_SIZE;
-
-const COMPRESSED_BLOCK_MAX_SIZE: usize = COMPRESSION_BLOCK_SIZE * 4 + 1;
-
-mod simdcomp {
-    use libc::size_t;
-
-    extern "C" {
-        pub fn compress_sorted(data: *const u32, output: *mut u8, offset: u32) -> size_t;
-
-        pub fn uncompress_sorted(
-            compressed_data: *const u8,
-            output: *mut u32,
-            offset: u32,
-        ) -> size_t;
-
-        pub fn compress_unsorted(data: *const u32, output: *mut u8) -> size_t;
-
-        pub fn uncompress_unsorted(compressed_data: *const u8, output: *mut u32) -> size_t;
-    }
-}
-
-fn compress_sorted(vals: &[u32], output: &mut [u8], offset: u32) -> usize {
-    unsafe { simdcomp::compress_sorted(vals.as_ptr(), output.as_mut_ptr(), offset) }
-}
-
-fn uncompress_sorted(compressed_data: &[u8], output: &mut [u32], offset: u32) -> usize {
-    unsafe {
-        simdcomp::uncompress_sorted(compressed_data.as_ptr(), output.as_mut_ptr(), offset)
-    }
-}
-
-fn compress_unsorted(vals: &[u32], output: &mut [u8]) -> usize {
-    unsafe { simdcomp::compress_unsorted(vals.as_ptr(), output.as_mut_ptr()) }
-}
-
-fn uncompress_unsorted(compressed_data: &[u8], output: &mut [u32]) -> usize {
-    unsafe { simdcomp::uncompress_unsorted(compressed_data.as_ptr(), output.as_mut_ptr()) }
-}
-
-pub struct BlockEncoder {
-    pub output: [u8; COMPRESSED_BLOCK_MAX_SIZE],
-    pub output_len: usize,
-}
-
-impl BlockEncoder {
-    pub fn new() -> BlockEncoder {
-        BlockEncoder {
-            output: [0u8; COMPRESSED_BLOCK_MAX_SIZE],
-            output_len: 0,
-        }
-    }
-
-    pub fn compress_block_sorted(&mut self, vals: &[u32], offset: u32) -> &[u8] {
-        let compressed_size = compress_sorted(vals, &mut self.output, offset);
-        &self.output[..compressed_size]
-    }
-
-    pub fn compress_block_unsorted(&mut self, vals: &[u32]) -> &[u8] {
-        let compressed_size = compress_unsorted(vals, &mut self.output);
-        &self.output[..compressed_size]
-    }
-}
-
-pub struct BlockDecoder {
-    pub output: [u32; COMPRESSED_BLOCK_MAX_SIZE],
-    pub output_len: usize,
-}
-
-impl BlockDecoder {
-    pub fn new() -> BlockDecoder {
-        BlockDecoder::with_val(0u32)
-    }
-
-    pub fn with_val(val: u32) -> BlockDecoder {
-        BlockDecoder {
-            output: [val; COMPRESSED_BLOCK_MAX_SIZE],
-            output_len: 0,
-        }
-    }
-
-    pub fn uncompress_block_sorted(&mut self, compressed_data: &[u8], offset: u32) -> usize {
-        let consumed_size = uncompress_sorted(compressed_data, &mut self.output, offset);
-        self.output_len = COMPRESSION_BLOCK_SIZE;
-        consumed_size
-    }
-
-    pub fn uncompress_block_unsorted<'a>(&mut self, compressed_data: &'a [u8]) -> usize {
-        let consumed_size = uncompress_unsorted(compressed_data, &mut self.output);
-        self.output_len = COMPRESSION_BLOCK_SIZE;
-        consumed_size
-    }
-
-    #[inline]
-    pub fn output_array(&self) -> &[u32] {
-        &self.output[..self.output_len]
-    }
-
-    #[inline]
-    pub fn output(&self, idx: usize) -> u32 {
-        self.output[idx]
-    }
-}
-
-#[cfg(test)]
-mod tests {
-
-    use super::BlockEncoder;
-
-    #[test]
-    fn test_all_docs_compression_len() {
-        let data: Vec<u32> = (0u32..128u32).collect();
-        let mut encoder = BlockEncoder::new();
-        let compressed = encoder.compress_block_sorted(&data, 0u32);
-        assert_eq!(compressed.len(), 17);
-    }
-
-}
--- a/src/compression/stream.rs
+++ b/src/compression/stream.rs
@@ -1,6 +1,6 @@
+use compression::compressed_block_size;
 use compression::BlockDecoder;
 use compression::COMPRESSION_BLOCK_SIZE;
-use compression::compressed_block_size;
 use directory::{ReadOnlySource, SourceRead};

 /// Reads a stream of compressed ints.
@@ -11,7 +11,12 @@ use directory::{ReadOnlySource, SourceRead};
 /// decompressing blocks that are not required.
 pub struct CompressedIntStream {
+    // compressed bytes the blocks are read from
    buffer: SourceRead,
+
+    // decoder holding the most recently decompressed block
    block_decoder: BlockDecoder,
+    cached_addr: usize,      // address of the currently decoded block
+    cached_next_addr: usize, // address following the currently decoded block
+
+    addr: usize, // address of the block associated to the current position
+    // number of integers already consumed within the block at `addr`
    inner_offset: usize,
 }

@@ -21,34 +26,49 @@ impl CompressedIntStream {
        CompressedIntStream {
            buffer: SourceRead::from(source),
            block_decoder: BlockDecoder::new(),
-            inner_offset: COMPRESSION_BLOCK_SIZE,
+            cached_addr: usize::max_value(),
+            cached_next_addr: usize::max_value(),
+
+            addr: 0,
+            inner_offset: 0,
        }
    }

-    /// Fills a buffer with the next `output.len()` integers,
-    /// and advance the stream by that many els.
+    /// Decompresses the block starting at `addr` into the internal
+    /// decoder and returns the address of the block that follows it.
+    ///
+    /// When the block at `addr` is the one already decoded, nothing is
+    /// decompressed and the remembered next address is returned.
+    pub fn read_block(&mut self, addr: usize) -> usize {
+        if addr == self.cached_addr {
+            // cache hit: the decoder already holds this block.
+            return self.cached_next_addr;
+        }
+        let consumed = self
+            .block_decoder
+            .uncompress_block_unsorted(self.buffer.slice_from(addr));
+        let next_addr = addr + consumed;
+        self.cached_addr = addr;
+        self.cached_next_addr = next_addr;
+        next_addr
+    }
+
+    /// Fills a buffer with the next `output.len()` integers.
+    /// This does not consume / advance the stream.
+    ///
+    /// Blocks are only decoded while integers are still missing, so a
+    /// request that is empty or that ends exactly on a block boundary
+    /// never touches the block located after the last one it needs.
    pub fn read(&mut self, output: &mut [u32]) {
+        let mut cursor = self.addr;
+        let mut inner_offset = self.inner_offset;
        let mut num_els: usize = output.len();
-        let mut start: usize = 0;
-        loop {
-            let available = COMPRESSION_BLOCK_SIZE - self.inner_offset;
-            if num_els >= available {
-                if available > 0 {
-                    let uncompressed_block =
-                        &self.block_decoder.output_array()[self.inner_offset..];
-                    output[start..][..available].clone_from_slice(uncompressed_block);
-                }
-                num_els -= available;
-                start += available;
-                let num_consumed_bytes = self.block_decoder
-                    .uncompress_block_unsorted(self.buffer.as_ref());
-                self.buffer.advance(num_consumed_bytes);
-                self.inner_offset = 0;
+        let mut start = 0;
+        // Stop as soon as `output` is full: an unconditional `loop` would
+        // decode one block too many, possibly past the end of the stream.
+        while num_els > 0 {
+            cursor = self.read_block(cursor);
+            // integers of the current block that have not been skipped yet.
+            let block = &self.block_decoder.output_array()[inner_offset..];
+            let block_len = block.len();
+            if num_els >= block_len {
+                output[start..start + block_len].clone_from_slice(&block);
+                start += block_len;
+                num_els -= block_len;
+                // the following blocks are read from their first integer.
+                inner_offset = 0;
            } else {
-                let uncompressed_block = &self.block_decoder.output_array()
-                    [self.inner_offset..self.inner_offset + num_els];
-                output[start..][..num_els].clone_from_slice(uncompressed_block);
-                self.inner_offset += num_els;
+                output[start..].clone_from_slice(&block[..num_els]);
                break;
            }
        }
@@ -58,23 +78,22 @@ impl CompressedIntStream {
    ///
    /// If a full block is skipped, calling
    /// `.skip(...)` will avoid decompressing it.
+    ///
+    /// May panic if the end of the stream is reached.
    pub fn skip(&mut self, mut skip_len: usize) {
-        let available = COMPRESSION_BLOCK_SIZE - self.inner_offset;
-        if available >= skip_len {
-            self.inner_offset += skip_len;
-        } else {
-            skip_len -= available;
-            // entirely skip decompressing some blocks.
-            while skip_len >= COMPRESSION_BLOCK_SIZE {
-                skip_len -= COMPRESSION_BLOCK_SIZE;
-                let num_bits: u8 = self.buffer.as_ref()[0];
+        loop {
+            // integers not yet consumed in the block at `self.addr`.
+            let available = COMPRESSION_BLOCK_SIZE - self.inner_offset;
+            if available >= skip_len {
+                self.inner_offset += skip_len;
+                break;
+            } else {
+                skip_len -= available;
+                // entirely skip decompressing some blocks.
+                // The first byte of a block is its bit width, which is
+                // all that is needed to compute the block's byte length.
+                let num_bits: u8 = self.buffer.get(self.addr);
                let block_len = compressed_block_size(num_bits);
-                self.buffer.advance(block_len);
+                self.addr += block_len;
+                self.inner_offset = 0;
            }
-            let num_consumed_bytes = self.block_decoder
-                .uncompress_block_unsorted(self.buffer.as_ref());
-            self.buffer.advance(num_consumed_bytes);
-            self.inner_offset = skip_len;
        }
    }
 }
@@ -84,14 +103,14 @@ pub mod tests {

    use super::CompressedIntStream;
    use compression::compressed_block_size;
-    use compression::COMPRESSION_BLOCK_SIZE;
    use compression::BlockEncoder;
+    use compression::COMPRESSION_BLOCK_SIZE;
    use directory::ReadOnlySource;

    fn create_stream_buffer() -> ReadOnlySource {
        let mut buffer: Vec<u8> = vec![];
        let mut encoder = BlockEncoder::new();
-        let vals: Vec<u32> = (0u32..1_025u32).collect();
+        let vals: Vec<u32> = (0u32..1152u32).collect();
        for chunk in vals.chunks(COMPRESSION_BLOCK_SIZE) {
            let compressed_block = encoder.compress_block_unsorted(chunk);
            let num_bits = compressed_block[0];
@@ -113,13 +132,24 @@ pub mod tests {
        stream.read(&mut block[0..2]);
        assert_eq!(block[0], 0);
        assert_eq!(block[1], 1);
+
+        // reading does not consume the stream
+        stream.read(&mut block[0..2]);
+        assert_eq!(block[0], 0);
+        assert_eq!(block[1], 1);
+        stream.skip(2);
+
        stream.skip(5);
        stream.read(&mut block[0..3]);
+        stream.skip(3);
+
        assert_eq!(block[0], 7);
        assert_eq!(block[1], 8);
        assert_eq!(block[2], 9);
        stream.skip(500);
        stream.read(&mut block[0..3]);
+        stream.skip(3);
+
        assert_eq!(block[0], 510);
        assert_eq!(block[1], 511);
        assert_eq!(block[2], 512);
--- a/src/compression/vint/compression_vint_nosimd.rs
+++ b/src/compression/vint/compression_vint_nosimd.rs
--- a/src/compression/vint/compression_vint_simd.rs
+++ b/src/compression/vint/compression_vint_simd.rs
@@ -1,72 +0,0 @@
-mod streamvbyte {
-
-    use libc::size_t;
-
-    extern "C" {
-        pub fn streamvbyte_delta_encode(
-            data: *const u32,
-            num_els: u32,
-            output: *mut u8,
-            offset: u32,
-        ) -> size_t;
-
-        pub fn streamvbyte_delta_decode(
-            compressed_data: *const u8,
-            output: *mut u32,
-            num_els: u32,
-            offset: u32,
-        ) -> size_t;
-
-        pub fn streamvbyte_encode(data: *const u32, num_els: u32, output: *mut u8) -> size_t;
-
-        pub fn streamvbyte_decode(
-            compressed_data: *const u8,
-            output: *mut u32,
-            num_els: usize,
-        ) -> size_t;
-    }
-}
-
-#[inline(always)]
-pub(crate) fn compress_sorted<'a>(input: &[u32], output: &'a mut [u8], offset: u32) -> &'a [u8] {
-    let compress_length = unsafe {
-        streamvbyte::streamvbyte_delta_encode(
-            input.as_ptr(),
-            input.len() as u32,
-            output.as_mut_ptr(),
-            offset,
-        )
-    };
-    &output[..compress_length]
-}
-
-#[inline(always)]
-pub(crate) fn compress_unsorted<'a>(input: &[u32], output: &'a mut [u8]) -> &'a [u8] {
-    let compress_length = unsafe {
-        streamvbyte::streamvbyte_encode(input.as_ptr(), input.len() as u32, output.as_mut_ptr())
-    };
-    &output[..compress_length]
-}
-
-#[inline(always)]
-pub(crate) fn uncompress_sorted<'a>(
-    compressed_data: &'a [u8],
-    output: &mut [u32],
-    offset: u32,
-) -> usize {
-    unsafe {
-        streamvbyte::streamvbyte_delta_decode(
-            compressed_data.as_ptr(),
-            output.as_mut_ptr(),
-            output.len() as u32,
-            offset,
-        )
-    }
-}
-
-#[inline(always)]
-pub(crate) fn uncompress_unsorted<'a>(compressed_data: &'a [u8], output: &mut [u32]) -> usize {
-    unsafe {
-        streamvbyte::streamvbyte_decode(compressed_data.as_ptr(), output.as_mut_ptr(), output.len())
-    }
-}
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -1,29 +1,33 @@
-use Result;
+use core::SegmentId;
 use error::{ErrorKind, ResultExt};
-use serde_json;
 use schema::Schema;
-use std::sync::Arc;
+use serde_json;
 use std::borrow::BorrowMut;
 use std::fmt;
-use core::SegmentId;
-use directory::{Directory, MmapDirectory, RAMDirectory};
-use indexer::index_writer::open_index_writer;
-use core::searcher::Searcher;
-use std::convert::From;
-use num_cpus;
-use super::segment::Segment;
-use core::SegmentReader;
-use super::pool::Pool;
-use core::SegmentMeta;
+use std::sync::Arc;
+use Result;
+
 use super::pool::LeasedItem;
-use std::path::Path;
-use core::IndexMeta;
-use IndexWriter;
-use directory::ManagedDirectory;
-use core::META_FILEPATH;
+use super::pool::Pool;
 use super::segment::create_segment;
+use super::segment::Segment;
+use core::searcher::Searcher;
+use core::IndexMeta;
+use core::SegmentMeta;
+use core::SegmentReader;
+use core::META_FILEPATH;
+use directory::ManagedDirectory;
+#[cfg(feature = "mmap")]
+use directory::MmapDirectory;
+use directory::{Directory, RAMDirectory};
+use indexer::index_writer::open_index_writer;
+use indexer::index_writer::HEAP_SIZE_MIN;
 use indexer::segment_updater::save_new_metas;
+use indexer::DirectoryLock;
+use num_cpus;
+use std::path::Path;
 use tokenizer::TokenizerManager;
+use IndexWriter;

 const NUM_SEARCHERS: usize = 12;

@@ -48,27 +52,17 @@ impl Index {
    /// This should only be used for unit tests.
    pub fn create_in_ram(schema: Schema) -> Index {
        let ram_directory = RAMDirectory::create();
-        // unwrap is ok here
-        let directory = ManagedDirectory::new(ram_directory).expect(
-            "Creating a managed directory from a brand new RAM directory \
-             should never fail.",
-        );
-        Index::from_directory(directory, schema).expect("Creating a RAMDirectory should never fail")
+        Index::create(ram_directory, schema).expect("Creating a RAMDirectory should never fail")
    }

    /// Creates a new index in a given filepath.
    /// The index will use the `MMapDirectory`.
    ///
    /// If a previous index was in this directory, then its meta file will be destroyed.
-    pub fn create<P: AsRef<Path>>(directory_path: P, schema: Schema) -> Result<Index> {
+    #[cfg(feature = "mmap")]
+    pub fn create_in_dir<P: AsRef<Path>>(directory_path: P, schema: Schema) -> Result<Index> {
        let mmap_directory = MmapDirectory::open(directory_path)?;
-        let directory = ManagedDirectory::new(mmap_directory)?;
-        Index::from_directory(directory, schema)
-    }
-
-    /// Accessor for the tokenizer manager.
-    pub fn tokenizers(&self) -> &TokenizerManager {
-        &self.tokenizers
+        Index::create(mmap_directory, schema)
    }

    /// Creates a new index in a temp directory.
@@ -79,12 +73,25 @@ impl Index {
    ///
    /// The temp directory is only used for testing the `MmapDirectory`.
    /// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`.
+    #[cfg(feature = "mmap")]
    pub fn create_from_tempdir(schema: Schema) -> Result<Index> {
        let mmap_directory = MmapDirectory::create_from_tempdir()?;
-        let directory = ManagedDirectory::new(mmap_directory)?;
+        Index::create(mmap_directory, schema)
+    }
+
+    /// Creates a new index given an implementation of the trait `Directory`.
+    pub fn create<Dir: Directory>(dir: Dir, schema: Schema) -> Result<Index> {
+        let directory = ManagedDirectory::new(dir)?;
        Index::from_directory(directory, schema)
    }

+    /// Create a new index from a directory.
+    fn from_directory(mut directory: ManagedDirectory, schema: Schema) -> Result<Index> {
+        save_new_metas(schema.clone(), 0, directory.borrow_mut())?;
+        let metas = IndexMeta::with_schema(schema);
+        Index::create_from_metas(directory, &metas)
+    }
+
    /// Creates a new index given a directory and an `IndexMeta`.
    fn create_from_metas(directory: ManagedDirectory, metas: &IndexMeta) -> Result<Index> {
        let schema = metas.schema.clone();
@@ -98,27 +105,28 @@ impl Index {
        Ok(index)
    }

-    /// Create a new index from a directory.
-    pub fn from_directory(mut directory: ManagedDirectory, schema: Schema) -> Result<Index> {
-        save_new_metas(schema.clone(), 0, directory.borrow_mut())?;
-        let metas = IndexMeta::with_schema(schema);
-        Index::create_from_metas(directory, &metas)
+    /// Accessor for the tokenizer manager.
+    pub fn tokenizers(&self) -> &TokenizerManager {
+        &self.tokenizers
    }

    /// Opens a new directory from an index path.
-    pub fn open<P: AsRef<Path>>(directory_path: P) -> Result<Index> {
+    #[cfg(feature = "mmap")]
+    pub fn open_in_dir<P: AsRef<Path>>(directory_path: P) -> Result<Index> {
        let mmap_directory = MmapDirectory::open(directory_path)?;
-        let directory = ManagedDirectory::new(mmap_directory)?;
+        Index::open(mmap_directory)
+    }
+
+    /// Opens the index using the provided directory.
+    pub fn open<D: Directory>(directory: D) -> Result<Index> {
+        let directory = ManagedDirectory::new(directory)?;
        let metas = load_metas(&directory)?;
        Index::create_from_metas(directory, &metas)
    }

-    /// Returns the index opstamp.
-    ///
-    /// The opstamp is the number of documents that have been added
-    /// from the beginning of time, and until the moment of the last commit.
-    pub fn opstamp(&self) -> u64 {
-        load_metas(self.directory()).unwrap().opstamp
+    /// Reads the index meta file from the directory.
+    pub fn load_metas(&self) -> Result<IndexMeta> {
+        load_metas(self.directory())
    }

    /// Open a new index writer. Attempts to acquire a lockfile.
@@ -129,9 +137,13 @@ impl Index {
    /// `IndexWriter` on the system is accessing the index directory,
    /// it is safe to manually delete the lockfile.
    ///
-    /// num_threads specifies the number of indexing workers that
+    /// - `num_threads` defines the number of indexing workers that
    /// should work at the same time.
    ///
+    /// - `overall_heap_size_in_bytes` sets the amount of memory
+    /// allocated for all indexing threads.
+    /// Each thread will receive a budget of `overall_heap_size_in_bytes / num_threads`.
+    ///
    /// # Errors
    /// If the lockfile already exists, returns `Error::FileAlreadyExists`.
    /// # Panics
@@ -139,20 +151,35 @@ impl Index {
    pub fn writer_with_num_threads(
        &self,
        num_threads: usize,
-        heap_size_in_bytes: usize,
+        overall_heap_size_in_bytes: usize,
    ) -> Result<IndexWriter> {
-        open_index_writer(self, num_threads, heap_size_in_bytes)
+        let directory_lock = DirectoryLock::lock(self.directory().box_clone())?;
+        let heap_size_in_bytes_per_thread = overall_heap_size_in_bytes / num_threads;
+        open_index_writer(
+            self,
+            num_threads,
+            heap_size_in_bytes_per_thread,
+            directory_lock,
+        )
    }

    /// Creates a multithreaded writer
-    /// It just calls `writer_with_num_threads` with the number of cores as `num_threads`
+    ///
+    /// Tantivy will automatically define the number of threads to use.
+    /// `overall_heap_size_in_bytes` is the total target memory usage that will be split
+    /// between a given number of threads.
    ///
    /// # Errors
    /// If the lockfile already exists, returns `Error::FileAlreadyExists`.
    /// # Panics
    /// If the heap size per thread is too small, panics.
-    pub fn writer(&self, heap_size_in_bytes: usize) -> Result<IndexWriter> {
-        self.writer_with_num_threads(num_cpus::get(), heap_size_in_bytes)
+    pub fn writer(&self, overall_heap_size_in_bytes: usize) -> Result<IndexWriter> {
+        let mut num_threads = num_cpus::get();
+        let heap_size_in_bytes_per_thread = overall_heap_size_in_bytes / num_threads;
+        if heap_size_in_bytes_per_thread < HEAP_SIZE_MIN {
+            num_threads = (overall_heap_size_in_bytes / HEAP_SIZE_MIN).max(1);
+        }
+        self.writer_with_num_threads(num_threads, overall_heap_size_in_bytes)
    }

    /// Accessor to the index schema
@@ -164,7 +191,8 @@ impl Index {

    /// Returns the list of segments that are searchable
    pub fn searchable_segments(&self) -> Result<Vec<Segment>> {
-        Ok(self.searchable_segment_metas()?
+        Ok(self
+            .searchable_segment_metas()?
            .into_iter()
            .map(|segment_meta| self.segment(segment_meta))
            .collect())
@@ -194,12 +222,13 @@ impl Index {
    /// Reads the meta.json and returns the list of
    /// `SegmentMeta` from the last commit.
    pub fn searchable_segment_metas(&self) -> Result<Vec<SegmentMeta>> {
-        Ok(load_metas(self.directory())?.segments)
+        Ok(self.load_metas()?.segments)
    }

    /// Returns the list of segment ids that are searchable.
    pub fn searchable_segment_ids(&self) -> Result<Vec<SegmentId>> {
-        Ok(self.searchable_segment_metas()?
+        Ok(self
+            .searchable_segment_metas()?
            .iter()
            .map(|segment_meta| segment_meta.id())
            .collect())
@@ -217,8 +246,9 @@ impl Index {
            .iter()
            .map(SegmentReader::open)
            .collect::<Result<_>>()?;
+        let schema = self.schema();
        let searchers = (0..NUM_SEARCHERS)
-            .map(|_| Searcher::from(segment_readers.clone()))
+            .map(|_| Searcher::new(schema.clone(), segment_readers.clone()))
            .collect();
        self.searcher_pool.publish_new_generation(searchers);
        Ok(())
--- a/src/core/index_meta.rs
+++ b/src/core/index_meta.rs
@@ -1,5 +1,7 @@
-use schema::Schema;
 use core::SegmentMeta;
+use schema::Schema;
+use serde_json;
+use std::fmt;

 /// Meta information about the `Index`.
 ///
@@ -9,11 +11,13 @@ use core::SegmentMeta;
 /// * the index `docstamp`
 /// * the schema
 ///
-#[derive(Clone, Debug, Serialize, Deserialize)]
+#[derive(Clone, Serialize, Deserialize)]
 pub struct IndexMeta {
    pub segments: Vec<SegmentMeta>,
    pub schema: Schema,
    pub opstamp: u64,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub payload: Option<String>,
 }

 impl IndexMeta {
@@ -22,6 +26,43 @@ impl IndexMeta {
            segments: vec![],
            schema,
            opstamp: 0u64,
+            payload: None,
        }
    }
 }
+
+impl fmt::Debug for IndexMeta {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "{}",
+            serde_json::ser::to_string(self)
+                .expect("JSON serialization for IndexMeta should never fail.")
+        )
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    use super::IndexMeta;
+    use schema::{SchemaBuilder, TEXT};
+    use serde_json;
+
+    #[test]
+    fn test_serialize_metas() {
+        let schema = {
+            let mut schema_builder = SchemaBuilder::new();
+            schema_builder.add_text_field("text", TEXT);
+            schema_builder.build()
+        };
+        let index_metas = IndexMeta {
+            segments: Vec::new(),
+            schema: schema,
+            opstamp: 0u64,
+            payload: None,
+        };
+        let json = serde_json::ser::to_string(&index_metas).expect("serialization failed");
+        assert_eq!(json, r#"{"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","tokenizer":"default"},"stored":false}}],"opstamp":0}"#);
+    }
+}
--- a/src/core/inverted_index_reader.rs
+++ b/src/core/inverted_index_reader.rs
@@ -1,13 +1,13 @@
+use common::BinarySerializable;
+use compression::CompressedIntStream;
 use directory::{ReadOnlySource, SourceRead};
-use termdict::{TermDictionary, TermDictionaryImpl};
-use postings::{BlockSegmentPostings, SegmentPostings};
+use postings::FreqReadingOption;
 use postings::TermInfo;
+use postings::{BlockSegmentPostings, SegmentPostings};
+use schema::FieldType;
 use schema::IndexRecordOption;
 use schema::Term;
-use std::cmp;
-use fastfield::DeleteBitSet;
-use schema::Schema;
-use compression::CompressedIntStream;
+use termdict::TermDictionary;

 /// The inverted index reader is in charge of accessing
 /// the inverted index associated to a specific field.
@@ -23,37 +23,54 @@ use compression::CompressedIntStream;
 /// `InvertedIndexReader` are created by calling
 /// the `SegmentReader`'s [`.inverted_index(...)`] method
 pub struct InvertedIndexReader {
-    termdict: TermDictionaryImpl,
+    termdict: TermDictionary,
    postings_source: ReadOnlySource,
    positions_source: ReadOnlySource,
-    delete_bitset: DeleteBitSet,
-    schema: Schema,
+    record_option: IndexRecordOption,
+    total_num_tokens: u64,
 }

 impl InvertedIndexReader {
    pub(crate) fn new(
-        termdict_source: ReadOnlySource,
+        termdict: TermDictionary,
        postings_source: ReadOnlySource,
        positions_source: ReadOnlySource,
-        delete_bitset: DeleteBitSet,
-        schema: Schema,
+        record_option: IndexRecordOption,
    ) -> InvertedIndexReader {
+        let total_num_tokens_data = postings_source.slice(0, 8);
+        let mut total_num_tokens_cursor = total_num_tokens_data.as_slice();
+        let total_num_tokens = u64::deserialize(&mut total_num_tokens_cursor).unwrap_or(0u64);
        InvertedIndexReader {
-            termdict: TermDictionaryImpl::from_source(termdict_source),
-            postings_source,
+            termdict,
+            postings_source: postings_source.slice_from(8),
            positions_source,
-            delete_bitset,
-            schema,
+            record_option,
+            total_num_tokens,
+        }
+    }
+
+    /// Creates an empty `InvertedIndexReader` object, which
+    /// contains no terms at all.
+    pub fn empty(field_type: FieldType) -> InvertedIndexReader {
+        let record_option = field_type
+            .get_index_record_option()
+            .unwrap_or(IndexRecordOption::Basic);
+        InvertedIndexReader {
+            termdict: TermDictionary::empty(field_type),
+            postings_source: ReadOnlySource::empty(),
+            positions_source: ReadOnlySource::empty(),
+            record_option,
+            total_num_tokens: 0u64,
        }
    }

    /// Returns the term info associated with the term.
    pub fn get_term_info(&self, term: &Term) -> Option<TermInfo> {
-        self.termdict.get(term.as_slice())
+        self.termdict.get(term.value_bytes())
    }

    /// Return the term dictionary datastructure.
-    pub fn terms(&self) -> &TermDictionaryImpl {
+    pub fn terms(&self) -> &TermDictionary {
        &self.termdict
    }

@@ -86,15 +103,19 @@ impl InvertedIndexReader {
    pub fn read_block_postings_from_terminfo(
        &self,
        term_info: &TermInfo,
-        option: IndexRecordOption,
+        requested_option: IndexRecordOption,
    ) -> BlockSegmentPostings {
        let offset = term_info.postings_offset as usize;
        let postings_data = self.postings_source.slice_from(offset);
-        let has_freq = option.has_freq();
+        let freq_reading_option = match (self.record_option, requested_option) {
+            (IndexRecordOption::Basic, _) => FreqReadingOption::NoFreq,
+            (_, IndexRecordOption::Basic) => FreqReadingOption::SkipFreq,
+            (_, _) => FreqReadingOption::ReadFreq,
+        };
        BlockSegmentPostings::from_data(
            term_info.doc_freq as usize,
            SourceRead::from(postings_data),
-            has_freq,
+            freq_reading_option,
        )
    }

@@ -108,7 +129,6 @@ impl InvertedIndexReader {
        option: IndexRecordOption,
    ) -> SegmentPostings {
        let block_postings = self.read_block_postings_from_terminfo(term_info, option);
-        let delete_bitset = self.delete_bitset.clone();
        let position_stream = {
            if option.has_positions() {
                let position_offset = term_info.positions_offset;
@@ -120,7 +140,13 @@ impl InvertedIndexReader {
                None
            }
        };
-        SegmentPostings::from_block_postings(block_postings, delete_bitset, position_stream)
+        SegmentPostings::from_block_postings(block_postings, position_stream)
+    }
+
+    /// Returns the total number of tokens recorded for all documents
+    /// (including deleted documents).
+    pub fn total_num_tokens(&self) -> u64 {
+        self.total_num_tokens
    }

    /// Returns the segment postings associated with the term, and with the given option,
@@ -134,19 +160,23 @@ impl InvertedIndexReader {
    /// `TextIndexingOptions` that does not index position will return a `SegmentPostings`
    /// with `DocId`s and frequencies.
    pub fn read_postings(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings> {
-        let field = term.field();
-        let field_entry = self.schema.get_field_entry(field);
        let term_info = get!(self.get_term_info(term));
-        let maximum_option = get!(field_entry.field_type().get_index_record_option());
-        let best_effort_option = cmp::min(maximum_option, option);
-        Some(self.read_postings_from_terminfo(&term_info, best_effort_option))
+        Some(self.read_postings_from_terminfo(&term_info, option))
+    }
+
+    pub(crate) fn read_postings_no_deletes(
+        &self,
+        term: &Term,
+        option: IndexRecordOption,
+    ) -> Option<SegmentPostings> {
+        let term_info = get!(self.get_term_info(term));
+        Some(self.read_postings_from_terminfo(&term_info, option))
    }

    /// Returns the number of documents containing the term.
    pub fn doc_freq(&self, term: &Term) -> u32 {
-        match self.get_term_info(term) {
-            Some(term_info) => term_info.doc_freq,
-            None => 0,
-        }
+        self.get_term_info(term)
+            .map(|term_info| term_info.doc_freq)
+            .unwrap_or(0u32)
    }
 }
--- a/src/core/mod.rs
+++ b/src/core/mod.rs
@@ -1,24 +1,24 @@
-pub mod searcher;
 pub mod index;
-mod segment_reader;
-mod segment_id;
-mod segment_component;
-mod segment;
 mod index_meta;
-mod pool;
-mod segment_meta;
 mod inverted_index_reader;
+mod pool;
+pub mod searcher;
+mod segment;
+mod segment_component;
+mod segment_id;
+mod segment_meta;
+mod segment_reader;

+pub use self::index::Index;
+pub use self::index_meta::IndexMeta;
 pub use self::inverted_index_reader::InvertedIndexReader;
 pub use self::searcher::Searcher;
-pub use self::segment_component::SegmentComponent;
-pub use self::segment_id::SegmentId;
-pub use self::segment_reader::SegmentReader;
 pub use self::segment::Segment;
 pub use self::segment::SerializableSegment;
-pub use self::index::Index;
+pub use self::segment_component::SegmentComponent;
+pub use self::segment_id::SegmentId;
 pub use self::segment_meta::SegmentMeta;
-pub use self::index_meta::IndexMeta;
+pub use self::segment_reader::SegmentReader;

 use std::path::PathBuf;

--- a/src/core/pool.rs
+++ b/src/core/pool.rs
@@ -1,8 +1,8 @@
-use std::sync::atomic::AtomicUsize;
-use std::sync::atomic::Ordering;
+use crossbeam::sync::MsQueue;
 use std::mem;
 use std::ops::{Deref, DerefMut};
-use crossbeam::sync::MsQueue;
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
 use std::sync::Arc;

 pub struct GenerationItem<T> {
@@ -87,7 +87,8 @@ impl<T> Deref for LeasedItem<T> {
    type Target = T;

    fn deref(&self) -> &T {
-        &self.gen_item
+        &self
+            .gen_item
            .as_ref()
            .expect("Unwrapping a leased item should never fail")
            .item // unwrap is safe here
@@ -96,7 +97,8 @@ impl<T> Deref for LeasedItem<T> {

 impl<T> DerefMut for LeasedItem<T> {
    fn deref_mut(&mut self) -> &mut T {
-        &mut self.gen_item
+        &mut self
+            .gen_item
            .as_mut()
            .expect("Unwrapping a mut leased item should never fail")
            .item // unwrap is safe here
@@ -114,8 +116,8 @@ impl<T> Drop for LeasedItem<T> {
 #[cfg(test)]
 mod tests {

-    use std::iter;
    use super::Pool;
+    use std::iter;

    #[test]
    fn test_pool() {
--- a/src/core/searcher.rs
+++ b/src/core/searcher.rs
@@ -1,16 +1,15 @@
-use Result;
-use core::SegmentReader;
-use schema::Document;
 use collector::Collector;
-use common::TimerTree;
-use query::Query;
-use DocId;
-use DocAddress;
-use schema::{Field, Term};
-use termdict::{TermDictionary, TermMerger};
-use std::sync::Arc;
-use std::fmt;
 use core::InvertedIndexReader;
+use core::SegmentReader;
+use query::Query;
+use schema::Document;
+use schema::Schema;
+use schema::{Field, Term};
+use std::fmt;
+use std::sync::Arc;
+use termdict::TermMerger;
+use DocAddress;
+use Result;

 /// Holds a list of `SegmentReader`s ready for search.
 ///
@@ -18,10 +17,18 @@ use core::InvertedIndexReader;
 /// the destruction of the `Searcher`.
 ///
 pub struct Searcher {
+    schema: Schema,
    segment_readers: Vec<SegmentReader>,
 }

 impl Searcher {
+    /// Creates a new `Searcher`
+    pub(crate) fn new(schema: Schema, segment_readers: Vec<SegmentReader>) -> Searcher {
+        Searcher {
+            schema,
+            segment_readers,
+        }
+    }
    /// Fetches a document from tantivy's store given a `DocAddress`.
    ///
    /// The searcher uses the segment ordinal to route the
@@ -32,21 +39,26 @@ impl Searcher {
        segment_reader.doc(doc_id)
    }

+    /// Access the schema associated to the index of this searcher.
+    pub fn schema(&self) -> &Schema {
+        &self.schema
+    }
+
    /// Returns the overall number of documents in the index.
-    pub fn num_docs(&self) -> DocId {
+    pub fn num_docs(&self) -> u64 {
        self.segment_readers
            .iter()
-            .map(|segment_reader| segment_reader.num_docs())
-            .fold(0u32, |acc, val| acc + val)
+            .map(|segment_reader| segment_reader.num_docs() as u64)
+            .sum::<u64>()
    }

    /// Return the overall number of documents containing
    /// the given term.
-    pub fn doc_freq(&self, term: &Term) -> u32 {
+    pub fn doc_freq(&self, term: &Term) -> u64 {
        self.segment_readers
            .iter()
-            .map(|segment_reader| segment_reader.inverted_index(term.field()).doc_freq(term))
-            .fold(0u32, |acc, val| acc + val)
+            .map(|segment_reader| segment_reader.inverted_index(term.field()).doc_freq(term) as u64)
+            .sum::<u64>()
    }

    /// Return the list of segment readers
@@ -60,13 +72,14 @@ impl Searcher {
    }

    /// Runs a query on the segment readers wrapped by the searcher
-    pub fn search<C: Collector>(&self, query: &Query, collector: &mut C) -> Result<TimerTree> {
+    pub fn search<C: Collector>(&self, query: &Query, collector: &mut C) -> Result<()> {
        query.search(self, collector)
    }

    /// Return the field searcher associated to a `Field`.
    pub fn field(&self, field: Field) -> FieldSearcher {
-        let inv_index_readers = self.segment_readers
+        let inv_index_readers = self
+            .segment_readers
            .iter()
            .map(|segment_reader| segment_reader.inverted_index(field))
            .collect::<Vec<_>>();
@@ -86,7 +99,8 @@ impl FieldSearcher {
    /// Returns a Stream over all of the sorted unique terms of
    /// for the given field.
    pub fn terms(&self) -> TermMerger {
-        let term_streamers: Vec<_> = self.inv_index_readers
+        let term_streamers: Vec<_> = self
+            .inv_index_readers
            .iter()
            .map(|inverted_index| inverted_index.terms().stream())
            .collect();
@@ -94,15 +108,10 @@ impl FieldSearcher {
    }
 }

-impl From<Vec<SegmentReader>> for Searcher {
-    fn from(segment_readers: Vec<SegmentReader>) -> Searcher {
-        Searcher { segment_readers }
-    }
-}
-
 impl fmt::Debug for Searcher {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let segment_ids = self.segment_readers
+        let segment_ids = self
+            .segment_readers
            .iter()
            .map(|segment_reader| segment_reader.segment_id())
            .collect::<Vec<_>>();
--- a/src/core/segment.rs
+++ b/src/core/segment.rs
@@ -1,16 +1,16 @@
-use Result;
-use std::path::PathBuf;
-use schema::Schema;
-use std::fmt;
-use core::SegmentId;
-use directory::{FileProtection, ReadOnlySource, WritePtr};
-use indexer::segment_serializer::SegmentSerializer;
 use super::SegmentComponent;
 use core::Index;
-use std::result;
-use directory::Directory;
+use core::SegmentId;
 use core::SegmentMeta;
 use directory::error::{OpenReadError, OpenWriteError};
+use directory::Directory;
+use directory::{FileProtection, ReadOnlySource, WritePtr};
+use indexer::segment_serializer::SegmentSerializer;
+use schema::Schema;
+use std::fmt;
+use std::path::PathBuf;
+use std::result;
+use Result;

 /// A segment is a piece of the index.
 #[derive(Clone)]
@@ -111,8 +111,8 @@ mod tests {

    use core::SegmentComponent;
    use directory::Directory;
-    use std::collections::HashSet;
    use schema::SchemaBuilder;
+    use std::collections::HashSet;
    use Index;

    #[test]
--- a/src/core/segment_component.rs
+++ b/src/core/segment_component.rs
@@ -1,3 +1,5 @@
+use std::slice;
+
 /// Enum describing each component of a tantivy segment.
 /// Each component is stored in its own file,
 /// using the pattern `segment_uuid`.`component_extension`,
@@ -26,7 +28,7 @@ pub enum SegmentComponent {

 impl SegmentComponent {
    /// Iterates through the components.
-    pub fn iterator() -> impl Iterator<Item = &'static SegmentComponent> {
+    pub fn iterator() -> slice::Iter<'static, SegmentComponent> {
        static SEGMENT_COMPONENTS: [SegmentComponent; 7] = [
            SegmentComponent::POSTINGS,
            SegmentComponent::POSITIONS,
--- a/src/core/segment_id.rs
+++ b/src/core/segment_id.rs
@@ -1,6 +1,6 @@
-use uuid::Uuid;
-use std::fmt;
 use std::cmp::{Ord, Ordering};
+use std::fmt;
+use uuid::Uuid;

 #[cfg(test)]
 use std::sync::atomic;
--- a/src/core/segment_meta.rs
+++ b/src/core/segment_meta.rs
@@ -1,7 +1,7 @@
-use core::SegmentId;
 use super::SegmentComponent;
-use std::path::PathBuf;
+use core::SegmentId;
 use std::collections::HashSet;
+use std::path::PathBuf;

 #[derive(Clone, Debug, Serialize, Deserialize)]
 struct DeleteMeta {
--- a/src/core/segment_reader.rs
+++ b/src/core/segment_reader.rs
@@ -1,24 +1,30 @@
-use Result;
-use core::Segment;
-use core::SegmentId;
-use core::SegmentComponent;
-use std::sync::RwLock;
-use common::HasLen;
-use core::SegmentMeta;
-use fastfield::{self, FastFieldNotAvailableError};
-use fastfield::DeleteBitSet;
-use store::StoreReader;
-use directory::ReadOnlySource;
-use schema::Document;
-use DocId;
-use std::sync::Arc;
-use std::collections::HashMap;
 use common::CompositeFile;
-use std::fmt;
+use common::HasLen;
 use core::InvertedIndexReader;
+use core::Segment;
+use core::SegmentComponent;
+use core::SegmentId;
+use core::SegmentMeta;
+use error::ErrorKind;
+use fastfield::DeleteBitSet;
+use fastfield::FacetReader;
+use fastfield::FastFieldReader;
+use fastfield::{self, FastFieldNotAvailableError};
+use fastfield::{BytesFastFieldReader, FastValue, MultiValueIntFastFieldReader};
+use fieldnorm::FieldNormReader;
+use schema::Cardinality;
+use schema::Document;
 use schema::Field;
-use fastfield::{FastFieldReader, U64FastFieldReader};
+use schema::FieldType;
 use schema::Schema;
+use std::collections::HashMap;
+use std::fmt;
+use std::sync::Arc;
+use std::sync::RwLock;
+use store::StoreReader;
+use termdict::TermDictionary;
+use DocId;
+use Result;

 /// Entry point to access all of the datastructures of the `Segment`
 ///
@@ -31,6 +37,8 @@ use schema::Schema;
 /// The segment reader has a very low memory footprint,
 /// as close to all of the memory data is mmapped.
 ///
+///
+/// TODO fix not decoding docfreq
 #[derive(Clone)]
 pub struct SegmentReader {
    inv_idx_reader_cache: Arc<RwLock<HashMap<Field, Arc<InvertedIndexReader>>>>,
@@ -45,7 +53,7 @@ pub struct SegmentReader {
    fieldnorms_composite: CompositeFile,

    store_reader: StoreReader,
-    delete_bitset: DeleteBitSet,
+    delete_bitset_opt: Option<DeleteBitSet>,
    schema: Schema,
 }

@@ -67,10 +75,22 @@ impl SegmentReader {
        self.segment_meta.num_docs()
    }

+    /// Returns the schema of the index this segment belongs to.
+    pub fn schema(&self) -> &Schema {
+        &self.schema
+    }
+
    /// Return the number of documents that have been
    /// deleted in the segment.
    pub fn num_deleted_docs(&self) -> DocId {
-        self.delete_bitset.len() as DocId
+        self.delete_bitset()
+            .map(|delete_set| delete_set.len() as DocId)
+            .unwrap_or(0u32)
+    }
+
+    /// Returns true iff some of the documents of the segment have been deleted.
+    pub fn has_deletes(&self) -> bool {
+        self.delete_bitset().is_some()
    }

    /// Accessor to a segment's fast field reader given a field.
@@ -83,21 +103,95 @@ impl SegmentReader {
    ///
    /// # Panics
    /// May panic if the index is corrupted.
-    pub fn get_fast_field_reader<TFastFieldReader: FastFieldReader>(
+    pub fn fast_field_reader<Item: FastValue>(
        &self,
        field: Field,
-    ) -> fastfield::Result<TFastFieldReader> {
+    ) -> fastfield::Result<FastFieldReader<Item>> {
        let field_entry = self.schema.get_field_entry(field);
-        if !TFastFieldReader::is_enabled(field_entry.field_type()) {
-            Err(FastFieldNotAvailableError::new(field_entry))
-        } else {
+        if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::SingleValue)
+        {
            self.fast_fields_composite
                .open_read(field)
                .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
-                .map(TFastFieldReader::open)
+                .map(FastFieldReader::open)
+        } else {
+            Err(FastFieldNotAvailableError::new(field_entry))
        }
    }

+    pub(crate) fn fast_field_reader_with_idx<Item: FastValue>(
+        &self,
+        field: Field,
+        idx: usize,
+    ) -> fastfield::Result<FastFieldReader<Item>> {
+        if let Some(ff_source) = self.fast_fields_composite.open_read_with_idx(field, idx) {
+            Ok(FastFieldReader::open(ff_source))
+        } else {
+            let field_entry = self.schema.get_field_entry(field);
+            Err(FastFieldNotAvailableError::new(field_entry))
+        }
+    }
+
+    /// Accessor to the `MultiValueIntFastFieldReader` associated to a given `Field`.
+    /// May panic if the field is not a multivalued fastfield of the type `Item`.
+    pub fn multi_fast_field_reader<Item: FastValue>(
+        &self,
+        field: Field,
+    ) -> fastfield::Result<MultiValueIntFastFieldReader<Item>> {
+        let field_entry = self.schema.get_field_entry(field);
+        if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::MultiValues)
+        {
+            let idx_reader = self.fast_field_reader_with_idx(field, 0)?;
+            let vals_reader = self.fast_field_reader_with_idx(field, 1)?;
+            Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader))
+        } else {
+            Err(FastFieldNotAvailableError::new(field_entry))
+        }
+    }
+
+    /// Accessor to the `BytesFastFieldReader` associated to a given `Field`.
+    pub fn bytes_fast_field_reader(&self, field: Field) -> fastfield::Result<BytesFastFieldReader> {
+        let field_entry = self.schema.get_field_entry(field);
+        match field_entry.field_type() {
+            &FieldType::Bytes => {}
+            _ => return Err(FastFieldNotAvailableError::new(field_entry)),
+        }
+        let idx_reader = self
+            .fast_fields_composite
+            .open_read_with_idx(field, 0)
+            .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
+            .map(FastFieldReader::open)?;
+        let values = self
+            .fast_fields_composite
+            .open_read_with_idx(field, 1)
+            .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))?;
+        Ok(BytesFastFieldReader::open(idx_reader, values))
+    }
+
+    /// Accessor to the `FacetReader` associated to a given `Field`.
+    pub fn facet_reader(&self, field: Field) -> Result<FacetReader> {
+        let field_entry = self.schema.get_field_entry(field);
+        if field_entry.field_type() != &FieldType::HierarchicalFacet {
+            return Err(ErrorKind::InvalidArgument(format!(
+                "The field {:?} is not a \
+                 hierarchical facet.",
+                field_entry
+            )).into());
+        }
+        let term_ords_reader = self.multi_fast_field_reader(field)?;
+        let termdict_source = self.termdict_composite.open_read(field).ok_or_else(|| {
+            ErrorKind::InvalidArgument(format!(
+                "The field \"{}\" is a hierarchical \
+                 but this segment does not seem to have the field term \
+                 dictionary.",
+                field_entry.name()
+            ))
+        })?;
+        let termdict = TermDictionary::from_source(termdict_source);
+        let facet_reader = FacetReader::new(term_ords_reader, termdict);
+        Ok(facet_reader)
+    }
+
    /// Accessor to the segment's `Field norms`'s reader.
    ///
    /// Field norms are the length (in tokens) of the fields.
@@ -106,10 +200,17 @@ impl SegmentReader {
    ///
    /// They are simply stored as a fast field, serialized in
    /// the `.fieldnorm` file of the segment.
-    pub fn get_fieldnorms_reader(&self, field: Field) -> Option<U64FastFieldReader> {
-        self.fieldnorms_composite
-            .open_read(field)
-            .map(U64FastFieldReader::open)
+    pub fn get_fieldnorms_reader(&self, field: Field) -> FieldNormReader {
+        if let Some(fieldnorm_source) = self.fieldnorms_composite.open_read(field) {
+            FieldNormReader::open(fieldnorm_source)
+        } else {
+            let field_name = self.schema.get_field_name(field);
+            // `open_read` returned `None`: the composite file has no entry for this field.
+            panic!(
+                "Field norm not found for field {:?}. Was it marked as indexed during indexing?",
+                field_name
+            );
+        }
    }

    /// Accessor to the segment's `StoreReader`.
@@ -142,11 +243,11 @@ impl SegmentReader {
        let fieldnorms_data = segment.open_read(SegmentComponent::FIELDNORMS)?;
        let fieldnorms_composite = CompositeFile::open(&fieldnorms_data)?;

-        let delete_bitset = if segment.meta().has_deletes() {
+        let delete_bitset_opt = if segment.meta().has_deletes() {
            let delete_data = segment.open_read(SegmentComponent::DELETE)?;
-            DeleteBitSet::open(delete_data)
+            Some(DeleteBitSet::open(delete_data))
        } else {
-            DeleteBitSet::empty()
+            None
        };

        let schema = segment.schema();
@@ -159,44 +260,65 @@ impl SegmentReader {
            fieldnorms_composite,
            segment_id: segment.id(),
            store_reader,
-            delete_bitset,
+            delete_bitset_opt,
            positions_composite,
            schema,
        })
    }

    /// Returns a field reader associated to the field given in argument.
+    /// If the field was not present in the index during indexing time,
+    /// the InvertedIndexReader is empty.
    ///
    /// The field reader is in charge of iterating through the
    /// term dictionary associated to a specific field,
    /// and opening the posting list associated to any term.
    pub fn inverted_index(&self, field: Field) -> Arc<InvertedIndexReader> {
-        if let Some(inv_idx_reader) = self.inv_idx_reader_cache
+        if let Some(inv_idx_reader) = self
+            .inv_idx_reader_cache
            .read()
            .expect("Lock poisoned. This should never happen")
            .get(&field)
        {
-            Arc::clone(inv_idx_reader);
+            return Arc::clone(inv_idx_reader);
+        }
+        let field_entry = self.schema.get_field_entry(field);
+        let field_type = field_entry.field_type();
+        let record_option_opt = field_type.get_index_record_option();
+
+        if record_option_opt.is_none() {
+            panic!("Field {:?} does not seem indexed.", field_entry.name());
        }

-        let termdict_source: ReadOnlySource = self.termdict_composite
-            .open_read(field)
-            .expect("Index corrupted. Failed to open field term dictionary in composite file.");
+        let record_option = record_option_opt.unwrap();

-        let postings_source = self.postings_composite
-            .open_read(field)
-            .expect("Index corrupted. Failed to open field postings in composite file.");
+        let postings_source_opt = self.postings_composite.open_read(field);

-        let positions_source = self.positions_composite
+        if postings_source_opt.is_none() {
+            // no documents in the segment contained this field.
+            // As a result, no data is associated to the inverted index.
+            //
+            // Returns an empty inverted index.
+            return Arc::new(InvertedIndexReader::empty(field_type.clone()));
+        }
+
+        let postings_source = postings_source_opt.unwrap();
+
+        let termdict_source = self
+            .termdict_composite
+            .open_read(field)
+            .expect("Failed to open field term dictionary in composite file. Is the field indexed");
+
+        let positions_source = self
+            .positions_composite
            .open_read(field)
            .expect("Index corrupted. Failed to open field positions in composite file.");

        let inv_idx_reader = Arc::new(InvertedIndexReader::new(
-            termdict_source,
+            TermDictionary::from_source(termdict_source),
            postings_source,
            positions_source,
-            self.delete_bitset.clone(),
-            self.schema.clone(),
+            record_option,
        ));

        // by releasing the lock in between, we may end up opening the inverting index
@@ -224,14 +346,21 @@ impl SegmentReader {

    /// Returns the bitset representing
    /// the documents that have been deleted.
-    pub fn delete_bitset(&self) -> &DeleteBitSet {
-        &self.delete_bitset
+    pub fn delete_bitset(&self) -> Option<&DeleteBitSet> {
+        self.delete_bitset_opt.as_ref()
    }

    /// Returns true iff the `doc` is marked
    /// as deleted.
    pub fn is_deleted(&self, doc: DocId) -> bool {
-        self.delete_bitset.is_deleted(doc)
+        self.delete_bitset()
+            .map(|delete_set| delete_set.is_deleted(doc))
+            .unwrap_or(false)
+    }
+
+    /// Returns an iterator over the doc ids of this segment that are not marked as deleted.
+    pub fn doc_ids_alive(&self) -> SegmentReaderAliveDocsIterator {
+        SegmentReaderAliveDocsIterator::new(self)
+    }
 }

@@ -240,3 +369,90 @@ impl fmt::Debug for SegmentReader {
        write!(f, "SegmentReader({:?})", self.segment_id)
    }
 }
+
+/// Iterator over the non-deleted ("alive") `DocId`s of a `SegmentReader`.
+pub struct SegmentReaderAliveDocsIterator<'a> {
+    reader: &'a SegmentReader,
+    max_doc: DocId,
+    current: DocId,
+}
+
+impl<'a> SegmentReaderAliveDocsIterator<'a> {
+    /// Creates an iterator positioned on doc id 0 and bounded by `reader.max_doc()`.
+    pub fn new(reader: &'a SegmentReader) -> SegmentReaderAliveDocsIterator<'a> {
+        SegmentReaderAliveDocsIterator {
+            reader,
+            max_doc: reader.max_doc(),
+            current: 0,
+        }
+    }
+}
+
+impl<'a> Iterator for SegmentReaderAliveDocsIterator<'a> {
+    type Item = DocId;
+
+    /// Returns the next doc id that is not marked as deleted,
+    /// or `None` once `max_doc` has been reached.
+    fn next(&mut self) -> Option<Self::Item> {
+        // TODO: Use TinySet (like in BitSetDocSet) to speed this process up
+        //
+        // Walk forward one doc id at a time. The cursor is always advanced
+        // before the deletion check, so the following call resumes right
+        // after the doc id examined here.
+        while self.current < self.max_doc {
+            let candidate = self.current;
+            self.current += 1;
+
+            if self.reader.is_deleted(candidate) {
+                // deleted documents are skipped
+                continue;
+            }
+
+            return Some(candidate);
+        }
+
+        // every remaining doc id was either deleted or out of range
+        None
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use core::Index;
+    use schema::{SchemaBuilder, Term, STORED, TEXT};
+    use DocId;
+
+    #[test]
+    fn test_alive_docs_iterator() {
+        let mut schema_builder = SchemaBuilder::new();
+        schema_builder.add_text_field("name", TEXT | STORED);
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema.clone());
+        let name = schema.get_field("name").unwrap();
+
+        {
+            let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
+            index_writer.add_document(doc!(name => "tantivy"));
+            index_writer.add_document(doc!(name => "horse"));
+            index_writer.add_document(doc!(name => "jockey"));
+            index_writer.add_document(doc!(name => "cap"));
+
+            // we should now have one segment with four docs
+            index_writer.commit().unwrap();
+        }
+
+        {
+            let mut index_writer2 = index.writer(50_000_000).unwrap();
+            index_writer2.delete_term(Term::from_field_text(name, "horse"));
+            index_writer2.delete_term(Term::from_field_text(name, "cap"));
+
+            // ok, now we should have two deleted docs
+            index_writer2.commit().unwrap();
+        }
+
+        index.load_searchers().unwrap();
+        let searcher = index.searcher();
+        let docs: Vec<DocId> = searcher.segment_reader(0).doc_ids_alive().collect();
+        assert_eq!(vec![0u32, 2u32], docs);
+    }
+}
--- a/src/datastruct/mod.rs
+++ b/src/datastruct/mod.rs
@@ -1,4 +0,0 @@
-mod skip;
-pub mod stacker;
-
-pub use self::skip::{SkipList, SkipListBuilder};
--- a/src/datastruct/stacker/expull.rs
+++ b/src/datastruct/stacker/expull.rs
@@ -1,161 +0,0 @@
-use std::mem;
-use super::heap::{Heap, HeapAllocable};
-
-#[inline]
-pub fn is_power_of_2(val: u32) -> bool {
-    val & (val - 1) == 0
-}
-
-#[inline]
-pub fn jump_needed(val: u32) -> bool {
-    val > 3 && is_power_of_2(val)
-}
-
-#[derive(Debug, Clone)]
-pub struct ExpUnrolledLinkedList {
-    len: u32,
-    end: u32,
-    val0: u32,
-    val1: u32,
-    val2: u32,
-    next: u32, // inline  of the first block
-}
-
-impl ExpUnrolledLinkedList {
-    pub fn iter<'a>(&self, addr: u32, heap: &'a Heap) -> ExpUnrolledLinkedListIterator<'a> {
-        ExpUnrolledLinkedListIterator {
-            heap,
-            addr: addr + 2u32 * (mem::size_of::<u32>() as u32),
-            len: self.len,
-            consumed: 0,
-        }
-    }
-
-    pub fn push(&mut self, val: u32, heap: &Heap) {
-        self.len += 1;
-        if jump_needed(self.len) {
-            // we need to allocate another block.
-            // ... As we want to grow block exponentially
-            // the next block as a size of (length so far),
-            // and we need to add 1u32 to store the pointer
-            // to the next element.
-            let new_block_size: usize = (self.len as usize + 1) * mem::size_of::<u32>();
-            let new_block_addr: u32 = heap.allocate_space(new_block_size);
-            heap.set(self.end, &new_block_addr);
-            self.end = new_block_addr;
-        }
-        heap.set(self.end, &val);
-        self.end += mem::size_of::<u32>() as u32;
-    }
-}
-
-impl HeapAllocable for u32 {
-    fn with_addr(_addr: u32) -> u32 {
-        0u32
-    }
-}
-
-impl HeapAllocable for ExpUnrolledLinkedList {
-    fn with_addr(addr: u32) -> ExpUnrolledLinkedList {
-        let last_addr = addr + mem::size_of::<u32>() as u32 * 2u32;
-        ExpUnrolledLinkedList {
-            len: 0u32,
-            end: last_addr,
-            val0: 0u32,
-            val1: 0u32,
-            val2: 0u32,
-            next: 0u32,
-        }
-    }
-}
-
-pub struct ExpUnrolledLinkedListIterator<'a> {
-    heap: &'a Heap,
-    addr: u32,
-    len: u32,
-    consumed: u32,
-}
-
-impl<'a> Iterator for ExpUnrolledLinkedListIterator<'a> {
-    type Item = u32;
-
-    fn next(&mut self) -> Option<u32> {
-        if self.consumed == self.len {
-            None
-        } else {
-            let addr: u32;
-            self.consumed += 1;
-            if jump_needed(self.consumed) {
-                addr = *self.heap.get_mut_ref(self.addr);
-            } else {
-                addr = self.addr;
-            }
-            self.addr = addr + mem::size_of::<u32>() as u32;
-            Some(*self.heap.get_mut_ref(addr))
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-
-    use super::*;
-    use super::super::heap::Heap;
-    use test::Bencher;
-
-    const NUM_STACK: usize = 10_000;
-    const STACK_SIZE: u32 = 1000;
-
-    #[test]
-    fn test_stack() {
-        let heap = Heap::with_capacity(1_000_000);
-        let (addr, stack) = heap.allocate_object::<ExpUnrolledLinkedList>();
-        stack.push(1u32, &heap);
-        stack.push(2u32, &heap);
-        stack.push(4u32, &heap);
-        stack.push(8u32, &heap);
-        {
-            let mut it = stack.iter(addr, &heap);
-            assert_eq!(it.next().unwrap(), 1u32);
-            assert_eq!(it.next().unwrap(), 2u32);
-            assert_eq!(it.next().unwrap(), 4u32);
-            assert_eq!(it.next().unwrap(), 8u32);
-            assert!(it.next().is_none());
-        }
-    }
-
-    #[bench]
-    fn bench_push_vec(bench: &mut Bencher) {
-        bench.iter(|| {
-            let mut vecs = Vec::with_capacity(100);
-            for _ in 0..NUM_STACK {
-                vecs.push(Vec::new());
-            }
-            for s in 0..NUM_STACK {
-                for i in 0u32..STACK_SIZE {
-                    let t = s * 392017 % NUM_STACK;
-                    vecs[t].push(i);
-                }
-            }
-        });
-    }
-
-    #[bench]
-    fn bench_push_stack(bench: &mut Bencher) {
-        let heap = Heap::with_capacity(64_000_000);
-        bench.iter(|| {
-            let mut stacks = Vec::with_capacity(100);
-            for _ in 0..NUM_STACK {
-                let (_, stack) = heap.allocate_object::<ExpUnrolledLinkedList>();
-                stacks.push(stack);
-            }
-            for s in 0..NUM_STACK {
-                for i in 0u32..STACK_SIZE {
-                    let t = s * 392017 % NUM_STACK;
-                    stacks[t].push(i, &heap);
-                }
-            }
-            heap.clear();
-        });
-    }
-}
--- a/src/datastruct/stacker/hashmap.rs
+++ b/src/datastruct/stacker/hashmap.rs
@@ -1,300 +0,0 @@
-use std::iter;
-use std::mem;
-use super::heap::{BytesRef, Heap, HeapAllocable};
-
-mod murmurhash2 {
-
-    const SEED: u32 = 3_242_157_231u32;
-
-    #[inline(always)]
-    pub fn murmurhash2(key: &[u8]) -> u32 {
-        let mut key_ptr: *const u32 = key.as_ptr() as *const u32;
-        let m: u32 = 0x5bd1_e995;
-        let r = 24;
-        let len = key.len() as u32;
-
-        let mut h: u32 = SEED ^ len;
-        let num_blocks = len >> 2;
-        for _ in 0..num_blocks {
-            let mut k: u32 = unsafe { *key_ptr };
-            k = k.wrapping_mul(m);
-            k ^= k >> r;
-            k = k.wrapping_mul(m);
-            k = k.wrapping_mul(m);
-            h ^= k;
-            key_ptr = key_ptr.wrapping_offset(1);
-        }
-
-        // Handle the last few bytes of the input array
-        let remaining = len & 3;
-        let key_ptr_u8: *const u8 = key_ptr as *const u8;
-        match remaining {
-            3 => {
-                h ^= unsafe { u32::from(*key_ptr_u8.wrapping_offset(2)) } << 16;
-                h ^= unsafe { u32::from(*key_ptr_u8.wrapping_offset(1)) } << 8;
-                h ^= unsafe { u32::from(*key_ptr_u8) };
-                h = h.wrapping_mul(m);
-            }
-            2 => {
-                h ^= unsafe { u32::from(*key_ptr_u8.wrapping_offset(1)) } << 8;
-                h ^= unsafe { u32::from(*key_ptr_u8) };
-                h = h.wrapping_mul(m);
-            }
-            1 => {
-                h ^= unsafe { u32::from(*key_ptr_u8) };
-                h = h.wrapping_mul(m);
-            }
-            _ => {}
-        }
-        h ^= h >> 13;
-        h = h.wrapping_mul(m);
-        h ^ (h >> 15)
-    }
-}
-
-/// Split the thread memory budget into
-/// - the heap size
-/// - the hash table "table" itself.
-///
-/// Returns (the heap size in bytes, the hash table size in number of bits)
-pub(crate) fn split_memory(per_thread_memory_budget: usize) -> (usize, usize) {
-    let table_size_limit: usize = per_thread_memory_budget / 5;
-    let compute_table_size = |num_bits: usize| {
-        (1 << num_bits) * mem::size_of::<KeyValue>()
-    };
-    let table_num_bits: usize = (1..)
-        .into_iter()
-        .take_while(|num_bits: &usize| compute_table_size(*num_bits) < table_size_limit)
-        .last()
-        .expect(&format!(
-            "Per thread memory is too small: {}",
-            per_thread_memory_budget
-        ));
-    let table_size = compute_table_size(table_num_bits);
-    let heap_size = per_thread_memory_budget - table_size;
-    (heap_size, table_num_bits)
-}
-
-/// `KeyValue` is the item stored in the hash table.
-/// The key is actually a `BytesRef` object stored in an external heap.
-/// The `value_addr` also points to an address in the heap.
-///
-/// The key and the value are actually stored contiguously.
-/// For this reason, the (start, stop) information is actually redundant
-/// and can be simplified in the future
-#[derive(Copy, Clone, Default)]
-struct KeyValue {
-    key_value_addr: BytesRef,
-    hash: u32,
-}
-
-impl KeyValue {
-    fn is_empty(&self) -> bool {
-        self.key_value_addr.is_null()
-    }
-}
-
-/// Customized `HashMap` with string keys
-///
-/// This `HashMap` takes String as keys. Keys are
-/// stored in a user defined heap.
-///
-/// The quirky API has the benefit of avoiding
-/// the computation of the hash of the key twice,
-/// or copying the key as long as there is no insert.
-///
-pub struct HashMap<'a> {
-    table: Box<[KeyValue]>,
-    heap: &'a Heap,
-    mask: usize,
-    occupied: Vec<usize>,
-}
-
-struct QuadraticProbing {
-    hash: usize,
-    i: usize,
-    mask: usize,
-}
-
-impl QuadraticProbing {
-    fn compute(hash: usize, mask: usize) -> QuadraticProbing {
-        QuadraticProbing { hash, i: 0, mask }
-    }
-
-    #[inline]
-    fn next_probe(&mut self) -> usize {
-        self.i += 1;
-        (self.hash + self.i * self.i) & self.mask
-    }
-}
-
-impl<'a> HashMap<'a> {
-    pub fn new(num_bucket_power_of_2: usize, heap: &'a Heap) -> HashMap<'a> {
-        let table_size = 1 << num_bucket_power_of_2;
-        let table: Vec<KeyValue> = iter::repeat(KeyValue::default()).take(table_size).collect();
-        HashMap {
-            table: table.into_boxed_slice(),
-            heap,
-            mask: table_size - 1,
-            occupied: Vec::with_capacity(table_size / 2),
-        }
-    }
-
-    fn probe(&self, hash: u32) -> QuadraticProbing {
-        QuadraticProbing::compute(hash as usize, self.mask)
-    }
-
-    pub fn is_saturated(&self) -> bool {
-        self.table.len() < self.occupied.len() * 3
-    }
-
-    #[inline(never)]
-    fn get_key_value(&self, bytes_ref: BytesRef) -> (&[u8], u32) {
-        let key_bytes: &[u8] = self.heap.get_slice(bytes_ref);
-        let expull_addr: u32 = bytes_ref.addr() + 2 + key_bytes.len() as u32;
-        (key_bytes, expull_addr)
-    }
-
-    pub fn set_bucket(&mut self, hash: u32, key_bytes_ref: BytesRef, bucket: usize) {
-        self.occupied.push(bucket);
-        self.table[bucket] = KeyValue {
-            key_value_addr: key_bytes_ref,
-            hash,
-        };
-    }
-
-    pub fn iter<'b: 'a>(&'b self) -> impl Iterator<Item = (&'a [u8], u32)> + 'b {
-        self.occupied.iter().cloned().map(move |bucket: usize| {
-            let kv = self.table[bucket];
-            self.get_key_value(kv.key_value_addr)
-        })
-    }
-
-    pub fn get_or_create<S: AsRef<[u8]>, V: HeapAllocable>(&mut self, key: S) -> &mut V {
-        let key_bytes: &[u8] = key.as_ref();
-        let hash = murmurhash2::murmurhash2(key.as_ref());
-        let mut probe = self.probe(hash);
-        loop {
-            let bucket = probe.next_probe();
-            let kv: KeyValue = self.table[bucket];
-            if kv.is_empty() {
-                let key_bytes_ref = self.heap.allocate_and_set(key_bytes);
-                let (addr, val): (u32, &mut V) = self.heap.allocate_object();
-                assert_eq!(addr, key_bytes_ref.addr() + 2 + key_bytes.len() as u32);
-                self.set_bucket(hash, key_bytes_ref, bucket);
-                return val;
-            } else if kv.hash == hash {
-                let (stored_key, expull_addr): (&[u8], u32) = self.get_key_value(kv.key_value_addr);
-                if stored_key == key_bytes {
-                    return self.heap.get_mut_ref(expull_addr);
-                }
-            }
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-
-    use super::*;
-    use super::super::heap::{Heap, HeapAllocable};
-    use super::murmurhash2::murmurhash2;
-    use test::Bencher;
-    use std::collections::HashSet;
-    use super::split_memory;
-
-    struct TestValue {
-        val: u32,
-        _addr: u32,
-    }
-
-    impl HeapAllocable for TestValue {
-        fn with_addr(addr: u32) -> TestValue {
-            TestValue {
-                val: 0u32,
-                _addr: addr,
-            }
-        }
-    }
-
-    #[test]
-    fn test_hashmap_size() {
-        assert_eq!(split_memory(100_000), (67232, 9));
-        assert_eq!(split_memory(1_000_000), (737856, 12));
-        assert_eq!(split_memory(10_000_000), (7902848, 15));
-    }
-
-    #[test]
-    fn test_hash_map() {
-        let heap = Heap::with_capacity(2_000_000);
-        let mut hash_map: HashMap = HashMap::new(18, &heap);
-        {
-            let v: &mut TestValue = hash_map.get_or_create("abc");
-            assert_eq!(v.val, 0u32);
-            v.val = 3u32;
-        }
-        {
-            let v: &mut TestValue = hash_map.get_or_create("abcd");
-            assert_eq!(v.val, 0u32);
-            v.val = 4u32;
-        }
-        {
-            let v: &mut TestValue = hash_map.get_or_create("abc");
-            assert_eq!(v.val, 3u32);
-        }
-        {
-            let v: &mut TestValue = hash_map.get_or_create("abcd");
-            assert_eq!(v.val, 4u32);
-        }
-        let mut iter_values = hash_map.iter();
-        {
-            let (_, addr) = iter_values.next().unwrap();
-            let val: &TestValue = heap.get_ref(addr);
-            assert_eq!(val.val, 3u32);
-        }
-        {
-            let (_, addr) = iter_values.next().unwrap();
-            let val: &TestValue = heap.get_ref(addr);
-            assert_eq!(val.val, 4u32);
-        }
-        assert!(iter_values.next().is_none());
-    }
-
-    #[test]
-    fn test_murmur() {
-        let s1 = "abcdef";
-        let s2 = "abcdeg";
-        for i in 0..5 {
-            assert_eq!(
-                murmurhash2(&s1[i..5].as_bytes()),
-                murmurhash2(&s2[i..5].as_bytes())
-            );
-        }
-    }
-
-    #[test]
-    fn test_murmur_collisions() {
-        let mut set: HashSet<u32> = HashSet::default();
-        for i in 0..10_000 {
-            let s = format!("hash{}", i);
-            let hash = murmurhash2(s.as_bytes());
-            set.insert(hash);
-        }
-        assert_eq!(set.len(), 10_000);
-    }
-
-    #[bench]
-    fn bench_murmurhash_2(b: &mut Bencher) {
-        let keys: Vec<&'static str> =
-            vec!["wer qwe qwe qwe ", "werbq weqweqwe2 ", "weraq weqweqwe3 "];
-        b.iter(|| {
-            keys.iter()
-                .map(|&s| s.as_bytes())
-                .map(murmurhash2::murmurhash2)
-                .map(|h| h as u64)
-                .last()
-                .unwrap()
-        });
-    }
-
-}
--- a/src/datastruct/stacker/heap.rs
+++ b/src/datastruct/stacker/heap.rs
@@ -1,233 +0,0 @@
-use std::cell::UnsafeCell;
-use std::mem;
-use std::ptr;
-use byteorder::{ByteOrder, NativeEndian};
-
-/// `BytesRef` refers to a slice in tantivy's custom `Heap`.
-///
-/// The slice will encode the length of the `&[u8]` slice
-/// on 16-bits, and then the data is encoded.
-#[derive(Copy, Clone)]
-pub struct BytesRef(u32);
-
-impl BytesRef {
-    pub fn is_null(&self) -> bool {
-        self.0 == u32::max_value()
-    }
-
-    pub fn addr(&self) -> u32 {
-        self.0
-    }
-}
-
-impl Default for BytesRef {
-    fn default() -> BytesRef {
-        BytesRef(u32::max_value())
-    }
-}
-
-/// Object that can be allocated in tantivy's custom `Heap`.
-pub trait HeapAllocable {
-    fn with_addr(addr: u32) -> Self;
-}
-
-/// Tantivy's custom `Heap`.
-pub struct Heap {
-    inner: UnsafeCell<InnerHeap>,
-}
-
-#[cfg_attr(feature = "cargo-clippy", allow(mut_from_ref))]
-impl Heap {
-    /// Creates a new heap with a given capacity
-    pub fn with_capacity(num_bytes: usize) -> Heap {
-        Heap {
-            inner: UnsafeCell::new(InnerHeap::with_capacity(num_bytes)),
-        }
-    }
-
-    fn inner(&self) -> &mut InnerHeap {
-        unsafe { &mut *self.inner.get() }
-    }
-
-    /// Clears the heap. All the underlying data is lost.
-    ///
-    /// This heap does not support deallocation.
-    /// This method is the only way to free memory.
-    pub fn clear(&self) {
-        self.inner().clear();
-    }
-
-    /// Return amount of free space, in bytes.
-    pub fn num_free_bytes(&self) -> u32 {
-        self.inner().num_free_bytes()
-    }
-
-    /// Allocate a given amount of space and returns an address
-    /// in the Heap.
-    pub fn allocate_space(&self, num_bytes: usize) -> u32 {
-        self.inner().allocate_space(num_bytes)
-    }
-
-    /// Allocate an object in the heap
-    pub fn allocate_object<V: HeapAllocable>(&self) -> (u32, &mut V) {
-        let addr = self.inner().allocate_space(mem::size_of::<V>());
-        let v: V = V::with_addr(addr);
-        self.inner().set(addr, &v);
-        (addr, self.inner().get_mut_ref(addr))
-    }
-
-    /// Stores a `&[u8]` in the heap and returns the destination BytesRef.
-    pub fn allocate_and_set(&self, data: &[u8]) -> BytesRef {
-        self.inner().allocate_and_set(data)
-    }
-
-    /// Fetches the `&[u8]` stored on the slice defined by the `BytesRef`
-    /// given as argumetn
-    pub fn get_slice(&self, bytes_ref: BytesRef) -> &[u8] {
-        self.inner().get_slice(bytes_ref)
-    }
-
-    /// Stores an item's data in the heap, at the given `address`.
-    pub fn set<Item>(&self, addr: u32, val: &Item) {
-        self.inner().set(addr, val);
-    }
-
-    /// Returns a mutable reference for an object at a given Item.
-    pub fn get_mut_ref<Item>(&self, addr: u32) -> &mut Item {
-        self.inner().get_mut_ref(addr)
-    }
-
-    /// Returns a mutable reference to an `Item` at a given `addr`.
-    #[cfg(test)]
-    pub fn get_ref<Item>(&self, addr: u32) -> &mut Item {
-        self.get_mut_ref(addr)
-    }
-}
-
-struct InnerHeap {
-    buffer: Vec<u8>,
-    buffer_len: u32,
-    used: u32,
-    next_heap: Option<Box<InnerHeap>>,
-}
-
-impl InnerHeap {
-    pub fn with_capacity(num_bytes: usize) -> InnerHeap {
-        let buffer: Vec<u8> = vec![0u8; num_bytes];
-        InnerHeap {
-            buffer,
-            buffer_len: num_bytes as u32,
-            next_heap: None,
-            used: 0u32,
-        }
-    }
-
-    pub fn clear(&mut self) {
-        self.used = 0u32;
-        self.next_heap = None;
-    }
-
-    // Returns the number of free bytes. If the buffer
-    // has reached it's capacity and overflowed to another buffer, return 0.
-    pub fn num_free_bytes(&self) -> u32 {
-        if self.next_heap.is_some() {
-            0u32
-        } else {
-            self.buffer_len - self.used
-        }
-    }
-
-    pub fn allocate_space(&mut self, num_bytes: usize) -> u32 {
-        let addr = self.used;
-        self.used += num_bytes as u32;
-        if self.used <= self.buffer_len {
-            addr
-        } else {
-            if self.next_heap.is_none() {
-                info!(
-                    r#"Exceeded heap size. The segment will be committed right
-                         after indexing this document."#,
-                );
-                self.next_heap = Some(Box::new(InnerHeap::with_capacity(self.buffer_len as usize)));
-            }
-            self.next_heap.as_mut().unwrap().allocate_space(num_bytes) + self.buffer_len
-        }
-    }
-
-    fn get_slice(&self, bytes_ref: BytesRef) -> &[u8] {
-        let start = bytes_ref.0;
-        if start >= self.buffer_len {
-            self.next_heap
-                .as_ref()
-                .unwrap()
-                .get_slice(BytesRef(start - self.buffer_len))
-        } else {
-            let start = start as usize;
-            let len = NativeEndian::read_u16(&self.buffer[start..start + 2]) as usize;
-            &self.buffer[start + 2..start + 2 + len]
-        }
-    }
-
-    fn get_mut_slice(&mut self, start: u32, stop: u32) -> &mut [u8] {
-        if start >= self.buffer_len {
-            self.next_heap
-                .as_mut()
-                .unwrap()
-                .get_mut_slice(start - self.buffer_len, stop - self.buffer_len)
-        } else {
-            &mut self.buffer[start as usize..stop as usize]
-        }
-    }
-
-    fn allocate_and_set(&mut self, data: &[u8]) -> BytesRef {
-        assert!(data.len() < u16::max_value() as usize);
-        let total_len = 2 + data.len();
-        let start = self.allocate_space(total_len);
-        let total_buff = self.get_mut_slice(start, start + total_len as u32);
-        NativeEndian::write_u16(&mut total_buff[0..2], data.len() as u16);
-        total_buff[2..].clone_from_slice(data);
-        BytesRef(start)
-    }
-
-    fn get_mut(&mut self, addr: u32) -> *mut u8 {
-        if addr >= self.buffer_len {
-            self.next_heap
-                .as_mut()
-                .unwrap()
-                .get_mut(addr - self.buffer_len)
-        } else {
-            let addr_isize = addr as isize;
-            unsafe { self.buffer.as_mut_ptr().offset(addr_isize) }
-        }
-    }
-
-    fn get_mut_ref<Item>(&mut self, addr: u32) -> &mut Item {
-        if addr >= self.buffer_len {
-            self.next_heap
-                .as_mut()
-                .unwrap()
-                .get_mut_ref(addr - self.buffer_len)
-        } else {
-            let v_ptr_u8 = self.get_mut(addr) as *mut u8;
-            let v_ptr = v_ptr_u8 as *mut Item;
-            unsafe { &mut *v_ptr }
-        }
-    }
-
-    pub fn set<Item>(&mut self, addr: u32, val: &Item) {
-        if addr >= self.buffer_len {
-            self.next_heap
-                .as_mut()
-                .unwrap()
-                .set(addr - self.buffer_len, val);
-        } else {
-            let v_ptr: *const Item = val as *const Item;
-            let v_ptr_u8: *const u8 = v_ptr as *const u8;
-            debug_assert!(addr + mem::size_of::<Item>() as u32 <= self.used);
-            unsafe {
-                let dest_ptr: *mut u8 = self.get_mut(addr);
-                ptr::copy(v_ptr_u8, dest_ptr, mem::size_of::<Item>());
-            }
-        }
-    }
-}
--- a/src/datastruct/stacker/mod.rs
+++ b/src/datastruct/stacker/mod.rs
@@ -1,43 +0,0 @@
-pub(crate) mod hashmap;
-mod heap;
-mod expull;
-
-pub use self::heap::{Heap, HeapAllocable};
-pub use self::expull::ExpUnrolledLinkedList;
-pub use self::hashmap::HashMap;
-
-#[test]
-fn test_unrolled_linked_list() {
-    use std::collections;
-    let heap = Heap::with_capacity(30_000_000);
-    {
-        heap.clear();
-        let mut ks: Vec<usize> = (1..5).map(|k| k * 100).collect();
-        ks.push(2);
-        ks.push(3);
-        for k in (1..5).map(|k| k * 100) {
-            let mut hashmap: HashMap = HashMap::new(10, &heap);
-            for j in 0..k {
-                for i in 0..500 {
-                    let v: &mut ExpUnrolledLinkedList = hashmap.get_or_create(i.to_string());
-                    v.push(i * j, &heap);
-                }
-            }
-            let mut map_addr: collections::HashMap<Vec<u8>, u32> = collections::HashMap::new();
-            for (key, addr) in hashmap.iter() {
-                map_addr.insert(Vec::from(key), addr);
-            }
-
-            for i in 0..500 {
-                let key: String = i.to_string();
-                let addr: u32 = *map_addr.get(key.as_bytes()).unwrap();
-                let exp_pull: &ExpUnrolledLinkedList = heap.get_ref(addr);
-                let mut it = exp_pull.iter(addr, &heap);
-                for j in 0..k {
-                    assert_eq!(it.next().unwrap(), i * j);
-                }
-                assert!(!it.next().is_some());
-            }
-        }
-    }
-}
--- a/src/directory/directory.rs
+++ b/src/directory/directory.rs
@@ -1,11 +1,11 @@
-use std::marker::Send;
-use std::fmt;
-use std::path::Path;
 use directory::error::{DeleteError, OpenReadError, OpenWriteError};
 use directory::{ReadOnlySource, WritePtr};
-use std::result;
+use std::fmt;
 use std::io;
+use std::marker::Send;
 use std::marker::Sync;
+use std::path::Path;
+use std::result;

 /// Write-once read many (WORM) abstraction for where
 /// tantivy's data should be stored.
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Paul Masurel	c1022e23d2	Switching to stable rust in AppVeyor.	2018-06-22 14:33:42 +09:00
Paul Masurel	8ccbfdea5d	Preparing for release	2018-06-22 14:27:46 +09:00
Paul Masurel	badfce3a23	Preparing for release.	2018-06-22 14:09:14 +09:00
Dru Sellers	e301e0bc87	Add some simple doc tests (#320 ) * Add TopCollector doc test * Add CountCollector Doc Test * Add Doc Test for MultiCollector * Add ChainedCollector Doc Test * Expose Fuzzy Query where it should be * Add FuzzyTermQuery Doc Test * Expose RegexQuery * Regex Query Doc Test * Add TermQuery Doc Test * Add doc comments * fix test 🤦 * Added explanation about the complexity variables * Fixing unit tests * Single threads if you check docids	2018-06-19 10:45:20 +09:00
Dru Sellers	317baf4e75	Add in simple regex query support (#319 ) * Add fst_regex crate in * Reduce API surface area This doesn't need to be public * better test name * Pull Automaton weight out so it can be shared * Implement Regex Query	2018-06-16 14:08:30 +09:00
Paul Masurel	24398d94e4	Exposing the	2018-06-15 21:40:57 +09:00
Dru Sellers	360f4132eb	Standardizes the Index::open_* APIs (#318 ) * Relocate `from_directory` closer to its usage * Specific methods come before the generic method * Rename open methods to follow the lead of the create methods	2018-06-15 12:16:41 +09:00
Dru Sellers	2b8f02764b	Standardizes the Index::create_* APIs (#317 ) * Pull all creation methods next to each other The goal here is to make it clear which methods are performing the same function, and to assist with standardizing the API calls. * Make `from_directory` private This seems to be an internal function, so lets make it internal. * Rename `create` to `create_in_dir` This lets the name match the `create_in_ram` pattern and opens up `create` for the generic implementation. * Implement the generic create function All of the create methods now delegate to the common create function and future `create_in_*` functions now have a clear pattern to follow as well	2018-06-14 11:08:42 +09:00
Paul Masurel	0465876854	Issue/257 (#310 ) * Replaced lz4 by a pure rust implementation of snappy. Closes #257 * snappy is the default compression. One can use lz4 by enabling the lz4 feature flag. * Removed Compression trait	2018-06-12 19:02:57 +09:00
Dru Sellers	6f7b099370	Add AutomatonWeight to a fuzzy_search module and FuzzyQuery (#300 ) * Add AutomatonWeight to a fuzzy_search module * Hacking around ownership issues * Working through lifetime issues * Working through tests * fix test by lower casing the words (reducing distance) * code review changes * Suggestion on how to solve the borrow problem * clean up	2018-06-11 22:23:03 +09:00
Paul Masurel	84f5cc4388	Added an AUTHORS file. Closes #315 (#316 )	2018-06-11 22:21:58 +09:00
Paul Masurel	75aae0d2c2	Update README	2018-06-08 13:05:57 +09:00
Paul Masurel	009a3559be	atomicwrites 2.2.0 for ARM compilation	2018-06-06 07:13:09 +09:00
Paul Masurel	7a31669e9d	Disabling ARM targets	2018-06-05 12:22:00 +09:00
Paul Masurel	5185eb790b	Reduced heap usage in unit test	2018-06-05 10:02:10 +09:00
Paul Masurel	a3dffbf1c6	Added more ARM target.	2018-06-05 09:06:33 +09:00
Paul Masurel	857a5794d8	Updated nix version	2018-06-05 09:02:40 +09:00
Paul Masurel	b0a6fc1448	Reduce RAM usage	2018-06-04 11:20:24 +09:00
Paul Masurel	989d52bea4	Updated atomicwrites version.	2018-06-04 10:00:21 +09:00
Paul Masurel	09661ea7ec	Added cross testing on different platforms	2018-06-04 09:47:53 +09:00
Paul Masurel	b59132966f	Better heap (#311 ) * Changed the heap to a paged memory arena. * Trying to simplify the indexing term hashmap * Exploding datastruct * Removed some complexity in bitpacker	2018-06-04 09:39:18 +09:00
Paul Masurel	863d3411bc	Update Cargo.toml	2018-05-31 15:54:34 +09:00
Paul Masurel	8a55d133ab	Showing Appveyor CI badge for the master branch .. before the last build was shown.	2018-05-28 13:44:53 +09:00
Jason Wolfe	432d49d814	Expose parameters of RangeQuery for external usage (#309 )	2018-05-19 14:29:25 +09:00
Jason Wolfe	0cea706f10	Add docs to new Query methods (#307 )	2018-05-18 13:53:29 +09:00
Paul Masurel	71d41ca209	Added Google to the license	2018-05-18 10:13:23 +09:00
Paul Masurel	bc69dab822	cargo fmt	2018-05-18 10:08:05 +09:00
Jason Wolfe	72acad0921	Add box_clone() and downcast::Any to Query (#303 )	2018-05-18 09:53:11 +09:00
Paul Masurel	c9459f74e8	Update docs about TermDict.	2018-05-18 09:20:39 +09:00
Dru Sellers	08d2cc6c7b	Make it possible to stream the terms matching an Automaton (#297 ) * rustfmt and some English grammar * sort cargo.toml crates * WIP: something to show * Remove example for now * Implement desired method * Resolving Generic Type Arguments * Resolve Generic Types * Banging around on the tests * DANGER! Change unsafe usage based on compiler warnings * Unscrew up my rebase * Clean Up Type Spam Default Types FTW * typo * better variable names * Remove Duplicate Levenshtein crate	2018-05-11 12:41:14 -07:00
Dru Sellers	82d87416c2	Implement StopWords Filter (#292 ) * Implement StopWords Filter - added example doctest for alphanum_only.rs so that I could drive my own test of the stopword filter * Style Cop * Switch HashSet Hasher to FNV for speed * Update Change Log * fix missed location renaming	2018-05-09 18:40:41 -07:00
Paul Masurel	96b2c2971e	Testing actual doc ids in unit test	2018-05-09 09:14:22 -07:00
Dru Sellers	162afd73f6	Alive docs iterator (#293 ) * Add non-deleted DocId iterator to SegmentReader Closes #287 * Add Todo * Add Unit Test * Improving test based on feedback - found bug and fixed it. :) * Reestablish changes post rebase for clean merge	2018-05-09 09:03:27 -07:00
Paul Masurel	ddfd87fa59	Merge branch 'master' of github.com:tantivy-search/tantivy	2018-05-08 00:08:17 -07:00
Paul Masurel	24050d0eb5	Remove some unsafe stuff, justified some of it.	2018-05-07 23:57:53 -07:00
Jason Wolfe	89eb209ece	#294 : Make fieldnorm module public, add documentation (#295 )	2018-05-07 20:20:38 -07:00
Paul Masurel	9a0b7f9855	Rustfmt	2018-05-07 19:50:35 -07:00
Jason Wolfe	8e343b1ca3	Add fast field for associating arbitrary bytes to a document (#275 ) * Add fast field for associating arbitrary bytes to a document * Fix unused macro_use warning * Improvements from code review * Make BytesFastFieldWriter public * Fix json parsing validation failure * Add bytes fast field to CHANGELOG.md * Fix compile errors from merge * Support merging * Address misc code review comments * Fix comments from CR	2018-05-07 19:30:31 -07:00
Paul Masurel	99c0b84036	Integrating #274 , #280 , #289 into master (#290 ) * Integrating bugfixes into master Closes #274 Closes #280 Closes #289 * Next version will be 0.6	2018-05-06 09:48:25 -07:00
Dru Sellers	ca74c14647	Simple Implementation of NGram Tokenizer (#278 ) * Simple Implementation of NGram Tokenizer It does not yet support edges It could probably be better in many "rusty" ways But the test is passing, so I'll call this a good stopping point for the day. * Remove Ngram from manager. Too many variations * Basic configuration model Should the extensive tests exist here? * Add Sample to provide an End to End testing * Basic Edgegram support * cleanup * code feedback * More code review feedback processed	2018-05-06 09:47:49 -07:00
Dru Sellers	68ee18e4e8	Add Index::open_directory function (#285 ) * Add Index::open_directory function * dry	2018-05-03 00:07:46 -07:00
Paul Masurel	5637657c2f	Removed ptr dereference for explicit ptr::read_unaligned	2018-04-25 19:15:32 +09:00
Paul Masurel	2e3c9a8878	Bugfix in murmurhash.	2018-04-25 19:06:31 +09:00
Paul Masurel	78673172d0	Cargo fmt	2018-04-21 20:05:36 +09:00
Paul Masurel	175b76f119	Removed `streamdict` Closes #271	2018-04-21 19:55:41 +09:00
Paul Masurel	9b79e21bd7	Returning error when schema is not valid for a given query.	2018-04-19 13:02:30 +09:00
Paul Masurel	5e38ae336f	Bump tantivy version and readded win deps	2018-04-17 18:27:57 +09:00
Paul Masurel	8604351f59	Hide some of the API Added some doc.	2018-04-17 13:31:22 +09:00
Paul Masurel	6a48953d8a	Closes #266 (#268 ) PhraseQuery panics with a nice error message when the underlying field does not have any positions. The `QueryParser` fails as well with a dedicated error.	2018-04-17 10:03:15 +09:00
pmasurel	0804b42afa	Checking the type of range queries	2018-04-16 14:01:10 +09:00
Paul Masurel	8083bc6eef	bench working	2018-04-15 12:25:38 +09:00
Paul Masurel	0156f88265	Compiles in stable rust	2018-04-15 11:03:44 +09:00
Paul Masurel	a1c07bf457	Added iterator for facet collector	2018-04-14 20:22:02 +09:00
Paul Masurel	9de74b68d1	Remove range argument	2018-04-13 18:34:23 +09:00
Paul Masurel	57c7073867	Removed	2018-04-13 09:43:36 +09:00
Paul Masurel	121374b89b	Removed the need for AtomicU64	2018-04-12 22:08:15 +09:00
Paul Masurel	e44782bf14	No more	2018-04-12 13:01:11 +09:00
Paul Masurel	dfafb24fa6	Bumped bitpacker's version	2018-04-10 21:21:47 +09:00
jason-wolfe	4c6f9541e9	#263 : Make MultiValueIntFastFieldWriter public, expose via FastFieldsWriter (#264 )	2018-04-10 12:27:34 +09:00
Paul Masurel	743ae102f1	Using bitpacker@3	2018-04-10 10:05:42 +09:00
Paul Masurel	0107fe886b	Removed timer	2018-03-31 15:40:16 +09:00
Paul Masurel	1d9566e73c	Making mmap a feature	2018-03-31 13:23:43 +09:00
Paul Masurel	8006f1df11	Added comments	2018-03-28 08:28:49 +09:00
Paul Masurel	ffa03bad71	TermScorer does not handle deletes	2018-03-27 17:35:20 +09:00
Paul Masurel	98cf4ba63a	Small refactor of postings's skip method	2018-03-27 16:14:28 +09:00
Paul Masurel	4d65771e04	field norm reader is not an option anymore.	2018-03-26 13:25:29 +09:00
Paul Masurel	9712a75399	Added unit test for intersection score	2018-03-25 12:58:24 +09:00
Paul Masurel	3ae03b91ae	PhraseScorer's score aligned with that of Lucene.)	2018-03-25 12:44:16 +09:00
Paul Masurel	238b02ce7d	Bugfixed	2018-03-23 18:50:57 +09:00
Paul Masurel	3091459777	Fixed main bug. Unit test still not passing because of altered scoring	2018-03-23 13:52:10 +09:00
Paul Masurel	b7f8884246	Closes #245 = BM25. (#260 ) * Closes #245 = BM25. Scores are the same as Lucene. * Fixing travis conf	2018-03-22 15:06:56 +09:00
Paul Masurel	e22f767fda	Backmerge	2018-03-21 21:18:46 +09:00
Paul Masurel	3ecfc36e53	Total field norm fixed.	2018-03-21 20:43:02 +09:00
Paul Masurel	1c9450174e	Fieldnorm reader working except merge	2018-03-21 17:36:16 +09:00
Paul Masurel	cde4c391cd	Added fieldnorm module	2018-03-21 15:41:46 +09:00
Paul Masurel	6d47634616	Added unit tests	2018-03-20 12:11:28 +09:00
Paul Masurel	39b182c24b	Simplified phrase queries. Reading several time is ok.	2018-03-20 11:47:48 +09:00
Paul Masurel	baaae3f4ec	Making it possible to read positions twice	2018-03-20 11:36:22 +09:00
Paul Masurel	63064601a7	Readded test for reading positions twice	2018-03-20 10:04:36 +09:00
Paul Masurel	07a8023a3a	Added	2018-03-19 14:36:43 +09:00
Paul Masurel	59639cd311	In sync with master. Fixed merging	2018-03-19 12:58:42 +09:00
Paul Masurel	b0e5e1f61d	Back merged master	2018-03-19 12:19:08 +09:00
Paul Masurel	234a902470	Removed cc from Cargo.toml	2018-03-19 12:09:25 +09:00
Paul Masurel	75d130f1ce	Edited CHANGELOG	2018-03-19 12:01:48 +09:00
Paul Masurel	410187dd24	Removed .vimrc	2018-03-19 11:54:10 +09:00
Paul Masurel	88303d4833	Removed script directory	2018-03-19 11:53:15 +09:00
Paul Masurel	a26b0ff4a2	Removed exclude cpp from travis configuration	2018-03-19 11:51:41 +09:00
Paul Masurel	d4ed86f13a	Issue/255 (#256 ) * Remove cpp compression. * Pointing to publish bitpacking * Edited README	2018-03-19 11:48:40 +09:00
Paul Masurel	fc8902353c	fieldnrom encoding. test broken	2018-03-10 18:35:16 +09:00
Paul Masurel	a2ee988304	Small change in pop_lowest.	2018-03-10 15:32:30 +09:00
Paul Masurel	97b7984200	Updated CHANGELOG	2018-03-10 14:08:11 +09:00
Paul Masurel	8683718159	Version bump	2018-03-10 14:01:30 +09:00
Paul Masurel	0cf274135b	Clippy	2018-03-10 13:07:18 +09:00
Paul Masurel	a3b44773bb	Bugfix and rustfmt	2018-03-10 12:21:50 +09:00
Paul Masurel	ec7c582109	NOBUG no-simd compression fix	2018-03-09 14:19:58 +09:00
Ewan Higgs	ee7ab72fb1	Support trailing commas using ',+ ,' trick from Blandy 2017. (#250 )	2018-02-27 10:33:39 +09:00
Paul Masurel	2c20759829	removed unsafecell for position computer	2018-02-24 12:07:55 +09:00
Paul Masurel	23387b0ed0	Positions writes to an external Vec	2018-02-24 11:14:45 +09:00
Dylan DPC	e82859f2e6	Update Cargo.toml (#249 )	2018-02-24 09:17:33 +09:00
Paul Masurel	be830b03c5	Bugfix in intersection.advance and impl skip_next	2018-02-23 11:55:23 +09:00
Paul Masurel	1b94a3e382	Phrase query optimisation	2018-02-23 00:00:22 +09:00
Paul Masurel	c3fbc4c8fa	Simplified a notch TinySet::pop_lowest()	2018-02-22 10:43:06 +09:00
Paul Masurel	4ee2db25a0	Generic on Postings rather than deletes in TermScorer	2018-02-22 08:26:45 +09:00
Paul Masurel	e423784fd0	Added specialized SegmentPostings when there are no DeleteSet	2018-02-21 23:49:20 +09:00
Paul Masurel	fdb9c3c516	Tantivy version 0.5.0	2018-02-21 11:38:26 +09:00
Paul Masurel	6fb114224a	Added unit test	2018-02-21 00:13:04 +09:00
Paul Masurel	2c3e33895a	Added unit tests	2018-02-21 00:03:41 +09:00
Paul Masurel	d512b53688	Added handling of parenthesis in query parser	2018-02-20 23:18:02 +09:00
Paul Masurel	c8afd2b55d	Added unit tests	2018-02-20 17:05:33 +09:00
Paul Masurel	3fd6d7125b	Added unit test	2018-02-20 13:12:05 +09:00
Paul Masurel	de6a3987a9	Ignoring functional test	2018-02-20 12:58:06 +09:00
Paul Masurel	3dedc465fa	Merge branch 'feature/multivalued-i64-u64'	2018-02-20 12:54:18 +09:00
Paul Masurel	f16cc6367e	Refactoring of fastfields	2018-02-20 12:52:30 +09:00
Paul Masurel	4026fc5fb1	Removed redundant compressed_block_size function	2018-02-20 08:28:28 +09:00
Paul Masurel	43742a93ef	Multivalue u64 field / i64 field.	2018-02-20 00:16:20 +09:00
Paul Masurel	2a843d86cb	Code cleaning	2018-02-19 21:51:39 +09:00
Paul Masurel	9a706c296a	Larger union horizon	2018-02-19 21:50:33 +09:00
Paul Masurel	5ff8123b7a	Code cleaning	2018-02-19 15:41:19 +09:00
Paul Masurel	6061158506	Added long running test to travis conf	2018-02-19 13:23:04 +09:00
Paul Masurel	4e8b0e89d9	Added unit test	2018-02-19 13:19:18 +09:00
Paul Masurel	0540ebb49e	Cargo clippy	2018-02-19 12:36:24 +09:00
Paul Masurel	ef94582203	Rustfmt	2018-02-19 12:12:10 +09:00
Paul Masurel	2f242d5f52	Moving docset around	2018-02-19 12:07:05 +09:00
Paul Masurel	da3d372e6e	Faster union counts	2018-02-19 10:17:16 +09:00
Paul Masurel	42fd3fe5c7	Bugfix on TermWeight::count()	2018-02-18 10:59:18 +09:00
Paul Masurel	5dae6e6bbc	Downcast `TermScorer` for intersection when all legs are TermScorers	2018-02-18 10:28:43 +09:00
Paul Masurel	e608e0a1df	Removed half baked usage of Any	2018-02-18 10:01:14 +09:00
Paul Masurel	6c8c90d348	Removed lifetime from scorer	2018-02-18 09:12:40 +09:00
Paul Masurel	eb50e92ec4	Removed specialized postings on SegmentPostings	2018-02-18 00:09:15 +09:00
Paul Masurel	20bede9462	Bugfix when requesting no termfreq.	2018-02-17 22:41:12 +09:00
Paul Masurel	4640ab4e65	Merge branch 'master' into issue/query-perf	2018-02-17 17:31:51 +09:00
Paul Masurel	cd51ed0f9f	Added comments	2018-02-17 16:59:28 +09:00
Paul Masurel	6676fe5717	Added a count method	2018-02-17 15:02:51 +09:00
Paul Masurel	292bb17346	Disable scoring - Disabling scoring is an argument of the `.weight()` method - Collectors declare whether they need scoring	2018-02-17 12:43:16 +09:00
Paul Masurel	0300e7272b	Scoring for union.	2018-02-17 11:56:21 +09:00
Paul Masurel	8760899fa2	Stupid implementaiton of Box<Scorer>::collect	2018-02-16 19:30:50 +09:00
Paul Masurel	c89d570a79	rustfmt	2018-02-16 17:50:05 +09:00
Paul Masurel	1da06d867b	Using the same logic when score is enabled.	2018-02-16 17:36:33 +09:00
Paul Masurel	76e8db6ed3	blop	2018-02-16 14:57:08 +09:00
Paul Masurel	31e5580bfa	Renaming intersection / exclude	2018-02-16 11:55:56 +09:00
Paul Masurel	930d3db2f7	Integrated reqopt_scorer	2018-02-16 11:43:27 +09:00
Paul Masurel	1593e1dc6f	Added reqopt	2018-02-16 11:22:39 +09:00
Paul Masurel	e0189fc9e6	Added exclude query	2018-02-14 18:06:51 +09:00
Paul Masurel	ffdb4ef0a7	Added unit test	2018-02-14 11:58:40 +09:00
Paul Masurel	58845344c2	Unit test + bugfix in union	2018-02-13 14:54:20 +09:00
Paul Masurel	548ec9ecca	Added ok unit test	2018-02-12 17:48:41 +09:00
Paul Masurel	86b700fa93	Updated travis.yml	2018-02-12 12:13:36 +09:00
Paul Masurel	e95c49e749	Added unit test to show bug in intersection	2018-02-12 12:06:19 +09:00
Paul Masurel	f3033a8469	Added sudo required to travis conf because of https://github.com/travis-ci/travis-ci/issues/9061	2018-02-12 11:19:12 +09:00
Paul Masurel	c4125bda59	Backmerging master	2018-02-12 11:08:57 +09:00
Paul Masurel	a7ffc0e610	Rustfmt	2018-02-12 10:31:29 +09:00
Paul Masurel	9370427ae2	Terminfo blocks (#244 ) * Using u64 key in the store * Using Option<> for the next element, as opposed to u64 * Code simplification. * Added TermInfoStoreWriter. * Added a TermInfoStore * Added FixedSized for BinarySerialized.	2018-02-12 10:24:58 +09:00
Paul Masurel	1fc7afa90a	Issue/range query (#242 ) BitSet and RangeQuery	2018-02-05 09:33:25 +09:00
Paul Masurel	6a104e4f69	Cargo fmt	2018-02-03 11:59:34 +09:00
Paul Masurel	920f086e1d	Clippy	2018-02-03 11:46:01 +09:00
Paul Masurel	13aaca7e11	Merge branch 'master' into merge-facets	2018-02-03 11:13:02 +09:00
Paul Masurel	df53dc4ceb	Format	2018-02-03 00:21:05 +09:00
Paul Masurel	dd028841e8	Added documentation / test and change the contract of .add_facet()	2018-02-03 00:17:51 +09:00
Paul Masurel	eb84b8a60d	bugfix	2018-02-02 18:52:07 +09:00
Paul Masurel	c05f46ad0e	skip for intersection	2018-02-02 17:22:58 +09:00
Paul Masurel	435ff9d524	Make constructor of RangeQuery public	2018-02-02 16:50:22 +09:00
Paul Masurel	fdd5dd8496	Merge branch 'master' into issue/query-perf	2018-02-02 16:39:28 +09:00
Paul Masurel	fb5476d5de	Query optimization: phrase query + union	2018-02-02 16:39:17 +09:00
Paul Masurel	dd8332c327	Added disabling scoring	2018-02-02 12:11:56 +09:00
Paul Masurel	63d201150b	issue/range-query Added range query	2018-02-02 00:41:12 +09:00
Paul Masurel	b78efdc59f	NOBUG Use the skipping logic of segment postings in	2018-02-01 18:36:55 +09:00
Paul Masurel	5cb08f7996	Method to create bitset from DocSet directly.	2018-02-01 18:25:43 +09:00
Paul Masurel	1947a19700	Added bitse	2018-01-31 23:56:54 +09:00
Paul Masurel	271b019420	added cargo doc	2018-01-30 15:18:19 +09:00
Paul Masurel	340693184f	Added comment	2018-01-30 15:15:55 +09:00
Paul Masurel	97782a9511	updated travis-cargo	2018-01-30 13:18:51 +09:00
Paul Masurel	930010aa88	Unit test passing	2018-01-28 00:03:51 +09:00
Paul Masurel	7f5b07d4e7	Fixing unit tests	2018-01-25 14:55:29 +09:00
Paul Masurel	3edb3dce6a	Test not passing	2018-01-25 12:46:32 +09:00
Paul Masurel	1edaf7a312	Closes #236 . Removes dependency to version.	2018-01-20 12:12:43 +09:00
Paul Masurel	137906ff29	Fixing PhraseQuery, broken due to the reordering of the intersection clauses. Closes #234	2018-01-12 21:01:28 +09:00
Paul Masurel	143a143cde	issue/232 added unit test. (#233 )	2018-01-11 23:37:45 +09:00
Paul Masurel	4f5ce12a77	NOBUG removed cpp from patterns	2018-01-05 12:09:42 +09:00
Paul Masurel	813efa4ab3	NOBUG coveralls	2018-01-05 11:03:27 +09:00
Paul Masurel	c3b6c1dc0b	NOBUG coveralls	2018-01-05 00:31:57 +09:00
Paul Masurel	6f5e0ef6f4	NOBUG Simplify travis	2018-01-04 20:51:00 +09:00
Paul Masurel	7224f58895	Merge branch 'issue/218' Conflicts: src/directory/mmap_directory.rs src/lib.rs	2018-01-04 18:47:10 +09:00
Paul Masurel	49519c3f61	added comments	2018-01-04 12:53:20 +09:00
Paul Masurel	cb11b92505	Added comments	2018-01-04 12:27:14 +09:00
Paul Masurel	7b2dcfbd91	Merge branch 'issue/227'	2018-01-04 12:12:00 +09:00
Paul Masurel	d2e30e6681	Merge branch 'master' of github.com:tantivy-search/tantivy	2018-01-04 12:09:44 +09:00
Paul Masurel	ef109927b3	rustfmt	2018-01-04 12:08:34 +09:00
Paul Masurel	44e5c4dfd3	Added alphanum only token filter	2017-12-31 13:43:10 +09:00
Paul Masurel	6f223253ea	Made load_metas public	2017-12-31 08:57:19 +09:00
Paul Masurel	f7b0392bd5	issue/230 Add an optional commit message. (#231 ) Closes #230	2017-12-27 12:27:02 +09:00
Paul Masurel	442bc9a1b8	Fixes the computation of the memory size of a hashtable with a key of `n` bits. (#229 ) Closes #228	2017-12-25 13:04:10 +09:00
Paul Masurel	db7d784573	Issue 227 Faster merge when there are no deletes	2017-12-21 22:04:05 +09:00
Paul Masurel	74d32e522a	Stopped using mmap in tantivy. Caching MmapReadOnly. Closes #218	2017-10-08 17:07:19 +09:00