Added convert to static [u8]

Using bitpacker@3
remove comment
2025-12-30 05:52:54 +00:00 · 2018-04-10 21:18:32 +09:00 · 2018-04-10 10:05:42 +09:00 · 2018-04-09 21:51:17 +09:00 · 2018-03-31 17:42:26 +09:00 · 2018-03-31 15:40:16 +09:00
179 changed files with 8437 additions and 100354 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+*.swp
 target
 target/debug
 .vscode
@@ -8,4 +9,4 @@ benchmark
 cpp/simdcomp/bitpackingbenchmark
 *.bk
 .idea
-trace.dat
+trace.dat
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,4 +1,5 @@
 language: rust
+sudo: required
 cache: cargo
 rust:
  - nightly
@@ -22,13 +23,15 @@ addons:
      - binutils-dev
      - cmake
 before_script:
-  - |
-      cargo install cargo-travis || echo "cargo-travis already installed"
-      export PATH=$HOME/.cargo/bin:$PATH
+  - export PATH=$HOME/.cargo/bin:$PATH
+  - cargo install cargo-update || echo "cargo-update already installed"
+  - cargo install cargo-travis || echo "cargo-travis already installed"
 script:
  - cargo build
  - cargo test
+  - cargo test -- --ignored
  - cargo run --example simple_search
+  - cargo doc
 after_success:
-  - cargo coveralls --exclude-pattern cpp/
-  - travis-cargo doc-upload
+  - cargo coveralls --exclude-pattern src/functional_test.rs
+  - cargo doc-upload
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,27 @@
+Tantivy 0.5.2
+==========================
+
+- Removed C code. Tantivy is now pure Rust.
+- BM25
+- Approximate field norms encoded over 1 byte.
+
+Tantivy 0.5.1
+==========================
+- bugfix #254 : tantivy failed if no documents in a segment contained a specific field.
+
+
+Tantivy 0.5
+==========================
+- Faceting
+- RangeQuery
+- Configurable tokenization pipeline
+- Bugfix in PhraseQuery
+- Various query optimisations
+- Allowing very large indexes
+    - 64 bits file address
+    - Smarter encoding of the `TermInfo` objects
+
+

 Tantivy 0.4.3
 ==========================
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,8 +1,7 @@
 [package]
 name = "tantivy"
-version = "0.5.0-dev"
+version = "0.5.1"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
-build = "build.rs"
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
 description = """Tantivy is a search engine library."""
@@ -17,25 +16,19 @@ byteorder = "1.0"
 lazy_static = "0.2.1"
 tinysegmenter = "0.1.0"
 regex = "0.2"
-fst = "0.2"
-atomicwrites = "0.1.3"
-tempfile = "2.1"
+fst = {version="0.2", default-features=false}
+atomicwrites = {version="0.1", optional=true}
 log = "0.3.6"
 combine = "2.2"
 tempdir = "0.3"
 serde = "1.0"
 serde_derive = "1.0"
 serde_json = "1.0"
-bincode = "0.8"
-libc = {version = "0.2.20", optional=true}
 num_cpus = "1.2"
 itertools = "0.5.9"
-lz4 = "1.20"
 bit-set = "0.4.0"
-time = "0.1"
-uuid = { version = "0.5", features = ["v4", "serde"] }
+uuid = { version = "0.6", features = ["v4", "serde"] }
 chan = "0.1"
-version = "2"
 crossbeam = "0.3"
 futures = "0.1"
 futures-cpupool = "0.1"
@@ -43,17 +36,19 @@ error-chain = "0.8"
 owning_ref = "0.3"
 stable_deref_trait = "1.0.0"
 rust-stemmers = "0.1.0"
+downcast = { version="0.9", features = ["nightly"]}
+matches = "0.1"
+snap = "0.2"
+bitpacking = {path = "../bitpacking"}

 [target.'cfg(windows)'.dependencies]
 winapi = "0.2"

 [dev-dependencies]
 rand = "0.3"
+tempfile = "2.1"
 env_logger = "0.4"

-[build-dependencies]
-cc = {version = "1.0.0", optional=true}
-
 [profile.release]
 opt-level = 3
 debug = false
@@ -62,10 +57,23 @@ debug-assertions = false


 [features]
-default = ["simdcompression"]
-simdcompression = ["libc", "cc"]
+default = ["mmap"]
 streamdict = []
+mmap = ["fst/mmap", "atomicwrites"]


 [badges]
 travis-ci = { repository = "tantivy-search/tantivy" }
+
+[[example]]
+name = "simple_search"
+required-features = ["mmap"]
+
+
+[[bin]]
+name = "convert_to_static"
+path = "./bin/convert_to_static.rs"
+
+[[bin]]
+name = "test_static_dir"
+path = "./bin/test_static_dir.rs"
--- a/README.md
+++ b/README.md
@@ -5,25 +5,26 @@
 [![Join the chat at https://gitter.im/tantivy-search/tantivy](https://badges.gitter.im/tantivy-search/tantivy.svg)](https://gitter.im/tantivy-search/tantivy?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![Build status](https://ci.appveyor.com/api/projects/status/r7nb13kj23u8m9pj?svg=true)](https://ci.appveyor.com/project/fulmicoton/tantivy)
-![beacon for google analytics](https://ga-beacon.appspot.com/UA-88834340-1/tantivy/README)

 **Tantivy** is a **full text search engine library** written in rust.

 It is strongly inspired by Lucene's design.

-
 # Features

- configurable indexing (optional term frequency and position indexing)
+- Tiny startup time (<10ms), perfect for command line tools
 - tf-idf scoring
 - Basic query language
 - Phrase queries
 - Incremental indexing
 - Multithreaded indexing (indexing English Wikipedia takes < 3 minutes on my desktop)
- mmap based
+- Mmap directory
 - optional SIMD integer compression
- u64 and i64 fast fields (equivalent of doc values in Lucene)
+- Single valued and multivalued u64 and i64 fast fields (equivalent of doc values in Lucene)
 - LZ4 compressed document store
+- Range queries
+- Faceting
+- configurable indexing (optional term frequency and position indexing)
 - Cheesy logo with a horse

 Tantivy supports Linux, MacOS and Windows.
@@ -40,14 +41,38 @@ It will walk you through getting a wikipedia search engine up and running in a f

 # Compiling

-Tantivy requires Rust Nightly because it uses requires the features [`box_syntax`](https://doc.rust-lang.org/stable/book/box-syntax-and-patterns.html), [`optin_builtin_traits`](https://github.com/rust-lang/rfcs/blob/master/text/0019-opt-in-builtin-traits.md), and [`conservative_impl_trait`](https://github.com/rust-lang/rfcs/blob/master/text/1522-conservative-impl-trait.md).
-The project can then be built using `cargo`.
+## Development
+
+Tantivy requires Rust Nightly because it requires the features [`box_syntax`](https://doc.rust-lang.org/stable/unstable-book/language-features/box-syntax.html), [`optin_builtin_traits`](https://github.com/rust-lang/rfcs/blob/master/text/0019-opt-in-builtin-traits.md), [`conservative_impl_trait`](https://github.com/rust-lang/rfcs/blob/master/text/1522-conservative-impl-trait.md),
+and [simd](https://github.com/rust-lang/rust/issues/27731).
+
+
+To check out and run the tests, you can simply run:

    git clone git@github.com:tantivy-search/tantivy.git
    cd tantivy
-    cargo build
+    cargo +nightly build


+## Note on release build and performance
+
+If your project depends on `tantivy`, for better performance, make sure to enable
+`sse3` instructions via the `RUSTFLAGS` environment variable. (This instruction set
+is likely to be available on most `x86_64` CPUs you will encounter).
+
+For instance,
+
+    RUSTFLAGS='-C target-feature=+sse3'
+
+Or, if you are targeting a specific CPU
+
+    RUSTFLAGS='-C target-cpu=native' cargo build --release
+
+Regardless of the flags you pass, by default `tantivy` will contain `SSE3` instructions.
+If you want to disable those, you can run the following command :
+
+    cargo build --no-default-features
+
 Alternatively, if you are trying to compile `tantivy` without simd compression,
 you can disable this functionality. In this case, this submodule is not required
 and you can compile tantivy by using the `--no-default-features` flag.
@@ -57,4 +82,4 @@ and you can compile tantivy by using the `--no-default-features` flag.

 # Contribute

-Send me an email (paul.masurel at gmail.com) if you want to contribute to tantivy.
+Send me an email (paul.masurel at gmail.com) if you want to contribute to tantivy.
--- a/bin/convert_to_static.rs
+++ b/bin/convert_to_static.rs
@@ -0,0 +1,20 @@
+use std::env;
+use std::path::PathBuf;
+use std::fs::File;
+use std::io::Write;
+extern crate tantivy;
+use tantivy::directory::write_static_from_directory;
+
+fn main() {
+    // Usage: <directory_path> <outputfile>. Skips argv[0], reads both paths, then writes the buffer returned by write_static_from_directory to the output file.
+    let  mut args = env::args();
+    args.next().unwrap();
+    let directory_path= args.next().expect("Expect 2 args.<directory_path> <outputfile>");
+    let output_path = args.next().expect("Expect 2 args.<directory_path> <outputfile>");
+    println!("{} => {}", directory_path, output_path);
+    let buffer = write_static_from_directory(&PathBuf::from(directory_path)).unwrap();
+    println!("Read all");
+    let mut output = File::create(output_path).unwrap();
+    output.write_all(&buffer[..]).unwrap();
+    output.flush().unwrap();
+}
--- a/bin/test_static_dir.rs
+++ b/bin/test_static_dir.rs
@@ -0,0 +1,51 @@
+use std::env;
+use std::path::PathBuf;
+use std::fs::File;
+use std::io::Write;
+extern crate tantivy;
+use tantivy::directory::{StaticDirectory, write_static_from_directory};
+use tantivy::Index;
+use tantivy::query::QueryParser;
+use tantivy::collector::TopCollector;
+
+
+static DATA: &'static [u8] = include_bytes!("output.bin");
+
+fn run() -> tantivy::Result<()> {
+    // Open the index from the bytes embedded in DATA and load its searchers.
+    let directory = StaticDirectory::open(DATA).unwrap();
+    let index = Index::open_directory(directory).unwrap();
+    index.load_searchers().unwrap();
+    let searcher = index.searcher();
+
+    let schema = index.schema();
+    let title = schema.get_field("title").unwrap();
+    let body = schema.get_field("body").unwrap();
+
+    let query_parser = QueryParser::for_index(&index, vec![title, body]);
+    let query = query_parser.parse_query("sea whale")?;
+
+    let mut top_collector = TopCollector::with_limit(10);
+
+    searcher.search(&*query, &mut top_collector)?;
+
+    let doc_addresses = top_collector.docs();
+
+    // The actual documents still need to be
+    // retrieved from Tantivy's store.
+    //
+    // Since the body field was not configured as stored,
+    // the document returned will only contain
+    // a title.
+
+    for doc_address in doc_addresses {
+        let retrieved_doc = searcher.doc(&doc_address)?;
+        println!("{}", schema.to_json(&retrieved_doc));
+    }
+    Ok(())
+}
+
+
+fn main() {
+    run().unwrap();
+}
--- a/build.rs
+++ b/build.rs
@@ -1,61 +0,0 @@
-#[cfg(feature = "simdcompression")]
-mod build {
-    extern crate cc;
-
-    pub fn build() {
-        let mut config = cc::Build::new();
-        config
-            .include("./cpp/simdcomp/include")
-            .file("cpp/simdcomp/src/avxbitpacking.c")
-            .file("cpp/simdcomp/src/simdintegratedbitpacking.c")
-            .file("cpp/simdcomp/src/simdbitpacking.c")
-            .file("cpp/simdcomp/src/simdpackedsearch.c")
-            .file("cpp/simdcomp/src/simdcomputil.c")
-            .file("cpp/simdcomp/src/simdpackedselect.c")
-            .file("cpp/simdcomp/src/simdfor.c")
-            .file("cpp/simdcomp_wrapper.c");
-
-        if !cfg!(debug_assertions) {
-            config.opt_level(3);
-
-            if cfg!(target_env = "msvc") {
-                config
-                    .define("NDEBUG", None)
-                    .flag("/Gm-")
-                    .flag("/GS-")
-                    .flag("/Gy")
-                    .flag("/Oi")
-                    .flag("/GL");
-            }
-        }
-
-        if !cfg!(target_env = "msvc") {
-            config
-                .include("./cpp/streamvbyte/include")
-                .file("cpp/streamvbyte/src/streamvbyte.c")
-                .file("cpp/streamvbyte/src/streamvbytedelta.c")
-                .flag("-msse4.1")
-                .flag("-march=native")
-                .flag("-std=c99");
-        }
-
-        config.compile("libsimdcomp.a");
-
-        // Workaround for linking static libraries built with /GL
-        // https://github.com/rust-lang/rust/issues/26003
-        if !cfg!(debug_assertions) && cfg!(target_env = "msvc") {
-            println!("cargo:rustc-link-lib=dylib=simdcomp");
-        }
-
-        println!("cargo:rerun-if-changed=cpp");
-    }
-}
-
-#[cfg(not(feature = "simdcompression"))]
-mod build {
-    pub fn build() {}
-}
-
-fn main() {
-    build::build();
-}
--- a/cpp/simdcomp/.gitignore
+++ b/cpp/simdcomp/.gitignore
@@ -1,9 +0,0 @@
-Makefile.in
-lib*
-unit*
-*.o
-src/*.lo
-src/*.o
-src/.deps
-src/.dirstamp
-src/.libs
--- a/cpp/simdcomp/.travis.yml
+++ b/cpp/simdcomp/.travis.yml
@@ -1,11 +0,0 @@
-language: c
-sudo: false
-compiler:
-  - gcc
-  - clang
-
-branches:
-  only:
-    - master
-
-script: make && ./unit
--- a/cpp/simdcomp/CHANGELOG
+++ b/cpp/simdcomp/CHANGELOG
@@ -1,9 +0,0 @@
-Upcoming
-  - added missing include
-  - improved portability (MSVC)
-  - implemented C89 compatibility
-Version 0.0.3 (19 May 2014)
-  - improved documentation
-Version 0.0.2 (6 February 2014)
-  - added go demo
-Version 0.0.1  (5 February 2014)
--- a/cpp/simdcomp/LICENSE
+++ b/cpp/simdcomp/LICENSE
@@ -1,27 +0,0 @@
-Copyright (c) 2014--, The authors
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice, this
-  list of conditions and the following disclaimer in the documentation and/or
-  other materials provided with the distribution.
-
-* Neither the name of the {organization} nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/cpp/simdcomp/README.md
+++ b/cpp/simdcomp/README.md
@@ -1,137 +0,0 @@
-The SIMDComp library
-====================
-[![Build Status](https://travis-ci.org/lemire/simdcomp.png)](https://travis-ci.org/lemire/simdcomp)
-
-A simple C library for compressing lists of integers using binary packing and SIMD instructions.
-The assumption is either that you have a list of 32-bit integers where most of them are small, or a list of 32-bit integers where differences between successive integers are small. No software is able to reliably compress an array of 32-bit random numbers.
-
-This library can decode at least 4 billions of compressed integers per second on most
-desktop or laptop processors. That is, it can decompress data at a rate of 15 GB/s.
-This is significantly faster than generic codecs like gzip, LZO, Snappy or LZ4.
-
-On a Skylake Intel processor, it can decode integers at a rate 0.3 cycles per integer,
-which can easily translate into more than 8 decoded billions integers per second.
-
-Contributors: Daniel Lemire, Nathan Kurz, Christoph Rupp, Anatol Belski, Nick White and others
-
-What is it for?
-------------
-
-This is a low-level library for fast integer compression. By design it does not define a compressed
-format. It is up to the (sophisticated) user to create a compressed format.
-
-Requirements
-------------
-
- Your processor should support SSE4.1 (It is supported by most Intel and AMD processors released since 2008.)
- It is possible to build the core part of the code if your processor support SSE2 (Pentium4 or better)
- C99 compliant compiler (GCC is assumed)
- A Linux-like distribution is assumed by the makefile
-
-For a plain C version that does not use SIMD instructions, see https://github.com/lemire/LittleIntPacker
-
-Usage
-------
-
-Compression works over blocks of 128 integers.
-
-For a complete working example, see example.c (you can build it and
-run it with "make example; ./example").
-
-
-
-1) Lists of integers in random order.
-
-```C            
-const uint32_t b = maxbits(datain);// computes bit width
-simdpackwithoutmask(datain, buffer, b);//compressed to buffer, compressing 128 32-bit integers down to b*32 bytes
-simdunpack(buffer, backbuffer, b);//uncompressed to backbuffer
-```
-
-While 128 32-bit integers are read, only b 128-bit words are written. Thus, the compression ratio is 32/b.
-
-2) Sorted lists of integers.
-
-We used differential coding: we store the difference between successive integers. For this purpose, we need an initial value (called offset).
-
-```C            
-uint32_t offset = 0;
-uint32_t b1 = simdmaxbitsd1(offset,datain); // bit width
-simdpackwithoutmaskd1(offset, datain, buffer, b1);//compressing 128 32-bit integers down to b1*32 bytes
-simdunpackd1(offset, buffer, backbuffer, b1);//uncompressed
-```
-
-General example for arrays of arbitrary length:
-```C
-int compress_decompress_demo() {
-  size_t k, N = 9999;
-  __m128i * endofbuf;
-  uint32_t * datain = malloc(N * sizeof(uint32_t));
-  uint8_t * buffer;
-  uint32_t * backbuffer = malloc(N * sizeof(uint32_t));
-  uint32_t b;
-
-  for (k = 0; k < N; ++k){        /* start with k=0, not k=1! */
-    datain[k] = k;
-  }
-
-  b = maxbits_length(datain, N);
-  buffer = malloc(simdpack_compressedbytes(N,b)); // allocate just enough memory
-  endofbuf = simdpack_length(datain, N, (__m128i *)buffer, b);
-  /* compressed data is stored between buffer and endofbuf using (endofbuf-buffer)*sizeof(__m128i) bytes */
-  /* would be safe to do : buffer = realloc(buffer,(endofbuf-(__m128i *)buffer)*sizeof(__m128i)); */
-  simdunpack_length((const __m128i *)buffer, N, backbuffer, b);
-
-  for (k = 0; k < N; ++k){
-    if(datain[k] != backbuffer[k]) {
-      printf("bug\n");
-      return -1;
-    }
-  }
-  return 0;
-}
-```
-
-
-3) Frame-of-Reference 
-
-We also have frame-of-reference (FOR) functions (see simdfor.h header). They work like the bit packing
-routines, but do not use differential coding so they allow faster search in some cases, at the expense
-of compression.
-
-Setup
---------
-
-
-make
-make test
-
-and if you are daring:
-
-make install
-
-Go
--------
-
-If you are a go user, there is a "go" folder where you will find a simple demo.
-
-Other libraries
----------------
-
-* Fast decoder for VByte-compressed integers https://github.com/lemire/MaskedVByte
-* Fast integer compression in C using StreamVByte https://github.com/lemire/streamvbyte
-* FastPFOR is a C++ research library well suited to compress unsorted arrays: https://github.com/lemire/FastPFor
-* SIMDCompressionAndIntersection is a C++ research library well suited for sorted arrays (differential coding)
-and computing intersections: https://github.com/lemire/SIMDCompressionAndIntersection
-* TurboPFor is a C library that offers lots of interesting optimizations. Well worth checking! (GPL license) https://github.com/powturbo/TurboPFor
-* Oroch is a C++ library that offers a usable API (MIT license) https://github.com/ademakov/Oroch
-
-
-References
------------
-
-* Daniel Lemire, Leonid Boytsov, Nathan Kurz, SIMD Compression and the Intersection of Sorted Integers, Software Practice & Experience 46 (6) 2016. http://arxiv.org/abs/1401.6399
-* Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second through vectorization, Software Practice & Experience 45 (1), 2015.  http://arxiv.org/abs/1209.2137 http://onlinelibrary.wiley.com/doi/10.1002/spe.2203/abstract
-* Jeff Plaisance, Nathan Kurz, Daniel Lemire, Vectorized VByte Decoding, International Symposium on Web Algorithms 2015, 2015. http://arxiv.org/abs/1503.07387
-* Wayne Xin Zhao, Xudong Zhang, Daniel Lemire, Dongdong Shan, Jian-Yun Nie, Hongfei Yan, Ji-Rong Wen, A General SIMD-based Approach to Accelerating Compression Algorithms, ACM Transactions on Information Systems 33 (3), 2015. http://arxiv.org/abs/1502.01916
-* T. D. Wu, Bitpacking techniques for indexing genomes: I. Hash tables, Algorithms for Molecular Biology 11 (5), 2016. http://almob.biomedcentral.com/articles/10.1186/s13015-016-0069-5
--- a/cpp/simdcomp/benchmarks/benchmark.c
+++ b/cpp/simdcomp/benchmarks/benchmark.c
@@ -1,235 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-
-#include "simdcomp.h"
-
-#ifdef _MSC_VER
-# include <windows.h>
-
-__int64 freq;
-
-typedef __int64 time_snap_t;
-
-static time_snap_t time_snap(void)
-{
-	__int64 now;
-
-	QueryPerformanceCounter((LARGE_INTEGER *)&now);
-
-	return (__int64)((now*1000000)/freq);
-}
-# define TIME_SNAP_FMT "%I64d"
-#else
-# define time_snap clock
-# define TIME_SNAP_FMT "%lu"
-typedef clock_t time_snap_t;
-#endif
-
-
-void benchmarkSelect() {
-    uint32_t buffer[128];
-    uint32_t backbuffer[128];
-    uint32_t initial = 33;
-    uint32_t b;
-    time_snap_t S1, S2, S3;
-    int i;
-    printf("benchmarking select \n");
-
-    /* this test creates delta encoded buffers with different bits, then
-     * performs lower bound searches for each key */
-    for (b = 0; b <= 32; b++) {
-        uint32_t prev = initial;
-        uint32_t out[128];
-        /* initialize the buffer */
-        for (i = 0; i < 128; i++) {
-            buffer[i] =  ((uint32_t)(1655765 * i )) ;
-            if(b < 32) buffer[i] %= (1<<b);
-        }
-        for (i = 0; i < 128; i++) {
-            buffer[i] = buffer[i] + prev;
-            prev = buffer[i];
-        }
-
-        for (i = 1; i < 128; i++) {
-            if(buffer[i] < buffer[i-1] )
-                buffer[i] = buffer[i-1];
-        }
-        assert(simdmaxbitsd1(initial, buffer)<=b);
-
-        for (i = 0; i < 128; i++) {
-            out[i] = 0; /* memset would do too */
-        }
-
-        /* delta-encode to 'i' bits */
-        simdpackwithoutmaskd1(initial, buffer, (__m128i *)out, b);
-
-        S1 = time_snap();
-        for (i = 0; i < 128 * 10; i++) {
-            uint32_t valretrieved = simdselectd1(initial, (__m128i *)out, b, (uint32_t)i % 128);
-            assert(valretrieved == buffer[i%128]);
-        }
-        S2 = time_snap();
-        for (i = 0; i < 128 * 10; i++) {
-            simdunpackd1(initial,  (__m128i *)out, backbuffer, b);
-            assert(backbuffer[i % 128] == buffer[i % 128]);
-        }
-        S3 = time_snap();
-        printf("bit width = %d, fast select function time = " TIME_SNAP_FMT ", naive time = " TIME_SNAP_FMT "  \n", b, (S2-S1), (S3-S2));
-    }
-}
-
-int uint32_cmp(const void *a, const void *b)
-{
-    const uint32_t *ia = (const uint32_t *)a;
-    const uint32_t *ib = (const uint32_t *)b;
-    if(*ia < *ib)
-        return -1;
-    else if (*ia > *ib)
-        return 1;
-    return 0;
-}
-
-/* adapted from wikipedia */
-int binary_search(uint32_t * A, uint32_t key, int imin, int imax)
-{
-    int imid;
-    imax --;
-    while(imin + 1 < imax) {
-        imid = imin + ((imax - imin) / 2);
-
-        if (A[imid] > key) {
-            imax = imid;
-        } else if (A[imid] < key) {
-            imin = imid;
-        } else {
-            return imid;
-        }
-    }
-    return imax;
-}
-
-
-/* adapted from wikipedia */
-int lower_bound(uint32_t * A, uint32_t key, int imin, int imax)
-{
-    int imid;
-    imax --;
-    while(imin + 1 < imax) {
-        imid = imin + ((imax - imin) / 2);
-
-        if (A[imid] >= key) {
-            imax = imid;
-        } else if (A[imid] < key) {
-            imin = imid;
-        }
-    }
-    if(A[imin] >= key) return imin;
-    return imax;
-}
-
-void benchmarkSearch() {
-    uint32_t buffer[128];
-    uint32_t backbuffer[128];
-    uint32_t out[128];
-    uint32_t result, initial = 0;
-    uint32_t b, i;
-    time_snap_t S1, S2, S3, S4;
-
-    printf("benchmarking search \n");
-
-    /* this test creates delta encoded buffers with different bits, then
-     * performs lower bound searches for each key */
-    for (b = 0; b <= 32; b++) {
-        uint32_t prev = initial;
-        /* initialize the buffer */
-        for (i = 0; i < 128; i++) {
-            buffer[i] =  ((uint32_t)rand()) ;
-            if(b < 32) buffer[i] %= (1<<b);
-        }
-
-        qsort(buffer,128, sizeof(uint32_t), uint32_cmp);
-
-        for (i = 0; i < 128; i++) {
-            buffer[i] = buffer[i] + prev;
-            prev = buffer[i];
-        }
-        for (i = 1; i < 128; i++) {
-            if(buffer[i] < buffer[i-1] )
-                buffer[i] = buffer[i-1];
-        }
-        assert(simdmaxbitsd1(initial, buffer)<=b);
-        for (i = 0; i < 128; i++) {
-            out[i] = 0; /* memset would do too */
-        }
-
-        /* delta-encode to 'i' bits */
-        simdpackwithoutmaskd1(initial, buffer, (__m128i *)out, b);
-        simdunpackd1(initial,  (__m128i *)out, backbuffer, b);
-
-        for (i = 0; i < 128; i++) {
-            assert(buffer[i] == backbuffer[i]);
-         }
-        S1 = time_snap();
-        for (i = 0; i < 128 * 10; i++) {
-
-            int pos;
-            uint32_t pseudorandomkey  =  buffer[i%128];
-            __m128i vecinitial = _mm_set1_epi32(initial);
-            pos = simdsearchd1(&vecinitial, (__m128i *)out, b,
-                               pseudorandomkey, &result);
-            if((result < pseudorandomkey) || (buffer[pos] != result)) {
-                printf("bug A.\n");
-            } else if (pos > 0) {
-                if(buffer[pos-1] >= pseudorandomkey)
-                    printf("bug B.\n");
-            }
-        }
-        S2 = time_snap();
-        for (i = 0; i < 128 * 10; i++) {
-            int pos;
-            uint32_t pseudorandomkey  =  buffer[i%128];
-            simdunpackd1(initial,  (__m128i *)out, backbuffer, b);
-            pos =  lower_bound(backbuffer, pseudorandomkey, 0, 128);
-            result = backbuffer[pos];
-
-            if((result < pseudorandomkey) || (buffer[pos] != result)) {
-                printf("bug C.\n");
-            } else if (pos > 0) {
-                if(buffer[pos-1] >= pseudorandomkey)
-                    printf("bug D.\n");
-            }
-        }
-        S3 = time_snap();
-        for (i = 0; i < 128 * 10; i++) {
-
-            int pos;
-            uint32_t pseudorandomkey  =  buffer[i%128];
-            pos = simdsearchwithlengthd1(initial, (__m128i *)out, b, 128,
-                               pseudorandomkey, &result);
-            if((result < pseudorandomkey) || (buffer[pos] != result)) {
-                printf("bug A.\n");
-            } else if (pos > 0) {
-                if(buffer[pos-1] >= pseudorandomkey)
-                    printf("bug B.\n");
-            }
-        }
-        S4 = time_snap();
-
-        printf("bit width = %d, fast search function time = " TIME_SNAP_FMT ", naive time = " TIME_SNAP_FMT " , fast with length time = " TIME_SNAP_FMT "  \n", b, (S2-S1), (S3-S2), (S4-S3) );
-    }
-}
-
-
-int main() {
-#ifdef _MSC_VER
-    QueryPerformanceFrequency((LARGE_INTEGER *)&freq);
-#endif
-    benchmarkSearch();
-    benchmarkSelect();
-    return 0;
-}
--- a/cpp/simdcomp/benchmarks/bitpackingbenchmark.c
+++ b/cpp/simdcomp/benchmarks/bitpackingbenchmark.c
@@ -1,205 +0,0 @@
-#include <stdio.h>
-
-#include "simdcomp.h"
-
-
-#define RDTSC_START(cycles)                                                   \
-    do {                                                                      \
-        register unsigned cyc_high, cyc_low;                                  \
-        __asm volatile(                                                       \
-            "cpuid\n\t"                                                       \
-            "rdtsc\n\t"                                                       \
-            "mov %%edx, %0\n\t"                                               \
-            "mov %%eax, %1\n\t"                                               \
-            : "=r"(cyc_high), "=r"(cyc_low)::"%rax", "%rbx", "%rcx", "%rdx"); \
-        (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;                      \
-    } while (0)
-
-#define RDTSC_FINAL(cycles)                                                   \
-    do {                                                                      \
-        register unsigned cyc_high, cyc_low;                                  \
-        __asm volatile(                                                       \
-            "rdtscp\n\t"                                                      \
-            "mov %%edx, %0\n\t"                                               \
-            "mov %%eax, %1\n\t"                                               \
-            "cpuid\n\t"                                                       \
-            : "=r"(cyc_high), "=r"(cyc_low)::"%rax", "%rbx", "%rcx", "%rdx"); \
-        (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;                      \
-    } while (0)
-
-
-
-
-uint32_t * get_random_array_from_bit_width(uint32_t length, uint32_t bit) {
-    uint32_t * answer = malloc(sizeof(uint32_t) * length);
-    uint32_t mask = (uint32_t) ((UINT64_C(1) << bit) - 1);
-    uint32_t i;
-    for(i = 0; i < length; ++i) {
-        answer[i] = rand() & mask;
-    }
-    return answer;
-}
-
-uint32_t * get_random_array_from_bit_width_d1(uint32_t length, uint32_t bit) {
-    uint32_t * answer = malloc(sizeof(uint32_t) * length);
-    uint32_t mask = (uint32_t) ((UINT64_C(1) << bit) - 1);
-    uint32_t i;
-    answer[0] = rand() & mask;
-    for(i = 1; i < length; ++i) {
-        answer[i] = answer[i-1] + (rand() & mask);
-    }
-    return answer;
-}
-
-
-void demo128() {
-    const uint32_t length = 128;
-    uint32_t bit;
-    printf("# --- %s\n", __func__);
-    printf("# compressing %d integers\n",length);
-    printf("# format: bit width, pack in cycles per int, unpack in cycles per int\n");
-    for(bit = 1; bit <= 32; ++bit) {
-        uint32_t i;
-
-        uint32_t * data = get_random_array_from_bit_width(length, bit);
-        __m128i * buffer = malloc(length * sizeof(uint32_t));
-        uint32_t * backdata = malloc(length * sizeof(uint32_t));
-        uint32_t repeat = 500;
-        uint64_t min_diff;
-        printf("%d\t",bit);
-        min_diff = (uint64_t)-1;
-        for (i = 0; i < repeat; i++) {
-            uint64_t cycles_start, cycles_final, cycles_diff;
-            __asm volatile("" ::: /* pretend to clobber */ "memory");
-            RDTSC_START(cycles_start);
-            simdpackwithoutmask(data,buffer, bit);
-            RDTSC_FINAL(cycles_final);
-            cycles_diff = (cycles_final - cycles_start);
-            if (cycles_diff < min_diff) min_diff = cycles_diff;
-        }
-        printf("%.2f\t",min_diff*1.0/length);
-        min_diff = (uint64_t)-1;
-        for (i = 0; i < repeat; i++) {
-            uint64_t cycles_start, cycles_final, cycles_diff;
-            __asm volatile("" ::: /* pretend to clobber */ "memory");
-            RDTSC_START(cycles_start);
-            simdunpack(buffer, backdata,bit);
-            RDTSC_FINAL(cycles_final);
-            cycles_diff = (cycles_final - cycles_start);
-            if (cycles_diff < min_diff) min_diff = cycles_diff;
-        }
-        printf("%.2f\t",min_diff*1.0/length);
-
-        free(data);
-        free(buffer);
-        free(backdata);
-        printf("\n");
-    }
-    printf("\n\n"); /* two blank lines are required by gnuplot */
-}
-
-void demo128_d1() {
-    const uint32_t length = 128;
-    uint32_t bit;
-    printf("# --- %s\n", __func__);
-    printf("# compressing %d integers\n",length);
-    printf("# format: bit width, pack in cycles per int, unpack in cycles per int\n");
-    for(bit = 1; bit <= 32; ++bit) {
-        uint32_t i;
-
-        uint32_t * data = get_random_array_from_bit_width_d1(length, bit);
-        __m128i * buffer = malloc(length * sizeof(uint32_t));
-        uint32_t * backdata = malloc(length * sizeof(uint32_t));
-        uint32_t repeat = 500;
-        uint64_t min_diff;
-        printf("%d\t",bit);
-        min_diff = (uint64_t)-1;
-        for (i = 0; i < repeat; i++) {
-            uint64_t cycles_start, cycles_final, cycles_diff;
-            __asm volatile("" ::: /* pretend to clobber */ "memory");
-            RDTSC_START(cycles_start);
-            simdpackwithoutmaskd1(0,data,buffer, bit);
-            RDTSC_FINAL(cycles_final);
-            cycles_diff = (cycles_final - cycles_start);
-            if (cycles_diff < min_diff) min_diff = cycles_diff;
-        }
-        printf("%.2f\t",min_diff*1.0/length);
-        min_diff = (uint64_t)-1;
-        for (i = 0; i < repeat; i++) {
-            uint64_t cycles_start, cycles_final, cycles_diff;
-            __asm volatile("" ::: /* pretend to clobber */ "memory");
-            RDTSC_START(cycles_start);
-            simdunpackd1(0,buffer, backdata,bit);
-            RDTSC_FINAL(cycles_final);
-            cycles_diff = (cycles_final - cycles_start);
-            if (cycles_diff < min_diff) min_diff = cycles_diff;
-        }
-        printf("%.2f\t",min_diff*1.0/length);
-
-        free(data);
-        free(buffer);
-        free(backdata);
-        printf("\n");
-    }
-    printf("\n\n"); /* two blank lines are required by gnuplot */
-}
-
-#ifdef __AVX2__
-void demo256() {
-    const uint32_t length = 256;
-    uint32_t bit;
-    printf("# --- %s\n", __func__);
-    printf("# compressing %d integers\n",length);
-    printf("# format: bit width, pack in cycles per int, unpack in cycles per int\n");
-    for(bit = 1; bit <= 32; ++bit) {
-        uint32_t i;
-
-        uint32_t * data = get_random_array_from_bit_width(length, bit);
-        __m256i * buffer = malloc(length * sizeof(uint32_t));
-        uint32_t * backdata = malloc(length * sizeof(uint32_t));
-        uint32_t repeat = 500;
-        uint64_t min_diff;
-        printf("%d\t",bit);
-        min_diff = (uint64_t)-1;
-        for (i = 0; i < repeat; i++) {
-            uint64_t cycles_start, cycles_final, cycles_diff;
-            __asm volatile("" ::: /* pretend to clobber */ "memory");
-            RDTSC_START(cycles_start);
-            avxpackwithoutmask(data,buffer, bit);
-            RDTSC_FINAL(cycles_final);
-            cycles_diff = (cycles_final - cycles_start);
-            if (cycles_diff < min_diff) min_diff = cycles_diff;
-        }
-        printf("%.2f\t",min_diff*1.0/length);
-        min_diff = (uint64_t)-1;
-        for (i = 0; i < repeat; i++) {
-            uint64_t cycles_start, cycles_final, cycles_diff;
-            __asm volatile("" ::: /* pretend to clobber */ "memory");
-            RDTSC_START(cycles_start);
-            avxunpack(buffer, backdata,bit);
-            RDTSC_FINAL(cycles_final);
-            cycles_diff = (cycles_final - cycles_start);
-            if (cycles_diff < min_diff) min_diff = cycles_diff;
-        }
-        printf("%.2f\t",min_diff*1.0/length);
-
-        free(data);
-        free(buffer);
-        free(backdata);
-        printf("\n");
-    }
-    printf("\n\n"); /* two blank lines are required by gnuplot */
-}
-#endif /* avx 2 */
-
-
-int main() {
-    demo128();
-    demo128_d1();
-#ifdef __AVX2__
-    demo256();
-#endif
-    return 0;
-
-
-}
--- a/cpp/simdcomp/example.c
+++ b/cpp/simdcomp/example.c
@@ -1,195 +0,0 @@
-/* Type "make example" to build this example program. */
-#include <stdio.h>
-#include <time.h>
-#include <stdlib.h>
-#include "simdcomp.h"
-
-/**
-We provide several different code examples.
-**/
-
-
-/* very simple test to illustrate a simple application */
-int compress_decompress_demo() {
-    size_t k, N = 9999;
-    __m128i * endofbuf;
-    int howmanybytes;
-    float compratio;
-    uint32_t * datain = malloc(N * sizeof(uint32_t));
-    uint8_t * buffer;
-    uint32_t * backbuffer = malloc(N * sizeof(uint32_t));
-    uint32_t b;
-    printf("== simple test\n");
-
-    for (k = 0; k < N; ++k) {       /* start with k=0, not k=1! */
-        datain[k] = k;
-    }
-
-    b = maxbits_length(datain, N);
-    buffer = malloc(simdpack_compressedbytes(N,b));
-    endofbuf = simdpack_length(datain, N, (__m128i *)buffer, b);
-    howmanybytes = (endofbuf-(__m128i *)buffer)*sizeof(__m128i); /* number of compressed bytes */
-    compratio = N*sizeof(uint32_t) * 1.0 / howmanybytes;
-    /* endofbuf points to the end of the compressed data */
-    buffer = realloc(buffer,(endofbuf-(__m128i *)buffer)*sizeof(__m128i)); /* optional but safe. */
-    printf("Compressed %d integers down to %d bytes (comp. ratio = %f).\n",(int)N,howmanybytes,compratio);
-    /* in actual applications b must be stored and retrieved: caller is responsible for that. */
-    simdunpack_length((const __m128i *)buffer, N, backbuffer, b); /* will return a pointer to endofbuf */ 
-
-    for (k = 0; k < N; ++k) {
-        if(datain[k] != backbuffer[k]) {
-            printf("bug at %lu \n",(unsigned long)k);
-            return -1;
-        }
-    }
-    printf("Code works!\n");
-    free(datain);
-    free(buffer);
-    free(backbuffer);
-    return 0;
-}
-
-
-
-/* compresses data from datain to buffer, returns how many bytes written
-used below in simple_demo */
-size_t compress(uint32_t * datain, size_t length, uint8_t * buffer) {
-    uint32_t offset;
-    uint8_t * initout;
-    size_t k;
-    if(length/SIMDBlockSize*SIMDBlockSize != length) {
-        printf("Data length should be a multiple of %i \n",SIMDBlockSize);
-    }
-    offset = 0;
-    initout = buffer;
-    for(k = 0; k < length / SIMDBlockSize; ++k) {
-        uint32_t b = simdmaxbitsd1(offset,
-                                   datain + k * SIMDBlockSize);
-        *buffer++ = b;
-        simdpackwithoutmaskd1(offset, datain + k * SIMDBlockSize, (__m128i *) buffer,
-                              b);
-        offset = datain[k * SIMDBlockSize + SIMDBlockSize - 1];
-        buffer += b * sizeof(__m128i);
-    }
-    return buffer - initout;
-}
-
-/* Another illustration ... */
-void simple_demo() {
-    size_t REPEAT = 10, gap;
-    size_t N = 1000 * SIMDBlockSize;/* SIMDBlockSize is 128 */
-    uint32_t * datain = malloc(N * sizeof(uint32_t));
-    size_t compsize;
-    clock_t start, end;
-    uint8_t * buffer = malloc(N * sizeof(uint32_t) + N / SIMDBlockSize); /* output buffer */
-    uint32_t * backbuffer = malloc(SIMDBlockSize * sizeof(uint32_t));
-    printf("== simple demo\n");
-    for (gap = 1; gap <= 243; gap *= 3) {
-        size_t k, repeat;
-        uint32_t offset = 0;
-        uint32_t bogus = 0;
-        double numberofseconds;
-
-        printf("\n");
-        printf(" gap = %lu \n", (unsigned long) gap);
-        datain[0] = 0;
-        for (k = 1; k < N; ++k)
-            datain[k] = datain[k-1] + ( rand() % (gap + 1) );
-        compsize = compress(datain,N,buffer);
-        printf("compression ratio = %f \n",  (N * sizeof(uint32_t))/ (compsize * 1.0 ));
-        start = clock();
-        for(repeat = 0; repeat < REPEAT; ++repeat) {
-            uint8_t * decbuffer = buffer;
-            for (k = 0; k * SIMDBlockSize < N; ++k) {
-                uint8_t b = *decbuffer++;
-                simdunpackd1(offset, (__m128i *) decbuffer, backbuffer, b);
-                /* do something here with backbuffer */
-                bogus += backbuffer[3];
-                decbuffer += b * sizeof(__m128i);
-                offset = backbuffer[SIMDBlockSize - 1];
-            }
-        }
-        end = clock();
-        numberofseconds = (end-start)/(double)CLOCKS_PER_SEC;
-        printf("decoding speed in million of integers per second %f \n",N*REPEAT/(numberofseconds*1000.0*1000.0));
-        start = clock();
-        for(repeat = 0; repeat < REPEAT; ++repeat) {
-            uint8_t * decbuffer = buffer;
-            for (k = 0; k * SIMDBlockSize < N; ++k) {
-                memcpy(backbuffer,decbuffer+k*SIMDBlockSize,SIMDBlockSize*sizeof(uint32_t));
-                bogus += backbuffer[3] - backbuffer[100];
-            }
-        }
-        end = clock();
-        numberofseconds = (end-start)/(double)CLOCKS_PER_SEC;
-        printf("memcpy speed in million of integers per second %f \n",N*REPEAT/(numberofseconds*1000.0*1000.0));
-        printf("ignore me %i \n",bogus);
-        printf("All tests are in CPU cache. Avoid out-of-cache decoding in applications.\n");
-    }
-    free(buffer);
-    free(datain);
-    free(backbuffer);
-}
-
-/* Used below in more_sophisticated_demo ... */
-size_t varying_bit_width_compress(uint32_t * datain, size_t length, uint8_t * buffer) {
-    uint8_t * initout;
-    size_t k;
-    if(length/SIMDBlockSize*SIMDBlockSize != length) {
-        printf("Data length should be a multiple of %i \n",SIMDBlockSize);
-    }
-    initout = buffer;
-    for(k = 0; k < length / SIMDBlockSize; ++k) {
-        uint32_t b = maxbits(datain);
-        *buffer++ = b;
-        simdpackwithoutmask(datain, (__m128i *)buffer, b);
-        datain += SIMDBlockSize;
-        buffer += b * sizeof(__m128i);
-    }
-    return buffer - initout;
-}
-
-/* Here we compress the data in blocks of 128 integers with varying bit width */
-int varying_bit_width_demo() {
-    size_t nn = 128 * 2;
-    uint32_t * datainn = malloc(nn * sizeof(uint32_t));
-    uint8_t * buffern = malloc(nn * sizeof(uint32_t) + nn / SIMDBlockSize);
-    uint8_t * initbuffern = buffern;
-    uint32_t * backbuffern = malloc(nn * sizeof(uint32_t));
-    size_t k, compsize;
-    printf("== varying bit-width demo\n");
-
-    for(k=0; k<nn; ++k) {
-        datainn[k] = rand() % (k + 1);
-    }
-
-    compsize = varying_bit_width_compress(datainn,nn,buffern);
-    printf("encoded size: %u (original size: %u)\n", (unsigned)compsize,
-           (unsigned)(nn * sizeof(uint32_t)));
-
-    for (k = 0; k * SIMDBlockSize < nn; ++k) {
-        uint32_t b = *buffern;
-        buffern++;
-        simdunpack((const __m128i *)buffern, backbuffern + k * SIMDBlockSize, b);
-        buffern += b * sizeof(__m128i);
-    }
-
-    for (k = 0; k < nn; ++k) {
-        if(backbuffern[k] != datainn[k]) {
-            printf("bug\n");
-            return -1;
-        }
-    }
-    printf("Code works!\n");
-    free(datainn);
-    free(initbuffern);
-    free(backbuffern);
-    return 0;
-}
-
-int main() {
-    if(compress_decompress_demo() != 0) return -1;
-    if(varying_bit_width_demo() != 0) return -1;
-    simple_demo();
-    return 0;
-}
--- a/cpp/simdcomp/go/README.md
+++ b/cpp/simdcomp/go/README.md
@@ -1,13 +0,0 @@
-Simple Go demo
-==============
-
-Setup
-======
-
-Start by installing the simdcomp library (make && make install).
-
-Then type:
-
-go run test.go
-
-
--- a/cpp/simdcomp/go/test.go
+++ b/cpp/simdcomp/go/test.go
@@ -1,71 +0,0 @@
-/////////
-// This particular file is in the public domain.
-// Author: Daniel Lemire
-////////
-
-package main 
-
-/*
-#cgo LDFLAGS: -lsimdcomp
-#include <simdcomp.h>
-*/
-import "C"
-import "fmt"
-
-//////////
-// For this demo, we pack and unpack blocks of 128 integers
-/////////
-func main() {
-        // I am going to use C types. Alternative might be to use unsafe.Pointer calls, see http://bit.ly/1ndw3W3
-        // this is our original data
-        var data [128]C.uint32_t
-        for i := C.uint32_t(0); i < C.uint32_t(128); i++ {
-            data[i] = i
-        }
-
-
-
-
-
-        ////////////
-        // We first pack without differential coding
-        ///////////
-        // computing how many bits per int. is needed
-        b  := C.maxbits(&data[0])
-        ratio := 32.0/float64(b)
-        fmt.Println("Bit width  ", b)
-        fmt.Println(fmt.Sprintf("Compression ratio %f ", ratio))
-         // we are now going to create a buffer to receive the packed data (each __m128i uses 128 bits)
-        out := make([] C.__m128i,b)       
-        C.simdpackwithoutmask( &data[0],&out[0],b);
-        var recovereddata [128]C.uint32_t
-        C.simdunpack(&out[0],&recovereddata[0],b)
-        for i := 0; i < 128; i++ {
-            if data[i] != recovereddata[i]  {
-                  fmt.Println("Bug ")
-                  return
-            }
-        } 
-
-        ///////////
-        // Next, we use differential coding
-        //////////
-        offset := C.uint32_t(0) // if you pack data from K to K + 128, offset should be the value at K-1. When K = 0, choose a default
-        b1  := C.simdmaxbitsd1(offset,&data[0])
-        ratio1 := 32.0/float64(b1)
-        fmt.Println("Bit width  ", b1)
-        fmt.Println(fmt.Sprintf("Compression ratio %f ", ratio1))
-         // we are now going to create a buffer to receive the packed data (each __m128i uses 128 bits)
-        out = make([] C.__m128i,b1)       
-        C.simdpackwithoutmaskd1(offset, &data[0],&out[0],b1);
-        C.simdunpackd1(offset,&out[0],&recovereddata[0],b1)
-        for i := 0; i < 128; i++ {
-            if data[i] != recovereddata[i]  {
-                  fmt.Println("Bug ")
-                  return
-            }
-        } 
-
-        fmt.Println("test succesful.")
-      
-}
--- a/cpp/simdcomp/include/avxbitpacking.h
+++ b/cpp/simdcomp/include/avxbitpacking.h
@@ -1,40 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-
-#ifndef INCLUDE_AVXBITPACKING_H_
-#define INCLUDE_AVXBITPACKING_H_
-
-
-#ifdef __AVX2__
-
-#include "portability.h"
-
-
-/* AVX2 is required */
-#include <immintrin.h>
-/* for memset */
-#include <string.h>
-
-#include "simdcomputil.h"
-
-enum{ AVXBlockSize = 256};
-
-/* max integer logarithm over a range of AVXBlockSize integers (256 integer) */
-uint32_t avxmaxbits(const uint32_t * begin);
-
-/* reads 256 values from "in", writes  "bit" 256-bit vectors to "out" */
-void avxpack(const uint32_t *  in,__m256i *  out, const uint32_t bit);
-
-/* reads 256 values from "in", writes  "bit" 256-bit vectors to "out" */
-void avxpackwithoutmask(const uint32_t *  in,__m256i *  out, const uint32_t bit);
-
-/* reads  "bit" 256-bit vectors from "in", writes  256 values to "out" */
-void avxunpack(const __m256i *  in,uint32_t *  out, const uint32_t bit);
-
-
-
-
-#endif /* __AVX2__ */
-
-#endif /* INCLUDE_AVXBITPACKING_H_ */
--- a/cpp/simdcomp/include/portability.h
+++ b/cpp/simdcomp/include/portability.h
@@ -1,81 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-#ifndef SIMDBITCOMPAT_H_
-#define SIMDBITCOMPAT_H_
-
-#include <iso646.h> /* mostly for Microsoft compilers */
-#include <string.h>
-
-#if SIMDCOMP_DEBUG
-# define SIMDCOMP_ALWAYS_INLINE inline
-# define SIMDCOMP_NEVER_INLINE
-# define SIMDCOMP_PURE
-#else
-# if defined(__GNUC__)
-#  if __GNUC__ >= 3
-#   define SIMDCOMP_ALWAYS_INLINE inline __attribute__((always_inline))
-#   define SIMDCOMP_NEVER_INLINE __attribute__((noinline))
-#   define SIMDCOMP_PURE __attribute__((pure))
-#  else
-#   define SIMDCOMP_ALWAYS_INLINE inline
-#   define SIMDCOMP_NEVER_INLINE
-#   define SIMDCOMP_PURE
-#  endif
-# elif defined(_MSC_VER)
-#  define SIMDCOMP_ALWAYS_INLINE __forceinline
-#  define SIMDCOMP_NEVER_INLINE
-#  define SIMDCOMP_PURE
-# else
-#  if __has_attribute(always_inline)
-#   define SIMDCOMP_ALWAYS_INLINE inline __attribute__((always_inline))
-#  else
-#   define SIMDCOMP_ALWAYS_INLINE inline
-#  endif
-#  if __has_attribute(noinline)
-#   define SIMDCOMP_NEVER_INLINE __attribute__((noinline))
-#  else
-#   define SIMDCOMP_NEVER_INLINE
-#  endif
-#  if __has_attribute(pure)
-#   define SIMDCOMP_PURE __attribute__((pure))
-#  else
-#   define SIMDCOMP_PURE
-#  endif
-# endif
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER < 1600
-typedef unsigned int uint32_t;
-typedef unsigned char uint8_t;
-typedef signed char int8_t;
-#else
-#include <stdint.h> /* part of Visual Studio 2010 and better, others likely anyway */
-#endif
-
-#if defined(_MSC_VER)
-#define SIMDCOMP_ALIGNED(x) __declspec(align(x))
-#else
-#if defined(__GNUC__)
-#define SIMDCOMP_ALIGNED(x) __attribute__ ((aligned(x)))
-#endif
-#endif
-
-#if defined(_MSC_VER)
-# include <intrin.h>
-/* 64-bit needs extending */
-# define SIMDCOMP_CTZ(result, mask) do { \
-		unsigned long index; \
-		if (!_BitScanForward(&(index), (mask))) { \
-			(result) = 32U; \
-		} else { \
-			(result) = (uint32_t)(index); \
-		} \
-	} while (0)
-#else
-# define SIMDCOMP_CTZ(result, mask) \
-	result = __builtin_ctz(mask)
-#endif
-
-#endif /* SIMDBITCOMPAT_H_ */
-
--- a/cpp/simdcomp/include/simdbitpacking.h
+++ b/cpp/simdcomp/include/simdbitpacking.h
@@ -1,72 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-#ifndef SIMDBITPACKING_H_
-#define SIMDBITPACKING_H_
-
-#include "portability.h"
-
-/* SSE2 is required */
-#include <emmintrin.h>
-/* for memset */
-#include <string.h>
-
-#include "simdcomputil.h"
-
-/***
-* Please see example.c for various examples on how to make good use
-* of these functions.
-*/
-
-
-
-/* reads 128 values from "in", writes  "bit" 128-bit vectors to "out".
- * The input values are masked so that only the least significant "bit" bits are used. */
-void simdpack(const uint32_t *  in,__m128i *  out, const uint32_t bit);
-
-/* reads 128 values from "in", writes  "bit" 128-bit vectors to "out".
- * The input values are assumed to be less than 1<<bit. */
-void simdpackwithoutmask(const uint32_t *  in,__m128i *  out, const uint32_t bit);
-
-/* reads  "bit" 128-bit vectors from "in", writes  128 values to "out" */
-void simdunpack(const __m128i *  in,uint32_t *  out, const uint32_t bit);
-
-
-
-/* how many compressed bytes are needed to compressed length integers using a bit width of bit with 
-the  simdpackFOR_length function. */
-int simdpack_compressedbytes(int length, const uint32_t bit);
-
-/* like simdpack, but supports an undetermined number of inputs.
- * This is useful if you need to unpack an array of integers that is not divisible by 128 integers.
- * Returns a pointer to the (advanced) compressed array. Compressed data is stored in the memory location between 
- the provided (out) pointer and the returned pointer. */
-__m128i * simdpack_length(const uint32_t *   in, size_t length, __m128i *    out, const uint32_t bit);
-
-/* like simdunpack, but supports an undetermined number of inputs.
- * This is useful if you need to unpack an array of integers that is not divisible by 128 integers.
- * Returns a pointer to the (advanced) compressed array. The read compressed data is between the provided 
- (in) pointer and the returned pointer. */
-const __m128i * simdunpack_length(const __m128i *   in, size_t length, uint32_t * out, const uint32_t bit);
-
-
-
-
-/* like simdpack, but supports an undetermined small number of inputs. This is useful if you need to pack less 
-than 128 integers.
- * Note that this function is much slower.
- * Returns a pointer to the (advanced) compressed array. Compressed data is stored in the memory location 
- between the provided (out) pointer and the returned pointer. */
-__m128i * simdpack_shortlength(const uint32_t *   in, int length, __m128i *    out, const uint32_t bit);
-
-/* like simdunpack, but supports an undetermined small number of inputs. This is useful if you need to unpack less
- than 128 integers.
- * Note that this function is much slower.
- * Returns a pointer to the (advanced) compressed array. The read compressed data is between the provided (in) 
- pointer and the returned pointer. */
-const __m128i * simdunpack_shortlength(const __m128i *   in, int length, uint32_t * out, const uint32_t bit);
-
-/* given a block of 128 packed values, this function sets the value at index "index" to "value" */
-void simdfastset(__m128i * in128, uint32_t b, uint32_t value, size_t index);
-
-#endif /* SIMDBITPACKING_H_ */
--- a/cpp/simdcomp/include/simdcomp.h
+++ b/cpp/simdcomp/include/simdcomp.h
@@ -1,22 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-
-#ifndef SIMDCOMP_H_
-#define SIMDCOMP_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "simdbitpacking.h"
-#include "simdcomputil.h"
-#include "simdfor.h"
-#include "simdintegratedbitpacking.h"
-#include "avxbitpacking.h"
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif 
--- a/cpp/simdcomp/include/simdcomputil.h
+++ b/cpp/simdcomp/include/simdcomputil.h
@@ -1,54 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-
-#ifndef SIMDCOMPUTIL_H_
-#define SIMDCOMPUTIL_H_
-
-#include "portability.h"
-
-/* SSE2 is required */
-#include <emmintrin.h>
-
-
-
-
-/* returns the integer logarithm of v (bit width) */
-uint32_t bits(const uint32_t v);
-
-/* max integer logarithm over a range of SIMDBlockSize integers (128 integer) */
-uint32_t maxbits(const uint32_t * begin);
-
-/* same as maxbits, but we specify the number of integers */
-uint32_t maxbits_length(const uint32_t * in,uint32_t length);
-
-enum{ SIMDBlockSize = 128};
-
-
-/* computes (quickly) the minimal value of 128 values */
-uint32_t simdmin(const uint32_t * in);
-
-/* computes (quickly) the minimal value of the specified number of values */
-uint32_t simdmin_length(const uint32_t * in, uint32_t length);
-
-#ifdef __SSE4_1__
-/* computes (quickly) the minimal and maximal value of the specified number of values */
-void simdmaxmin_length(const uint32_t * in, uint32_t length, uint32_t * getmin, uint32_t * getmax);
-
-/* computes (quickly) the minimal and maximal value of the 128 values */
-void simdmaxmin(const uint32_t * in, uint32_t * getmin, uint32_t * getmax);
-
-#endif
-
-/* like maxbit over 128 integers (SIMDBlockSize) with provided initial value 
-   and using differential coding */
-uint32_t simdmaxbitsd1(uint32_t initvalue, const uint32_t * in);
-
-/* like simdmaxbitsd1, but calculates maxbits over |length| integers 
-   with provided initial value. |length| can be any arbitrary value. */
-uint32_t simdmaxbitsd1_length(uint32_t initvalue, const uint32_t * in,
-                uint32_t length);
-
-
-
-#endif /* SIMDCOMPUTIL_H_ */
--- a/cpp/simdcomp/include/simdfor.h
+++ b/cpp/simdcomp/include/simdfor.h
@@ -1,72 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-#ifndef INCLUDE_SIMDFOR_H_
-#define INCLUDE_SIMDFOR_H_
-
-#include "portability.h"
-
-/* SSE2 is required */
-#include <emmintrin.h>
-
-#include "simdcomputil.h"
-#include "simdbitpacking.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* reads 128 values from "in", writes  "bit" 128-bit vectors to "out" */
-void simdpackFOR(uint32_t initvalue, const uint32_t *  in,__m128i *  out, const uint32_t bit);
-
-
-/* reads "bit" 128-bit vectors from "in", writes  128 values to "out" */
-void simdunpackFOR(uint32_t initvalue, const __m128i *  in,uint32_t *  out, const uint32_t bit);
-
-
-/* how many compressed bytes are needed to compressed length integers using a bit width of bit with 
-the  simdpackFOR_length function. */
-int simdpackFOR_compressedbytes(int length, const uint32_t bit);
-
-/* like simdpackFOR, but supports an undetermined number of inputs. 
-This is useful if you need to pack less than 128 integers. Note that this function is much slower. 
- Compressed data is stored in the memory location between 
- the provided (out) pointer and the returned pointer. */
-__m128i * simdpackFOR_length(uint32_t initvalue, const uint32_t *   in, int length, __m128i *    out, const uint32_t bit);
-
-/* like simdunpackFOR, but supports an undetermined number of inputs. 
-This is useful if you need to unpack less than 128 integers. Note that this function is much slower. 
- The read compressed data is between the provided 
- (in) pointer and the returned pointer.  */
-const __m128i * simdunpackFOR_length(uint32_t initvalue, const __m128i *   in, int length, uint32_t * out, const uint32_t bit);
-
-
-/* returns the value stored at the specified "slot".
-* */
-uint32_t simdselectFOR(uint32_t initvalue, const __m128i *in, uint32_t bit,
-                int slot);
-
-/* given a block of 128 packed values, this function sets the value at index "index" to "value" */
-void simdfastsetFOR(uint32_t initvalue, __m128i * in, uint32_t bit, uint32_t value, size_t index);
-
-
-/* searches "bit" 128-bit vectors from "in" (= length<=128 encoded integers) for the first encoded uint32 value
- * which is >= |key|, and returns its position. It is assumed that the values
- * stored are in sorted order.
- * The encoded key is stored in "*presult".
- * The first length decoded integers, ignoring others. If no value is larger or equal to the key,
- * length is returned. Length should be no larger than 128.
- *
- * If no value is larger or equal to the key,
-* length is returned */
-int simdsearchwithlengthFOR(uint32_t initvalue, const __m128i *in, uint32_t bit,
-                int length, uint32_t key, uint32_t *presult);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-
-
-
-#endif /* INCLUDE_SIMDFOR_H_ */
--- a/cpp/simdcomp/include/simdintegratedbitpacking.h
+++ b/cpp/simdcomp/include/simdintegratedbitpacking.h
@@ -1,98 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-
-#ifndef SIMD_INTEGRATED_BITPACKING_H
-#define SIMD_INTEGRATED_BITPACKING_H
-
-#include "portability.h"
-
-/* SSE2 is required */
-#include <emmintrin.h>
-
-#include "simdcomputil.h"
-#include "simdbitpacking.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* reads 128 values from "in", writes  "bit" 128-bit vectors to "out"
-   integer values should be in sorted order (for best results).
-   The differences are masked so that only the least significant "bit" bits are used. */
-void simdpackd1(uint32_t initvalue, const uint32_t *  in,__m128i *  out, const uint32_t bit);
-
-
-/* reads 128 values from "in", writes  "bit" 128-bit vectors to "out"
-   integer values should be in sorted order (for best results).
-   The difference values are assumed to be less than 1<<bit. */
-void simdpackwithoutmaskd1(uint32_t initvalue, const uint32_t *  in,__m128i *  out, const uint32_t bit);
-
-
-/* reads "bit" 128-bit vectors from "in", writes  128 values to "out" */
-void simdunpackd1(uint32_t initvalue, const __m128i *  in,uint32_t *  out, const uint32_t bit);
-
-
-/* searches "bit" 128-bit vectors from "in" (= 128 encoded integers) for the first encoded uint32 value
- * which is >= |key|, and returns its position. It is assumed that the values
- * stored are in sorted order.
- * The encoded key is stored in "*presult". If no value is larger or equal to the key,
-* 128 is returned. The pointer initOffset is a pointer to the last four value decoded
-* (when starting out, this can be a zero vector or initialized with _mm_set1_epi32(init)),
-* and the vector gets updated.
-**/
-int
-simdsearchd1(__m128i * initOffset, const __m128i *in, uint32_t bit,
-                uint32_t key, uint32_t *presult);
-
-
-/* searches "bit" 128-bit vectors from "in" (= length<=128 encoded integers) for the first encoded uint32 value
- * which is >= |key|, and returns its position. It is assumed that the values
- * stored are in sorted order.
- * The encoded key is stored in "*presult".
- * The first length decoded integers, ignoring others. If no value is larger or equal to the key,
- * length is returned. Length should be no larger than 128.
- *
- * If no value is larger or equal to the key,
-* length is returned */
-int simdsearchwithlengthd1(uint32_t initvalue, const __m128i *in, uint32_t bit,
-                int length, uint32_t key, uint32_t *presult);
-
-
-
-/* returns the value stored at the specified "slot".
-* */
-uint32_t simdselectd1(uint32_t initvalue, const __m128i *in, uint32_t bit,
-                int slot);
-
-/* given a block of 128 packed values, this function sets the value at index "index" to "value",
- * you must somehow know the previous value.
- * Because of differential coding, all following values are incremented by the offset between this new
- * value and the old value... 
- * This functions is useful if you want to modify the last value. 
- */
-void simdfastsetd1fromprevious( __m128i * in, uint32_t bit, uint32_t previousvalue, uint32_t value, size_t index);
-
-/* given a block of 128 packed values, this function sets the value at index "index" to "value",
- * This function computes the previous value if needed.
- * Because of differential coding, all following values are incremented by the offset between this new
- * value and the old value...
- * This functions is useful if you want to modify the last value. 
- */
-void simdfastsetd1(uint32_t initvalue, __m128i * in, uint32_t bit, uint32_t value, size_t index);
-
-
-/*Simply scan the data
-* The pointer initOffset is a pointer to the last four value decoded
-* (when starting out, this can be a zero vector or initialized with _mm_set1_epi32(init);),
-* and the vector gets updated.
-* */
-
-void
-simdscand1(__m128i * initOffset, const __m128i *in, uint32_t bit);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif
--- a/cpp/simdcomp/makefile
+++ b/cpp/simdcomp/makefile
@@ -1,79 +0,0 @@
-# minimalist makefile
-.SUFFIXES:
-#
-.SUFFIXES: .cpp .o .c .h
-ifeq ($(DEBUG),1)
-CFLAGS = -fPIC  -std=c89 -ggdb -msse4.1 -march=native -Wall -Wextra -Wshadow -fsanitize=undefined  -fno-omit-frame-pointer -fsanitize=address
-else
-CFLAGS = -fPIC -std=c89 -O3 -msse4.1  -march=native -Wall -Wextra -Wshadow
-endif # debug
-LDFLAGS = -shared
-LIBNAME=libsimdcomp.so.0.0.3
-all:  unit unit_chars bitpackingbenchmark $(LIBNAME)
-test:
-	./unit
-	./unit_chars
-install: $(OBJECTS)
-	cp $(LIBNAME) /usr/local/lib
-	ln -s /usr/local/lib/$(LIBNAME) /usr/local/lib/libsimdcomp.so
-	ldconfig
-	cp $(HEADERS) /usr/local/include
-
-
-
-HEADERS=./include/simdbitpacking.h ./include/simdcomputil.h ./include/simdintegratedbitpacking.h ./include/simdcomp.h ./include/simdfor.h ./include/avxbitpacking.h
-
-uninstall:
-	for h in $(HEADERS) ; do rm  /usr/local/$$h; done
-	rm  /usr/local/lib/$(LIBNAME)
-	rm /usr/local/lib/libsimdcomp.so
-	ldconfig
-
-
-OBJECTS= simdbitpacking.o simdintegratedbitpacking.o simdcomputil.o \
-		 simdpackedsearch.o simdpackedselect.o simdfor.o avxbitpacking.o
-
-$(LIBNAME): $(OBJECTS)
-	$(CC) $(CFLAGS) -o $(LIBNAME) $(OBJECTS)  $(LDFLAGS)
-
-
-avxbitpacking.o: ./src/avxbitpacking.c $(HEADERS)
-	$(CC) $(CFLAGS) -c ./src/avxbitpacking.c -Iinclude
-
-
-simdfor.o: ./src/simdfor.c $(HEADERS)
-	$(CC) $(CFLAGS) -c ./src/simdfor.c -Iinclude
-
-
-simdcomputil.o: ./src/simdcomputil.c $(HEADERS)
-	$(CC) $(CFLAGS) -c ./src/simdcomputil.c -Iinclude
-
-simdbitpacking.o: ./src/simdbitpacking.c $(HEADERS)
-	$(CC) $(CFLAGS) -c ./src/simdbitpacking.c -Iinclude
-
-simdintegratedbitpacking.o: ./src/simdintegratedbitpacking.c  $(HEADERS)
-	$(CC) $(CFLAGS) -c ./src/simdintegratedbitpacking.c -Iinclude
-
-simdpackedsearch.o: ./src/simdpackedsearch.c $(HEADERS)
-	$(CC) $(CFLAGS) -c ./src/simdpackedsearch.c -Iinclude
-
-simdpackedselect.o: ./src/simdpackedselect.c $(HEADERS)
-	$(CC) $(CFLAGS) -c ./src/simdpackedselect.c -Iinclude
-
-example: ./example.c    $(HEADERS) $(OBJECTS)
-	$(CC) $(CFLAGS) -o example ./example.c -Iinclude  $(OBJECTS)
-
-unit: ./tests/unit.c    $(HEADERS) $(OBJECTS)
-	$(CC) $(CFLAGS) -o unit ./tests/unit.c -Iinclude  $(OBJECTS)
-
-bitpackingbenchmark: ./benchmarks/bitpackingbenchmark.c    $(HEADERS) $(OBJECTS)
-	$(CC) $(CFLAGS) -o bitpackingbenchmark ./benchmarks/bitpackingbenchmark.c -Iinclude  $(OBJECTS)
-benchmark: ./benchmarks/benchmark.c    $(HEADERS) $(OBJECTS)
-	$(CC) $(CFLAGS) -o benchmark ./benchmarks/benchmark.c -Iinclude  $(OBJECTS)
-dynunit: ./tests/unit.c    $(HEADERS) $(LIBNAME)
-	$(CC) $(CFLAGS) -o dynunit ./tests/unit.c -Iinclude  -lsimdcomp
-
-unit_chars: ./tests/unit_chars.c    $(HEADERS) $(OBJECTS)
-	$(CC) $(CFLAGS) -o unit_chars ./tests/unit_chars.c -Iinclude  $(OBJECTS)
-clean:
-	rm -f unit *.o $(LIBNAME) example benchmark bitpackingbenchmark dynunit unit_chars
--- a/cpp/simdcomp/makefile.vc
+++ b/cpp/simdcomp/makefile.vc
@@ -1,104 +0,0 @@
-
-!IFNDEF MACHINE
-!IF "$(PROCESSOR_ARCHITECTURE)"=="AMD64"
-MACHINE=x64
-!ELSE
-MACHINE=x86
-!ENDIF
-!ENDIF
-
-!IFNDEF DEBUG
-DEBUG=no
-!ENDIF
-
-!IFNDEF CC
-CC=cl.exe
-!ENDIF
-
-!IFNDEF AR
-AR=lib.exe
-!ENDIF
-
-!IFNDEF LINK
-LINK=link.exe
-!ENDIF
-
-!IFNDEF PGO
-PGO=no
-!ENDIF
-
-!IFNDEF PGI
-PGI=no
-!ENDIF
-
-INC = /Iinclude
-
-!IF "$(DEBUG)"=="yes"
-CFLAGS = /nologo /MDd /LDd /Od /Zi /D_DEBUG /RTC1 /W3 /GS /Gm
-ARFLAGS = /nologo
-LDFLAGS = /nologo /debug /nodefaultlib:msvcrt
-!ELSE
-CFLAGS = /nologo /MD /O2 /Zi /DNDEBUG /W3 /Gm- /GS /Gy /Oi /GL /MP
-ARFLAGS = /nologo /LTCG
-LDFLAGS = /nologo /LTCG /DYNAMICBASE /incremental:no /debug /opt:ref,icf
-!ENDIF
-
-!IF "$(PGI)"=="yes"
-LDFLAGS = $(LDFLAGS) /ltcg:pgi
-!ENDIF
-
-!IF "$(PGO)"=="yes"
-LDFLAGS = $(LDFLAGS) /ltcg:pgo
-!ENDIF
-
-LIB_OBJS = simdbitpacking.obj simdintegratedbitpacking.obj simdcomputil.obj \
-	simdpackedsearch.obj simdpackedselect.obj simdfor.obj
-
-
-all: lib dll dynunit unit_chars example benchmark
-# need some good use case scenario to train the instrumented build
-	@if "$(PGI)"=="yes" echo Running PGO training
-	@if "$(PGI)"=="yes" benchmark.exe >nul 2>&1
-	@if "$(PGI)"=="yes" example.exe >nul 2>&1
-
-
-$(LIB_OBJS):
-	$(CC) $(INC) $(CFLAGS) /c src/simdbitpacking.c src/simdintegratedbitpacking.c src/simdcomputil.c \
-		src/simdpackedsearch.c src/simdpackedselect.c src/simdfor.c
-
-lib: $(LIB_OBJS)
-	$(AR) $(ARFLAGS) /OUT:simdcomp_a.lib $(LIB_OBJS)
-
-dll: $(LIB_OBJS)
-	$(LINK) /DLL $(LDFLAGS) /OUT:simdcomp.dll /IMPLIB:simdcomp.lib /DEF:simdcomp.def $(LIB_OBJS)
-
-unit: lib
-	$(CC) $(INC) $(CFLAGS) /c src/unit.c 
-	$(LINK) $(LDFLAGS) /OUT:unit.exe unit.obj simdcomp_a.lib
-
-dynunit: dll
-	$(CC) $(INC) $(CFLAGS) /c src/unit.c 
-	$(LINK) $(LDFLAGS) /OUT:unit.exe unit.obj simdcomp.lib
-
-unit_chars: lib
-	$(CC) $(INC) $(CFLAGS) /c src/unit_chars.c
-	$(LINK) $(LDFLAGS) /OUT:unit_chars.exe unit_chars.obj simdcomp.lib
-
-
-example: lib
-	$(CC) $(INC) $(CFLAGS) /c example.c
-	$(LINK) $(LDFLAGS) /OUT:example.exe example.obj simdcomp.lib
-
-benchmark: lib
-	$(CC) $(INC) $(CFLAGS) /c src/benchmark.c
-	$(LINK) $(LDFLAGS) /OUT:benchmark.exe benchmark.obj simdcomp.lib
-
-clean:
-	del /Q *.obj
-	del /Q *.lib
-	del /Q *.exe
-	del /Q *.dll
-	del /Q *.pgc
-	del /Q *.pgd
-	del /Q *.pdb
-
--- a/cpp/simdcomp/package.json
+++ b/cpp/simdcomp/package.json
@@ -1,16 +0,0 @@
-{
-  "name": "simdcomp",
-  "version": "0.0.3",
-  "repo": "lemire/simdcomp",
-  "description": "A simple C library for compressing lists of integers",
-  "license": "BSD-3-Clause",
-  "src": [
-    "src/simdbitpacking.c",
-    "src/simdcomputil.c",
-    "src/simdintegratedbitpacking.c",
-    "include/simdbitpacking.h",
-    "include/simdcomp.h",
-    "include/simdcomputil.h",
-    "include/simdintegratedbitpacking.h"
-  ]
-}
--- a/cpp/simdcomp/scripts/avxpacking.py
+++ b/cpp/simdcomp/scripts/avxpacking.py
@@ -1,182 +0,0 @@
-#!/usr/bin/env python
-import sys
-def howmany(bit):
-    """ how many values are we going to pack? """
-    return 256
-
-def howmanywords(bit):
-    return (howmany(bit) * bit + 255)/256
-
-def howmanybytes(bit):
-    return howmanywords(bit) * 16
-
-print("""
-/** code generated by avxpacking.py starts here **/
-""")
-
-print("""typedef void (*avxpackblockfnc)(const uint32_t * pin, __m256i * compressed);""")
-print("""typedef void (*avxunpackblockfnc)(const __m256i * compressed, uint32_t * pout);""")
-
-
-
-
-
-
-def plurial(number):
-    if(number <> 1):
-        return "s"
-    else :
-        return ""
-
-print("")
-print("static void avxpackblock0(const uint32_t * pin, __m256i * compressed) {");
-print("  (void)compressed;");
-print("  (void) pin; /* we consumed {0} 32-bit integer{1} */ ".format(howmany(0),plurial(howmany(0))));
-print("}");
-print("")
-
-for bit in range(1,33):
-    print("")
-    print("/* we are going to pack {0} {1}-bit values, touching {2} 256-bit words, using {3} bytes */ ".format(howmany(bit),bit,howmanywords(bit),howmanybytes(bit)))
-    print("static void avxpackblock{0}(const uint32_t * pin, __m256i * compressed) {{".format(bit));
-    print("  const __m256i * in = (const __m256i *)  pin;");
-    print("  /* we are going to touch  {0} 256-bit word{1} */ ".format(howmanywords(bit),plurial(howmanywords(bit))));
-    if(howmanywords(bit) == 1):
-      print("  __m256i w0;")
-    else:
-      print("  __m256i w0, w1;")
-    if( (bit & (bit-1)) <> 0) : print("  __m256i tmp; /* used to store inputs at word boundary */")
-    oldword = 0
-    for j in range(howmany(bit)/8):
-      firstword = j * bit / 32
-      if(firstword > oldword):
-        print("  _mm256_storeu_si256(compressed + {0}, w{1});".format(oldword,oldword%2))
-        oldword = firstword
-      secondword = (j * bit + bit - 1)/32
-      firstshift = (j*bit) % 32
-      if( firstword == secondword):
-          if(firstshift == 0):
-            print("  w{0} = _mm256_lddqu_si256 (in + {1});".format(firstword%2,j))
-          else:
-            print("  w{0} = _mm256_or_si256(w{0},_mm256_slli_epi32(_mm256_lddqu_si256 (in + {1}) , {2}));".format(firstword%2,j,firstshift))
-      else:
-          print("  tmp = _mm256_lddqu_si256 (in + {0});".format(j))
-          print("  w{0} = _mm256_or_si256(w{0},_mm256_slli_epi32(tmp , {2}));".format(firstword%2,j,firstshift))
-          secondshift = 32-firstshift
-          print("  w{0} = _mm256_srli_epi32(tmp,{2});".format(secondword%2,j,secondshift))
-    print("  _mm256_storeu_si256(compressed + {0}, w{1});".format(secondword,secondword%2))
-    print("}");
-    print("")
-
-
-print("")
-print("static void avxpackblockmask0(const uint32_t * pin, __m256i * compressed) {");
-print("  (void)compressed;");
-print("  (void) pin; /* we consumed {0} 32-bit integer{1} */ ".format(howmany(0),plurial(howmany(0))));
-print("}");
-print("")
-
-for bit in range(1,33):
-    print("")
-    print("/* we are going to pack {0} {1}-bit values, touching {2} 256-bit words, using {3} bytes */ ".format(howmany(bit),bit,howmanywords(bit),howmanybytes(bit)))
-    print("static void avxpackblockmask{0}(const uint32_t * pin, __m256i * compressed) {{".format(bit));
-    print("  /* we are going to touch  {0} 256-bit word{1} */ ".format(howmanywords(bit),plurial(howmanywords(bit))));
-    if(howmanywords(bit) == 1):
-      print("  __m256i w0;")
-    else:
-      print("  __m256i w0, w1;")
-    print("  const __m256i * in = (const __m256i *) pin;");
-    if(bit < 32): print("  const __m256i mask = _mm256_set1_epi32({0});".format((1<<bit)-1));
-    def maskfnc(x):
-        if(bit == 32): return x
-        return " _mm256_and_si256 ( mask, {0}) ".format(x)
-    if( (bit & (bit-1)) <> 0) : print("  __m256i tmp; /* used to store inputs at word boundary */")
-    oldword = 0
-    for j in range(howmany(bit)/8):
-      firstword = j * bit / 32
-      if(firstword > oldword):
-        print("  _mm256_storeu_si256(compressed + {0}, w{1});".format(oldword,oldword%2))
-        oldword = firstword
-      secondword = (j * bit + bit - 1)/32
-      firstshift = (j*bit) % 32
-      loadstr = maskfnc(" _mm256_lddqu_si256 (in + {0}) ".format(j))
-      if( firstword == secondword):
-          if(firstshift == 0):
-            print("  w{0} = {1};".format(firstword%2,loadstr))
-          else:
-            print("  w{0} = _mm256_or_si256(w{0},_mm256_slli_epi32({1} , {2}));".format(firstword%2,loadstr,firstshift))
-      else:
-          print("  tmp = {0};".format(loadstr))
-          print("  w{0} = _mm256_or_si256(w{0},_mm256_slli_epi32(tmp , {2}));".format(firstword%2,j,firstshift))
-          secondshift = 32-firstshift
-          print("  w{0} = _mm256_srli_epi32(tmp,{2});".format(secondword%2,j,secondshift))
-    print("  _mm256_storeu_si256(compressed + {0}, w{1});".format(secondword,secondword%2))
-    print("}");
-    print("")
-
-
-print("static void avxunpackblock0(const __m256i * compressed, uint32_t * pout) {");
-print("  (void) compressed;");
-print("  memset(pout,0,{0});".format(howmany(0)));
-print("}");
-print("")
-
-for bit in range(1,33):
-    print("")
-    print("/* we packed {0} {1}-bit values, touching {2} 256-bit words, using {3} bytes */ ".format(howmany(bit),bit,howmanywords(bit),howmanybytes(bit)))
-    print("static void avxunpackblock{0}(const __m256i * compressed, uint32_t * pout) {{".format(bit));
-    print("  /* we are going to access  {0} 256-bit word{1} */ ".format(howmanywords(bit),plurial(howmanywords(bit))));
-    if(howmanywords(bit) == 1):
-      print("  __m256i w0;")
-    else:
-      print("  __m256i w0, w1;")
-    print("  __m256i * out = (__m256i *) pout;");
-    if(bit < 32): print("  const __m256i mask = _mm256_set1_epi32({0});".format((1<<bit)-1));
-    maskstr = " _mm256_and_si256 ( mask, {0}) "
-    if (bit == 32) : maskstr = " {0} " # no need
-    oldword = 0
-    print("  w0 = _mm256_lddqu_si256 (compressed);")
-    for j in range(howmany(bit)/8):
-      firstword = j * bit / 32
-      secondword = (j * bit + bit - 1)/32
-      if(secondword > oldword):
-        print("  w{0} = _mm256_lddqu_si256 (compressed + {1});".format(secondword%2,secondword))
-        oldword = secondword
-      firstshift = (j*bit) % 32
-      firstshiftstr = "_mm256_srli_epi32( w{0} , "+str(firstshift)+") "
-      if(firstshift == 0):
-          firstshiftstr =" w{0} " # no need
-      wfirst = firstshiftstr.format(firstword%2)
-      if( firstword == secondword):
-          if(firstshift + bit <> 32):
-            wfirst  = maskstr.format(wfirst)
-          print("  _mm256_storeu_si256(out + {0}, {1});".format(j,wfirst))
-      else:
-          secondshift = (32-firstshift)
-          wsecond = "_mm256_slli_epi32( w{0} , {1} ) ".format((firstword+1)%2,secondshift)
-          wfirstorsecond = " _mm256_or_si256 ({0},{1}) ".format(wfirst,wsecond)
-          wfirstorsecond = maskstr.format(wfirstorsecond)
-          print("  _mm256_storeu_si256(out + {0},\n    {1});".format(j,wfirstorsecond))
-    print("}");
-    print("")
-
-
-print("static avxpackblockfnc avxfuncPackArr[] = {")
-for bit in range(0,32):
-  print("&avxpackblock{0},".format(bit))
-print("&avxpackblock32")
-print("};")
-
-print("static avxpackblockfnc avxfuncPackMaskArr[] = {")
-for bit in range(0,32):
-  print("&avxpackblockmask{0},".format(bit))
-print("&avxpackblockmask32")
-print("};")
-
-
-print("static avxunpackblockfnc avxfuncUnpackArr[] = {")
-for bit in range(0,32):
-  print("&avxunpackblock{0},".format(bit))
-print("&avxunpackblock32")
-print("};")
-print("/** code generated by avxpacking.py ends here **/")
--- a/cpp/simdcomp/scripts/simdfor.py
+++ b/cpp/simdcomp/scripts/simdfor.py
@@ -1,152 +0,0 @@
-#!/usr/bin/env python3
-
-
-from math import ceil
-
-print("""
-/**
-* Blablabla
-*
-*/
-
-""");
-
-def mask(bit):
-  return str((1 << bit) - 1)
-
-for length in [32]:
-  print("""
-static __m128i  iunpackFOR0(__m128i initOffset, const __m128i *   _in , uint32_t *    _out) {
-    __m128i       *out = (__m128i*)(_out);
-    int i;
-    (void) _in;
-    for (i = 0; i < 8; ++i) {
-        _mm_store_si128(out++, initOffset);
-    	_mm_store_si128(out++, initOffset);
-        _mm_store_si128(out++, initOffset);
-        _mm_store_si128(out++, initOffset);
-    }
-
-    return initOffset;
-}
-
-  """)
-  print("""
-
-static void ipackFOR0(__m128i initOffset , const uint32_t *   _in , __m128i *  out  ) {
-    (void) initOffset;
-    (void) _in;
-    (void) out;
-}
-""") 
-  for bit in range(1,33):
-    offsetVar = " initOffset";
-    print("""  
-static void ipackFOR"""+str(bit)+"""(__m128i """+offsetVar+""", const uint32_t *   _in, __m128i *   out) {
-    const __m128i       *in = (const __m128i*)(_in);
-    __m128i    OutReg;
-
-      """);
-    
-    if (bit != 32):
-      print("    __m128i CurrIn = _mm_load_si128(in);");
-      print("    __m128i InReg = _mm_sub_epi32(CurrIn, initOffset);");
-    else:
-      print("    __m128i InReg = _mm_load_si128(in);");
-      print("    (void) initOffset;");
-
-
-    inwordpointer = 0
-    valuecounter = 0
-    for k in range(ceil((length * bit) / 32)):
-      if(valuecounter == length): break
-      for x in range(inwordpointer,32,bit):
-        if(x!=0) :
-          print("    OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, " + str(x) + "));");
-        else:
-          print("    OutReg = InReg; ");
-        if((x+bit>=32) ):
-          while(inwordpointer<32):
-            inwordpointer += bit
-          print("    _mm_store_si128(out, OutReg);");
-          print("");
-
-          if(valuecounter + 1 < length):
-            print("    ++out;")
-          inwordpointer -= 32;
-          if(inwordpointer>0):
-            print("    OutReg = _mm_srli_epi32(InReg, " + str(bit) + " - " + str(inwordpointer) + ");");
-        if(valuecounter + 1 < length):
-          print("    ++in;") 
-
-          if (bit != 32):
-            print("    CurrIn = _mm_load_si128(in);");
-            print("    InReg = _mm_sub_epi32(CurrIn, initOffset);");
-          else:
-            print("    InReg = _mm_load_si128(in);");
-          print("");
-        valuecounter = valuecounter + 1
-        if(valuecounter == length): break
-    assert(valuecounter == length)
-    print("\n}\n\n""")
-
-  for bit in range(1,32):
-    offsetVar = " initOffset";
-    print("""\n
-static __m128i iunpackFOR"""+str(bit)+"""(__m128i """+offsetVar+""", const  __m128i*   in, uint32_t *   _out) {
-      """);
-    print("""    __m128i*   out = (__m128i*)(_out);
-    __m128i    InReg = _mm_load_si128(in);
-    __m128i    OutReg;    
-    __m128i     tmp;
-    const __m128i mask =  _mm_set1_epi32((1U<<"""+str(bit)+""")-1);
-
-    """);
-
-    MainText = "";
-
-    MainText += "\n";
-    inwordpointer = 0
-    valuecounter = 0
-    for k in range(ceil((length * bit) / 32)):
-      for x in range(inwordpointer,32,bit):
-        if(valuecounter == length): break
-        if (x > 0):
-          MainText += "    tmp = _mm_srli_epi32(InReg," + str(x) +");\n"; 
-        else:
-          MainText += "    tmp = InReg;\n"; 
-        if(x+bit<32):
-          MainText += "    OutReg = _mm_and_si128(tmp, mask);\n";
-        else:
-          MainText += "    OutReg = tmp;\n";        
-        if((x+bit>=32) ):      
-          while(inwordpointer<32):
-            inwordpointer += bit
-          if(valuecounter + 1 < length):
-             MainText += "    ++in;"
-             MainText += "    InReg = _mm_load_si128(in);\n";
-          inwordpointer -= 32;
-          if(inwordpointer>0):
-            MainText += "    OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, " + str(bit) + "-" + str(inwordpointer) + "), mask));\n\n";
-        if (bit != 32):
-          MainText += "    OutReg = _mm_add_epi32(OutReg, initOffset);\n"; 
-        MainText += "    _mm_store_si128(out++, OutReg);\n\n"; 
-        MainText += "";
-        valuecounter = valuecounter + 1
-        if(valuecounter == length): break
-    assert(valuecounter == length)
-    print(MainText)
-    print("    return initOffset;");
-    print("\n}\n\n")
-  print("""
-static __m128i iunpackFOR32(__m128i initvalue , const  __m128i*   in, uint32_t *    _out) {
-	__m128i * mout = (__m128i *)_out;
-	__m128i invec;
-	size_t k;
-	for(k = 0; k < 128/4; ++k) {
-		invec =  _mm_load_si128(in++);
-	    _mm_store_si128(mout++, invec);
-	}
-	return invec;
-}
-  """)
--- a/cpp/simdcomp/simdcomp.def
+++ b/cpp/simdcomp/simdcomp.def
@@ -1,40 +0,0 @@
-EXPORTS
-	simdpack
-	simdpackwithoutmask
-	simdunpack
-	bits
-	maxbits
-	maxbits_length
-	simdmin
-	simdmin_length
-	simdmaxmin
-	simdmaxmin_length
-	simdmaxbitsd1
-	simdmaxbitsd1_length
-	simdpackd1
-	simdpackwithoutmaskd1
-	simdunpackd1
-	simdsearchd1
-	simdsearchwithlengthd1
-	simdselectd1
-	simdpackFOR
-	simdselectFOR
-	simdsearchwithlengthFOR
-	simdunpackFOR
-	simdmin_length
-	simdmaxmin
-	simdmaxmin_length
-	simdpack_length
-	simdpackFOR_length
-	simdunpackFOR_length
-	simdpack_shortlength
-	simdfastsetFOR
-	simdfastset
-	simdfastsetd1
-	simdunpack_length
-	simdunpack_shortlength
-	simdsearchwithlengthFOR
-	simdscand1
-	simdfastsetd1fromprevious
-	simdfastsetd1
-
--- a/cpp/simdcomp/src/avxbitpacking.c
+++ b/cpp/simdcomp/src/avxbitpacking.c
--- a/cpp/simdcomp/src/simdbitpacking.c
+++ b/cpp/simdcomp/src/simdbitpacking.c
--- a/cpp/simdcomp/src/simdcomputil.c
+++ b/cpp/simdcomp/src/simdcomputil.c
@@ -1,234 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-
-#include "simdcomputil.h"
-#ifdef __SSE4_1__
-#include <smmintrin.h>
-#endif
-#include <assert.h>
-
-#define Delta(curr, prev) \
-    _mm_sub_epi32(curr, \
-            _mm_or_si128(_mm_slli_si128(curr, 4), _mm_srli_si128(prev, 12)))
-
-/* returns the integer logarithm of v (bit width) */
-uint32_t bits(const uint32_t v) {
-#ifdef _MSC_VER
-    unsigned long answer;
-    if (v == 0) {
-        return 0;
-    }
-    _BitScanReverse(&answer, v);
-    return answer + 1;
-#else
-    return v == 0 ? 0 : 32 - __builtin_clz(v); /* assume GCC-like compiler if not microsoft */
-#endif
-}
-
-
-
-static uint32_t maxbitas32int(const __m128i accumulator) {
-	const __m128i _tmp1 = _mm_or_si128(_mm_srli_si128(accumulator, 8), accumulator); /* (A,B,C,D) xor (0,0,A,B) = (A,B,C xor A,D xor B)*/
-	const __m128i _tmp2 = _mm_or_si128(_mm_srli_si128(_tmp1, 4), _tmp1); /*  (A,B,C xor A,D xor B) xor  (0,0,0,C xor A)*/
-	uint32_t ans =  _mm_cvtsi128_si32(_tmp2);
-	return bits(ans);
-}
-
-SIMDCOMP_PURE uint32_t maxbits(const uint32_t * begin) {
-	    const __m128i* pin = (const __m128i*)(begin);
-	    __m128i accumulator = _mm_loadu_si128(pin);
-	    uint32_t k = 1;
-	    for(; 4*k < SIMDBlockSize; ++k) {
-	    	__m128i newvec = _mm_loadu_si128(pin+k);
-	        accumulator = _mm_or_si128(accumulator,newvec);
-	    }
-	    return maxbitas32int(accumulator);
-}
-static uint32_t orasint(const __m128i accumulator) {
-	const __m128i _tmp1 = _mm_or_si128(_mm_srli_si128(accumulator, 8), accumulator); /* (A,B,C,D) xor (0,0,A,B) = (A,B,C xor A,D xor B)*/
-	const __m128i _tmp2 = _mm_or_si128(_mm_srli_si128(_tmp1, 4), _tmp1); /*  (A,B,C xor A,D xor B) xor  (0,0,0,C xor A)*/
-	return  _mm_cvtsi128_si32(_tmp2);
-}
-
-#ifdef __SSE4_1__
-
-static uint32_t minasint(const __m128i accumulator) {
-	const __m128i _tmp1 = _mm_min_epu32(_mm_srli_si128(accumulator, 8), accumulator); /* (A,B,C,D) xor (0,0,A,B) = (A,B,C xor A,D xor B)*/
-	const __m128i _tmp2 = _mm_min_epu32(_mm_srli_si128(_tmp1, 4), _tmp1); /*  (A,B,C xor A,D xor B) xor  (0,0,0,C xor A)*/
-	return  _mm_cvtsi128_si32(_tmp2);
-}
-
-static uint32_t maxasint(const __m128i accumulator) {
-	const __m128i _tmp1 = _mm_max_epu32(_mm_srli_si128(accumulator, 8), accumulator); /* (A,B,C,D) xor (0,0,A,B) = (A,B,C xor A,D xor B)*/
-	const __m128i _tmp2 = _mm_max_epu32(_mm_srli_si128(_tmp1, 4), _tmp1); /*  (A,B,C xor A,D xor B) xor  (0,0,0,C xor A)*/
-	return  _mm_cvtsi128_si32(_tmp2);
-}
-
-uint32_t simdmin(const uint32_t * in) {
-    const __m128i* pin = (const __m128i*)(in);
-    __m128i accumulator =  _mm_loadu_si128(pin);
-     uint32_t k = 1;
-     for(; 4*k < SIMDBlockSize; ++k) {
-    	 __m128i newvec = _mm_loadu_si128(pin+k);
-         accumulator = _mm_min_epu32(accumulator,newvec);
-     }
-     return minasint(accumulator);
-}
-
-void simdmaxmin(const uint32_t * in, uint32_t * getmin, uint32_t * getmax) {
-    const __m128i* pin = (const __m128i*)(in);
-    __m128i minaccumulator =  _mm_loadu_si128(pin);
-    __m128i maxaccumulator =  minaccumulator;
-    uint32_t k = 1;
-     for(; 4*k < SIMDBlockSize; ++k) {
-    	 __m128i newvec = _mm_loadu_si128(pin+k);
-         minaccumulator = _mm_min_epu32(minaccumulator,newvec);
-         maxaccumulator = _mm_max_epu32(maxaccumulator,newvec);
-     }
-     *getmin = minasint(minaccumulator);
-     *getmax = maxasint(maxaccumulator);
-}
-
-
-uint32_t simdmin_length(const uint32_t * in, uint32_t length) {
-	uint32_t currentmin = 0xFFFFFFFF;
-	uint32_t lengthdividedby4 = length / 4;
-	uint32_t offset = lengthdividedby4 * 4;
-	uint32_t k;
-	if (lengthdividedby4 > 0) {
-		const __m128i* pin = (const __m128i*)(in);
-		__m128i accumulator = _mm_loadu_si128(pin);
-		k = 1;
-		for(; 4*k < lengthdividedby4 * 4; ++k) {
-			__m128i newvec = _mm_loadu_si128(pin+k);
-			accumulator = _mm_min_epu32(accumulator,newvec);
-		}
-		currentmin = minasint(accumulator);
-	}
-	for (k = offset; k < length; ++k)
-		if (in[k] < currentmin)
-			currentmin = in[k];
-	return currentmin;
-}
-
-void simdmaxmin_length(const uint32_t * in, uint32_t length, uint32_t * getmin, uint32_t * getmax) {
-	uint32_t lengthdividedby4 = length / 4;
-	uint32_t offset = lengthdividedby4 * 4;
-	uint32_t k;
-	*getmin = 0xFFFFFFFF;
-	*getmax = 0;
-	if (lengthdividedby4 > 0) {
-		const __m128i* pin = (const __m128i*)(in);
-		__m128i minaccumulator = _mm_loadu_si128(pin);
-		__m128i maxaccumulator = minaccumulator;
-		k = 1;
-		for(; 4*k < lengthdividedby4 * 4; ++k) {
-			__m128i newvec = _mm_loadu_si128(pin+k);
-			minaccumulator = _mm_min_epu32(minaccumulator,newvec);
-			maxaccumulator = _mm_max_epu32(maxaccumulator,newvec);
-		}
-		*getmin = minasint(minaccumulator);
-		*getmax = maxasint(maxaccumulator);
-	}
-	for (k = offset; k < length; ++k) {
-		if (in[k] < *getmin)
-			*getmin = in[k];
-		if (in[k] > *getmax)
-			*getmax = in[k];
-	}
-}
-
-#endif
-
-SIMDCOMP_PURE uint32_t maxbits_length(const uint32_t * in,uint32_t length) {
-	  uint32_t k;
-	  uint32_t lengthdividedby4 = length / 4;
-	  uint32_t offset = lengthdividedby4 * 4;
-	  uint32_t bigxor = 0;
-	  if(lengthdividedby4 > 0) {
-		    const __m128i* pin = (const __m128i*)(in);
-		    __m128i accumulator = _mm_loadu_si128(pin);
-		    k = 1;
-		    for(; 4*k < 4*lengthdividedby4; ++k) {
-		    	__m128i newvec = _mm_loadu_si128(pin+k);
-		        accumulator = _mm_or_si128(accumulator,newvec);
-		    }
-		    bigxor = orasint(accumulator);
-	  }
-	  for(k = offset; k < length; ++k)
-		  bigxor |= in[k];
-	  return bits(bigxor);
-}
-
-
-/* maxbit over 128 integers (SIMDBlockSize) with provided initial value */
-uint32_t simdmaxbitsd1(uint32_t initvalue, const uint32_t * in) {
-    __m128i  initoffset = _mm_set1_epi32 (initvalue);
-    const __m128i* pin = (const __m128i*)(in);
-    __m128i newvec = _mm_loadu_si128(pin);
-    __m128i accumulator = Delta(newvec , initoffset);
-    __m128i oldvec = newvec;
-    uint32_t k = 1;
-    for(; 4*k < SIMDBlockSize; ++k) {
-        newvec = _mm_loadu_si128(pin+k);
-        accumulator = _mm_or_si128(accumulator,Delta(newvec , oldvec));
-        oldvec = newvec;
-    }
-    initoffset = oldvec;
-    return maxbitas32int(accumulator);
-}
-
-
-/* maxbit over |length| integers with provided initial value */
-uint32_t simdmaxbitsd1_length(uint32_t initvalue, const uint32_t * in,
-                uint32_t length) {
-    __m128i newvec;
-    __m128i oldvec;
-    __m128i initoffset;
-    __m128i accumulator;
-    const __m128i *pin;
-    uint32_t tmparray[4];
-    uint32_t k = 1;
-    uint32_t acc;
-
-    assert(length > 0);
-
-    pin = (const __m128i *)(in);
-    initoffset = _mm_set1_epi32(initvalue);
-    switch (length) {
-      case 1:
-        newvec = _mm_set1_epi32(in[0]);
-        break;
-      case 2:
-        newvec = _mm_setr_epi32(in[0], in[1], in[1], in[1]);
-        break;
-      case 3:
-        newvec = _mm_setr_epi32(in[0], in[1], in[2], in[2]);
-        break;
-      default:
-        newvec = _mm_loadu_si128(pin);
-        break;
-    }
-    accumulator = Delta(newvec, initoffset);
-    oldvec = newvec;
-
-    /* process 4 integers and build an accumulator */
-    while (k * 4 + 4 <= length) {
-        newvec = _mm_loadu_si128(pin + k);
-        accumulator = _mm_or_si128(accumulator, Delta(newvec, oldvec));
-        oldvec = newvec;
-        k++;
-    }
-
-    /* extract the accumulator as an integer */
-    _mm_storeu_si128((__m128i *)(tmparray), accumulator);
-    acc = tmparray[0] | tmparray[1] | tmparray[2] | tmparray[3];
-
-    /* now process the remaining integers */
-    for (k *= 4; k < length; k++)
-        acc |= in[k] - (k == 0 ? initvalue : in[k - 1]);
-
-    /* return the number of bits */
-    return bits(acc);
-}
--- a/cpp/simdcomp/src/simdfor.c
+++ b/cpp/simdcomp/src/simdfor.c
--- a/cpp/simdcomp/src/simdintegratedbitpacking.c
+++ b/cpp/simdcomp/src/simdintegratedbitpacking.c
--- a/cpp/simdcomp/src/simdpackedsearch.c
+++ b/cpp/simdcomp/src/simdpackedsearch.c
--- a/cpp/simdcomp/src/simdpackedselect.c
+++ b/cpp/simdcomp/src/simdpackedselect.c
--- a/cpp/simdcomp/tests/unit.c
+++ b/cpp/simdcomp/tests/unit.c
@@ -1,900 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include "simdcomp.h"
-
-
-
-int testshortpack() {
-	int bit;
-	size_t i;
-	size_t length;
-	__m128i * bb;
-	srand(0);
-	printf("testshortpack\n");
-	for (bit = 0; bit < 32; ++bit) {
-		const size_t N = 128;
-		uint32_t * data = malloc(N * sizeof(uint32_t));
-		uint32_t * backdata = malloc(N * sizeof(uint32_t));
-		uint32_t * buffer = malloc((2 * N + 1024) * sizeof(uint32_t));
-
-		for (i = 0; i < N; ++i) {
-			data[i] = rand() & ((1 << bit) - 1);
-		}
-		for (length = 0; length <= N; ++length) {
-			for (i = 0; i < N; ++i) {
-				backdata[i] = 0;
-			}
-			bb = simdpack_shortlength(data, length, (__m128i *) buffer,
-					bit);
-			if((bb - (__m128i *) buffer) * sizeof(__m128i) != (unsigned) simdpack_compressedbytes(length,bit)) {
-			 printf("bug\n");
-			 return -1;
-			}
-			simdunpack_shortlength((__m128i *) buffer, length,
-					backdata, bit);
-			for (i = 0; i < length; ++i) {
-
-				if (data[i] != backdata[i]) {
-				    printf("bug\n");
-					return -1;
-				}
-			}
-		}
-		free(data);
-		free(backdata);
-		free(buffer);
-	}
-	return 0;
-}
-
-int testlongpack() {
-	int bit;
-	size_t i;
-	size_t length;
-	__m128i * bb;
-	srand(0);
-	printf("testlongpack\n");
-	for (bit = 0; bit < 32; ++bit) {
-		const size_t N = 2048;
-		uint32_t * data = malloc(N * sizeof(uint32_t));
-		uint32_t * backdata = malloc(N * sizeof(uint32_t));
-		uint32_t * buffer = malloc((2 * N + 1024) * sizeof(uint32_t));
-
-		for (i = 0; i < N; ++i) {
-			data[i] = rand() & ((1 << bit) - 1);
-		}
-		for (length = 0; length <= N; ++length) {
-			for (i = 0; i < N; ++i) {
-				backdata[i] = 0;
-			}
-			bb = simdpack_length(data, length, (__m128i *) buffer,
-					bit);
-			if((bb - (__m128i *) buffer) * sizeof(__m128i) != (unsigned) simdpack_compressedbytes(length,bit)) {
-			 printf("bug\n");
-			 return -1;
-			}
-			simdunpack_length((__m128i *) buffer, length,
-					backdata, bit);
-			for (i = 0; i < length; ++i) {
-
-				if (data[i] != backdata[i]) {
-				    printf("bug\n");
-					return -1;
-				}
-			}
-		}
-		free(data);
-		free(backdata);
-		free(buffer);
-	}
-	return 0;
-}
-
-
-
-int testset() {
-	int bit;
-	size_t i;
-	const size_t N = 128;
-	uint32_t * data = malloc(N * sizeof(uint32_t));
-	uint32_t * backdata = malloc(N * sizeof(uint32_t));
-	uint32_t * buffer = malloc((2 * N + 1024) * sizeof(uint32_t));
-
-	srand(0);
-
-	for (bit = 0; bit < 32; ++bit) {
-		printf("simple set %d \n",bit);
-
-		for (i = 0; i < N; ++i) {
-			data[i] = rand() & ((1 << bit) - 1);
-		}
-		for (i = 0; i < N; ++i) {
-			backdata[i] = 0;
-		}
-		simdpack(data, (__m128i *) buffer, bit);
-		simdunpack((__m128i *) buffer, backdata, bit);
-		for (i = 0; i < N; ++i) {
-			if (data[i] != backdata[i]) {
-			    printf("bug\n");
-				return -1;
-			}
-		}
-
-		for(i = N  ; i > 0; i--) {
-			simdfastset((__m128i *) buffer, bit, data[N - i], i - 1);
-		}
-		simdunpack((__m128i *) buffer, backdata, bit);
-		for (i = 0; i < N; ++i) {
-			if (data[i] != backdata[N - i - 1]) {
-			    printf("bug\n");
-				return -1;
-			}
-		}
-		simdpack(data, (__m128i *) buffer, bit);
-		for(i = 1  ; i <= N; i++) {
-			simdfastset((__m128i *) buffer, bit, data[i - 1], i - 1);
-		}
-		simdunpack((__m128i *) buffer, backdata, bit);
-		for (i = 0; i < N; ++i) {
-			if (data[i] != backdata[i]) {
-			    printf("bug\n");
-				return -1;
-			}
-		}
-
-	}
-	free(data);
-	free(backdata);
-	free(buffer);
-
-	return 0;
-}
-
-#ifdef __SSE4_1__
-
-int testsetd1() {
-	int bit;
-	size_t i;
-	uint32_t newvalue;
-	const size_t N = 128;
-	uint32_t * data = malloc(N * sizeof(uint32_t));
-	uint32_t * datazeroes = malloc(N * sizeof(uint32_t));
-
-	uint32_t * backdata = malloc(N * sizeof(uint32_t));
-	uint32_t * buffer = malloc((2 * N + 1024) * sizeof(uint32_t));
-
-	srand(0);
-	for (bit = 0; bit < 32; ++bit) {
-		printf("simple set d1 %d \n",bit);
-		data[0] = rand() & ((1 << bit) - 1);
-		datazeroes[0] = 0;
-
-		for (i = 1; i < N; ++i) {
-			data[i] = data[i - 1] + (rand() & ((1 << bit) - 1));
-			datazeroes[i] = 0;
-		}
-		for (i = 0; i < N; ++i) {
-			backdata[i] = 0;
-		}
-		simdpackd1(0,datazeroes, (__m128i *) buffer, bit);
- 	    for(i = 1  ; i <= N; i++) {
-			simdfastsetd1(0,(__m128i *) buffer, bit, data[i - 1], i - 1);
-			newvalue = simdselectd1(0, (const __m128i *) buffer, bit,i - 1);
-			if( newvalue != data[i-1] ) {
-				printf("bad set-select\n");
-				return -1;
-			}
-		}
-		simdunpackd1(0,(__m128i *) buffer, backdata, bit);
-		for (i = 0; i < N; ++i) {
-			if (data[i] != backdata[i])
-				return -1;
-		}
-	}
-	free(data);
-	free(backdata);
-	free(buffer);
-        free(datazeroes);
-	return 0;
-}
-#endif
-
-int testsetFOR() {
-	int bit;
-	size_t i;
-	uint32_t newvalue;
-	const size_t N = 128;
-	uint32_t * data = malloc(N * sizeof(uint32_t));
-	uint32_t * datazeroes = malloc(N * sizeof(uint32_t));
-
-	uint32_t * backdata = malloc(N * sizeof(uint32_t));
-	uint32_t * buffer = malloc((2 * N + 1024) * sizeof(uint32_t));
-
-	srand(0);
-	for (bit = 0; bit < 32; ++bit) {
-		printf("simple set FOR %d \n",bit);
-		for (i = 0; i < N; ++i) {
-			data[i] = (rand() & ((1 << bit) - 1));
-			datazeroes[i] = 0;
-		}
-		for (i = 0; i < N; ++i) {
-			backdata[i] = 0;
-		}
-		simdpackFOR(0,datazeroes, (__m128i *) buffer, bit);
- 	    for(i = 1  ; i <= N; i++) {
- 	    	simdfastsetFOR(0,(__m128i *) buffer, bit, data[i - 1], i - 1);
-			newvalue = simdselectFOR(0, (const __m128i *) buffer, bit,i - 1);
-			if( newvalue != data[i-1] ) {
-				printf("bad set-select\n");
-				return -1;
-			}
-		}
-		simdunpackFOR(0,(__m128i *) buffer, backdata, bit);
-		for (i = 0; i < N; ++i) {
-			if (data[i] != backdata[i])
-				return -1;
-		}
-	}
-	free(data);
-	free(backdata);
-	free(buffer);
-        free(datazeroes);
-	return 0;
-}
-
-int testshortFORpack() {
-	int bit;
-	size_t i;
-	__m128i * rb;
-	size_t length;
-	uint32_t offset = 7;
-	srand(0);
-	for (bit = 0; bit < 32; ++bit) {
-		const size_t N = 128;
-		uint32_t * data = malloc(N * sizeof(uint32_t));
-		uint32_t * backdata = malloc(N * sizeof(uint32_t));
-		uint32_t * buffer = malloc((2 * N + 1024) * sizeof(uint32_t));
-
-		for (i = 0; i < N; ++i) {
-			data[i] = (rand() & ((1 << bit) - 1)) + offset;
-		}
-		for (length = 0; length <= N; ++length) {
-			for (i = 0; i < N; ++i) {
-				backdata[i] = 0;
-			}
-			rb = simdpackFOR_length(offset,data, length, (__m128i *) buffer,
-					bit);
-		    if(((rb - (__m128i *) buffer)*sizeof(__m128i)) != (unsigned) simdpackFOR_compressedbytes(length,bit)) {
-		      return -1;
-		    }
-			simdunpackFOR_length(offset,(__m128i *) buffer, length,
-					backdata, bit);
-			for (i = 0; i < length; ++i) {
-
-				if (data[i] != backdata[i])
-					return -1;
-			}
-		}
-		free(data);
-		free(backdata);
-		free(buffer);
-	}
-	return 0;
-}
-
-
-#ifdef __AVX2__
-
-int testbabyavx() {
-	int bit;
-	int trial;
-	unsigned int i,j;
-	const size_t N = AVXBlockSize;
-	srand(0);
-	printf("testbabyavx\n");
-	printf("bit = ");
-	for (bit = 0; bit < 32; ++bit) {
-		printf(" %d ",bit);
-		fflush(stdout);
-		for(trial = 0; trial < 100; ++trial) {
-			uint32_t * data = malloc(N * sizeof(uint32_t)+ 64 * sizeof(uint32_t));
-			uint32_t * backdata = malloc(N * sizeof(uint32_t) + 64 * sizeof(uint32_t) );
-			__m256i * buffer = malloc((2 * N + 1024) * sizeof(uint32_t) + 32);
-
-			for (i = 0; i < N; ++i) {
-				data[i] = rand() & ((uint32_t)(1 << bit) - 1);
-			}
-			for (i = 0; i < N; ++i) {
-				backdata[i] = 0;
-			}
-            if(avxmaxbits(data) != maxbits_length(data,N)) {
-            	printf("avxmaxbits is buggy\n");
-				return -1;
-            }
-
-			avxpackwithoutmask(data, buffer, bit);
-			avxunpack(buffer, backdata, bit);
-			for (i = 0; i < AVXBlockSize; ++i) {
-				if (data[i] != backdata[i]) {
-					printf("bug\n");
-					for (j = 0; j < N; ++j) {
-						if (data[j] != backdata[j]) {
-							printf("data[%d]=%d v.s. backdata[%d]=%d\n",j,data[j],j,backdata[j]);
-						} else {
-							printf("data[%d]=%d\n",j,data[j]);
-						}
-					}
-					return -1;
-				}
-			}
-			free(data);
-			free(backdata);
-			free(buffer);
-		}
-	}
-	printf("\n");
-	return 0;
-}
-
-int testavx2() {
-    int N = 5000 * AVXBlockSize, gap;
-    __m256i * buffer = malloc(AVXBlockSize * sizeof(uint32_t));
-    uint32_t * datain = malloc(N * sizeof(uint32_t));
-    uint32_t * backbuffer = malloc(AVXBlockSize * sizeof(uint32_t));
-    for (gap = 1; gap <= 387420489; gap *= 3) {
-        int k;
-        printf(" gap = %u \n", gap);
-        for (k = 0; k < N; ++k)
-            datain[k] = k * gap;
-        for (k = 0; k * AVXBlockSize < N; ++k) {
-            /*
-               First part works for general arrays (sorted or unsorted)
-            */
-            int j;
-       	    /* we compute the bit width */
-            const uint32_t b = avxmaxbits(datain + k * AVXBlockSize);
-            if(avxmaxbits(datain + k * AVXBlockSize) != maxbits_length(datain + k * AVXBlockSize,AVXBlockSize)) {
-            	printf("avxmaxbits is buggy %d %d \n",
-            			avxmaxbits(datain + k * AVXBlockSize),
-						maxbits_length(datain + k * AVXBlockSize,AVXBlockSize));
-				return -1;
-            }
-            printf("bit width = %d\n",b);
-
-
-            /* we read 256 integers at "datain + k * AVXBlockSize" and
-               write b 256-bit vectors at "buffer" */
-            avxpackwithoutmask(datain + k * AVXBlockSize, buffer, b);
-            /* we read back b1 128-bit vectors at "buffer" and write 128 integers at backbuffer */
-			avxunpack(buffer, backbuffer, b);/* uncompressed */
-			for (j = 0; j < AVXBlockSize; ++j) {
-				if (backbuffer[j] != datain[k * AVXBlockSize + j]) {
-					int i;
-					printf("bug in avxpack\n");
-					for(i = 0; i < AVXBlockSize; ++i) {
-						printf("data[%d]=%d got back %d %s\n",i,
-								datain[k * AVXBlockSize + i],backbuffer[i],
-								datain[k * AVXBlockSize + i]!=backbuffer[i]?"bug":"");
-					}
-					return -2;
-				}
-			}
-        }
-    }
-    free(buffer);
-    free(datain);
-    free(backbuffer);
-    printf("Code looks good.\n");
-    return 0;
-}
-#endif /* avx2 */
-
-int test() {
-    int N = 5000 * SIMDBlockSize, gap;
-    __m128i * buffer = malloc(SIMDBlockSize * sizeof(uint32_t));
-    uint32_t * datain = malloc(N * sizeof(uint32_t));
-    uint32_t * backbuffer = malloc(SIMDBlockSize * sizeof(uint32_t));
-    for (gap = 1; gap <= 387420489; gap *= 3) {
-        int k;
-        printf(" gap = %u \n", gap);
-        for (k = 0; k < N; ++k)
-            datain[k] = k * gap;
-        for (k = 0; k * SIMDBlockSize < N; ++k) {
-            /*
-               First part works for general arrays (sorted or unsorted)
-            */
-            int j;
-       	    /* we compute the bit width */
-            const uint32_t b = maxbits(datain + k * SIMDBlockSize);
-            /* we read 128 integers at "datain + k * SIMDBlockSize" and
-               write b 128-bit vectors at "buffer" */
-            simdpackwithoutmask(datain + k * SIMDBlockSize, buffer, b);
-            /* we read back b1 128-bit vectors at "buffer" and write 128 integers at backbuffer */
-            simdunpack(buffer, backbuffer, b);/* uncompressed */
-            for (j = 0; j < SIMDBlockSize; ++j) {
-                if (backbuffer[j] != datain[k * SIMDBlockSize + j]) {
-                    printf("bug in simdpack\n");
-                    return -2;
-                }
-            }
-
-	    {
-                /*
-                 next part assumes that the data is sorted (uses differential coding)
-                */
-                uint32_t offset = 0;
-                /* we compute the bit width */
-                const uint32_t b1 = simdmaxbitsd1(offset,
-                    datain + k * SIMDBlockSize);
-               /* we read 128 integers at "datain + k * SIMDBlockSize" and
-                  write b1 128-bit vectors at "buffer" */
-               simdpackwithoutmaskd1(offset, datain + k * SIMDBlockSize, buffer,
-                    b1);
-               /* we read back b1 128-bit vectors at "buffer" and write 128 integers at backbuffer */
-               simdunpackd1(offset, buffer, backbuffer, b1);
-               for (j = 0; j < SIMDBlockSize; ++j) {
-                   if (backbuffer[j] != datain[k * SIMDBlockSize + j]) {
-                       printf("bug in simdpack d1\n");
-                       return -3;
-                   }
-               }
-               offset = datain[k * SIMDBlockSize + SIMDBlockSize - 1];
-	    }
-        }
-    }
-    free(buffer);
-    free(datain);
-    free(backbuffer);
-    printf("Code looks good.\n");
-    return 0;
-}
-
-#ifdef __SSE4_1__
-int testFOR() {
-    int N = 5000 * SIMDBlockSize, gap;
-    __m128i * buffer = malloc(SIMDBlockSize * sizeof(uint32_t));
-    uint32_t * datain = malloc(N * sizeof(uint32_t));
-    uint32_t * backbuffer = malloc(SIMDBlockSize * sizeof(uint32_t));
-    uint32_t tmax, tmin, tb;
-    for (gap = 1; gap <= 387420489; gap *= 2) {
-        int k;
-        printf(" gap = %u \n", gap);
-        for (k = 0; k < N; ++k)
-            datain[k] = k * gap;
-        for (k = 0; k * SIMDBlockSize < N; ++k) {
-            int j;
-            simdmaxmin_length(datain + k * SIMDBlockSize,SIMDBlockSize,&tmin,&tmax);
-       	    /* we compute the bit width */
-            tb  = bits(tmax - tmin);
-
-
-            /* we read 128 integers at "datain + k * SIMDBlockSize" and
-               write b 128-bit vectors at "buffer" */
-            simdpackFOR(tmin,datain + k * SIMDBlockSize, buffer, tb);
-
-            for (j = 0; j < SIMDBlockSize; ++j) {
-                        uint32_t selectedvalue = simdselectFOR(tmin,buffer,tb,j);
-                    	if (selectedvalue != datain[k * SIMDBlockSize + j]) {
-                            printf("bug in simdselectFOR\n");
-                            return -3;
-                        }
-            }
-            /* we read back b1 128-bit vectors at "buffer" and write 128 integers at backbuffer */
-            simdunpackFOR(tmin,buffer, backbuffer, tb);/* uncompressed */
-            for (j = 0; j < SIMDBlockSize; ++j) {
-            	if (backbuffer[j] != datain[k * SIMDBlockSize + j]) {
-                    printf("bug in simdpackFOR\n");
-                    return -2;
-                }
-            }
-        }
-    }
-    free(buffer);
-    free(datain);
-    free(backbuffer);
-    printf("Code looks good.\n");
-    return 0;
-}
-#endif
-
-#define MAX 300
-int test_simdmaxbitsd1_length() {
-    uint32_t result, buffer[MAX + 1];
-    int i, j;
-
-    memset(&buffer[0], 0xff, sizeof(buffer));
-
-    /* this test creates buffers of different length; each buffer is
-     * initialized to result in the following deltas:
-     * length 1: 2
-     * length 2: 1 2
-     * length 3: 1 1 2
-     * length 4: 1 1 1 2
-     * length 5: 1 1 1 1 2
-     * etc. Each sequence's "maxbits" is 2. */
-    for (i = 0; i < MAX; i++) {
-      for (j = 0; j < i; j++)
-        buffer[j] = j + 1;
-      buffer[i] = i + 2;
-
-      result = simdmaxbitsd1_length(0, &buffer[0], i + 1);
-      if (result != 2) {
-        printf("simdmaxbitsd1_length: unexpected result %u in loop %d\n",
-                result, i);
-        return -1;
-      }
-    }
-    printf("simdmaxbitsd1_length: ok\n");
-    return 0;
-}
-
-int uint32_cmp(const void *a, const void *b)
-{
-    const uint32_t *ia = (const uint32_t *)a;
-    const uint32_t *ib = (const uint32_t *)b;
-    if(*ia < *ib)
-    	return -1;
-    else if (*ia > *ib)
-    	return 1;
-    return 0;
-}
-
-#ifdef __SSE4_1__
-int test_simdpackedsearch() {
-    uint32_t buffer[128];
-    uint32_t result = 0;
-    int b, i;
-    uint32_t init = 0;
-    __m128i initial = _mm_set1_epi32(init);
-
-    /* initialize the buffer */
-    for (i = 0; i < 128; i++)
-        buffer[i] = (uint32_t)(i + 1);
-
-    /* this test creates delta encoded buffers with different bits, then
-     * performs lower bound searches for each key */
-    for (b = 1; b <= 32; b++) {
-        uint32_t out[128];
-        /* delta-encode to 'i' bits */
-        simdpackwithoutmaskd1(init, buffer, (__m128i *)out, b);
-        initial = _mm_setzero_si128();
-        printf("simdsearchd1: %d bits\n", b);
-
-        /* now perform the searches */
-        initial = _mm_set1_epi32(init);
-        assert(simdsearchd1(&initial, (__m128i *)out, b, 0, &result) == 0);
-        assert(result > 0);
-
-        for (i = 1; i <= 128; i++) {
-        	initial = _mm_set1_epi32(init);
-            assert(simdsearchd1(&initial, (__m128i *)out, b,
-                                    (uint32_t)i, &result) == i - 1);
-            assert(result == (unsigned)i);
-        }
-        initial = _mm_set1_epi32(init);
-        assert(simdsearchd1(&initial, (__m128i *)out, b, 200, &result)
-                        == 128);
-        assert(result > 200);
-    }
-    printf("simdsearchd1: ok\n");
-    return 0;
-}
-
-int test_simdpackedsearchFOR() {
-    uint32_t buffer[128];
-    uint32_t result = 0;
-    int b;
-    uint32_t i;
-    uint32_t maxv, tmin, tmax, tb;
-    uint32_t out[128];
-
-    /* this test creates delta encoded buffers with different bits, then
-     * performs lower bound searches for each key */
-    for (b = 1; b <= 32; b++) {
-        /* initialize the buffer */
-    	maxv = (b == 32)
-    			? 0xFFFFFFFF
-    					: ((1U<<b) - 1);
-        for (i = 0; i < 128; i++)
-            buffer[i] = maxv * (i + 1) / 128;
-        simdmaxmin_length(buffer,SIMDBlockSize,&tmin,&tmax);
-   	    /* we compute the bit width */
-        tb  = bits(tmax - tmin);
-        /* delta-encode to 'i' bits */
-        simdpackFOR(tmin, buffer, (__m128i *)out, tb);
-        printf("simdsearchd1: %d bits\n", b);
-
-        /* now perform the searches */
-        for (i = 0; i < 128; i++) {
-        	assert(buffer[i] == simdselectFOR(tmin, (__m128i *)out, tb,i));
-        }
-        for (i = 0; i < 128; i++) {
-            int x = simdsearchwithlengthFOR(tmin, (__m128i *)out, tb,
-                                    128,buffer[i], &result) ;
-            assert(simdselectFOR(tmin, (__m128i *)out, tb,x) == buffer[x]);
-            assert(simdselectFOR(tmin, (__m128i *)out, tb,x) == result);
-            assert(buffer[x] == result);
-            assert(result == buffer[i]);
-            assert(buffer[x] == buffer[i]);
-        }
-    }
-    printf("simdsearchFOR: ok\n");
-    return 0;
-}
-
-int test_simdpackedsearch_advanced() {
-    uint32_t buffer[128];
-    uint32_t backbuffer[128];
-	uint32_t out[128];
-    uint32_t result = 0;
-    uint32_t b, i;
-    uint32_t init = 0;
-    __m128i initial = _mm_set1_epi32(init);
-
-
-    /* this test creates delta encoded buffers with different bits, then
-     * performs lower bound searches for each key */
-    for (b = 0; b <= 32; b++) {
-    	uint32_t prev = init;
-        /* initialize the buffer */
-        for (i = 0; i < 128; i++) {
-            buffer[i] =  ((uint32_t)(1431655765 * i + 0xFFFFFFFF)) ;
-            if(b < 32) buffer[i] %= (1<<b);
-        }
-
-        qsort(buffer,128, sizeof(uint32_t), uint32_cmp);
-
-        for (i = 0; i < 128; i++) {
-           buffer[i] = buffer[i] + prev;
-           prev = buffer[i];
-        }
-        for (i = 1; i < 128; i++) {
-        	if(buffer[i] < buffer[i-1] )
-        		buffer[i] = buffer[i-1];
-        }
-        assert(simdmaxbitsd1(init, buffer)<=b);
-        for (i = 0; i < 128; i++) {
-        	out[i] = 0; /* memset would do too */
-        }
-
-        /* delta-encode to 'i' bits */
-        simdpackwithoutmaskd1(init, buffer, (__m128i *)out, b);
-        simdunpackd1(init,  (__m128i *)out, backbuffer, b);
-
-        for (i = 0; i < 128; i++) {
-        	assert(buffer[i] == backbuffer[i]);
-        }
-
-        printf("advanced simdsearchd1: %d bits\n", b);
-
-        for (i = 0; i < 128; i++) {
-        	int pos;
-            initial = _mm_set1_epi32(init);
-        	pos = simdsearchd1(&initial, (__m128i *)out, b,
-                    buffer[i], &result);
-        	assert(pos == simdsearchwithlengthd1(init, (__m128i *)out, b, 128,
-                    buffer[i], &result));
-        	assert(buffer[pos] == buffer[i]);
-            if(pos > 0)
-            	assert(buffer[pos - 1] < buffer[i]);
-            assert(result == buffer[i]);
-        }
-        for (i = 0; i < 128; i++) {
-        	int pos;
-        	if(buffer[i] == 0) continue;
-        	initial = _mm_set1_epi32(init);
-        	pos = simdsearchd1(&initial, (__m128i *)out, b,
-                    buffer[i] - 1, &result);
-        	assert(pos == simdsearchwithlengthd1(init, (__m128i *)out, b, 128,
-                    buffer[i] - 1, &result));
-        	assert(buffer[pos] >= buffer[i]  - 1);
-            if(pos > 0)
-            	assert(buffer[pos - 1] < buffer[i]  - 1);
-            assert(result == buffer[pos]);
-        }
-		for (i = 0; i < 128; i++) {
-			int pos;
-			if (buffer[i] + 1 == 0)
-				continue;
-			initial = _mm_set1_epi32(init);
-			pos = simdsearchd1(&initial, (__m128i *) out, b,
-					buffer[i] + 1, &result);
-			assert(pos == simdsearchwithlengthd1(init, (__m128i *)out, b, 128,
-                    buffer[i] + 1, &result));
-			if(pos == 128) {
-				assert(buffer[i] == buffer[127]);
-			} else {
-			  assert(buffer[pos] >= buffer[i] + 1);
-			  if (pos > 0)
-				assert(buffer[pos - 1] < buffer[i] + 1);
-			  assert(result == buffer[pos]);
-			}
-		}
-    }
-    printf("advanced simdsearchd1: ok\n");
-    return 0;
-}
-
-int test_simdpackedselect() {
-    uint32_t buffer[128];
-    uint32_t initial = 33;
-    int b, i;
-
-    /* initialize the buffer */
-    for (i = 0; i < 128; i++)
-        buffer[i] = (uint32_t)(initial + i);
-
-    /* this test creates delta encoded buffers with different bits, then
-     * performs lower bound searches for each key */
-    for (b = 1; b <= 32; b++) {
-        uint32_t out[128];
-        /* delta-encode to 'i' bits */
-        simdpackwithoutmaskd1(initial, buffer, (__m128i *)out, b);
-
-        printf("simdselectd1: %d bits\n", b);
-
-        /* now perform the searches */
-        for (i = 0; i < 128; i++) {
-            assert(simdselectd1(initial, (__m128i *)out, b, (uint32_t)i)
-                            == initial + i);
-        }
-    }
-    printf("simdselectd1: ok\n");
-    return 0;
-}
-
-int test_simdpackedselect_advanced() {
-    uint32_t buffer[128];
-    uint32_t initial = 33;
-    uint32_t b;
-    int i;
-
-    /* this test creates delta encoded buffers with different bits, then
-     * performs lower bound searches for each key */
-    for (b = 0; b <= 32; b++) {
-        uint32_t prev = initial;
-    	uint32_t out[128];
-        /* initialize the buffer */
-        for (i = 0; i < 128; i++) {
-            buffer[i] =  ((uint32_t)(165576 * i)) ;
-            if(b < 32) buffer[i] %= (1<<b);
-        }
-        for (i = 0; i < 128; i++) {
-           buffer[i] = buffer[i] + prev;
-           prev = buffer[i];
-        }
-
-        for (i = 1; i < 128; i++) {
-        	if(buffer[i] < buffer[i-1] )
-        		buffer[i] = buffer[i-1];
-        }
-        assert(simdmaxbitsd1(initial, buffer)<=b);
-
-        for (i = 0; i < 128; i++) {
-        	out[i] = 0; /* memset would do too */
-        }
-
-        /* delta-encode to 'i' bits */
-        simdpackwithoutmaskd1(initial, buffer, (__m128i *)out, b);
-
-        printf("simdselectd1: %d bits\n", b);
-
-        /* now perform the searches */
-        for (i = 0; i < 128; i++) {
-        	uint32_t valretrieved = simdselectd1(initial, (__m128i *)out, b, (uint32_t)i);
-            assert(valretrieved == buffer[i]);
-        }
-    }
-    printf("advanced simdselectd1: ok\n");
-    return 0;
-}
-#endif
-
-
-int main() {
-    int r;
-    r =  testsetFOR();
-    if (r) {
-         printf("test failure 1\n");
-         return r;
-    }
-
-#ifdef __SSE4_1__
-    r =  testsetd1();
-    if (r) {
-         printf("test failure 2\n");
-         return r;
-    }
-#endif
-    r =  testset();
-    if (r) {
-         printf("test failure 3\n");
-         return r;
-    }
-
-    r = testshortFORpack();
-    if (r) {
-         printf("test failure 4\n");
-         return r;
-    }
-    r = testshortpack();
-    if (r) {
-         printf("test failure 5\n");
-         return r;
-    }
-    r = testlongpack();
-    if (r) {
-         printf("test failure 6\n");
-         return r;
-    }
-#ifdef __SSE4_1__
-    r = test_simdpackedsearchFOR();
-    if (r) {
-         printf("test failure 7\n");
-         return r;
-    }
-
-    r = testFOR();
-    if (r) {
-         printf("test failure 8\n");
-         return r;
-    }
-#endif
-#ifdef __AVX2__
-    r= testbabyavx();
-    if (r) {
-         printf("test failure baby avx\n");
-         return r;
-    }
-
-    r = testavx2();
-    if (r) {
-         printf("test failure 9 avx\n");
-         return r;
-    }
-#endif
-    r = test();
-    if (r) {
-         printf("test failure 9\n");
-         return r;
-    }
-
-    r = test_simdmaxbitsd1_length();
-    if (r) {
-         printf("test failure 10\n");
-         return r;
-    }
-#ifdef __SSE4_1__
-    r = test_simdpackedsearch();
-    if (r) {
-         printf("test failure 11\n");
-         return r;
-    }
-
-    r = test_simdpackedsearch_advanced();
-    if (r) {
-         printf("test failure 12\n");
-         return r;
-    }
-
-    r = test_simdpackedselect();
-    if (r) {
-         printf("test failure 13\n");
-         return r;
-    }
-
-    r = test_simdpackedselect_advanced();
-    if (r) {
-         printf("test failure 14\n");
-         return r;
-    }
-#endif
-    printf("All tests OK!\n");
-
-
-    return 0;
-}
--- a/cpp/simdcomp/tests/unit_chars.c
+++ b/cpp/simdcomp/tests/unit_chars.c
@@ -1,102 +0,0 @@
-/**
- * This code is released under a BSD License.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-#include "simdcomp.h"
-
-
-#define get_random_char() (uint8_t)(rand() % 256);
-
-
-int main() {
-    int N = 5000 * SIMDBlockSize, gap;
-    __m128i * buffer = malloc(SIMDBlockSize * sizeof(uint32_t));
-    uint32_t * datain = malloc(N * sizeof(uint32_t));
-    uint32_t * backbuffer = malloc(SIMDBlockSize * sizeof(uint32_t));
-
-    srand(time(NULL));
-
-    for (gap = 1; gap <= 387420489; gap *= 3) {
-        int k;
-        printf(" gap = %u \n", gap);
-
-    /* simulate some random character string, don't care about endiannes */
-        for (k = 0; k < N; ++k) {
-        uint8_t _tmp[4];
- 
-            _tmp[0] = get_random_char();
-            _tmp[1] = get_random_char();
-            _tmp[2] = get_random_char();
-            _tmp[3] = get_random_char();
-
-            memmove(&datain[k], _tmp, 4);
-        }
-        for (k = 0; k * SIMDBlockSize < N; ++k) {
-            /*
-               First part works for general arrays (sorted or unsorted)
-            */
-            int j;
-               /* we compute the bit width */
-            const uint32_t b = maxbits(datain + k * SIMDBlockSize);
-            /* we read 128 integers at "datain + k * SIMDBlockSize" and
-               write b 128-bit vectors at "buffer" */
-            simdpackwithoutmask(datain + k * SIMDBlockSize, buffer, b);
-            /* we read back b1 128-bit vectors at "buffer" and write 128 integers at backbuffer */
-            simdunpack(buffer, backbuffer, b);/* uncompressed */
-            for (j = 0; j < SIMDBlockSize; ++j) {
-                uint8_t chars_back[4];
-                uint8_t chars_in[4];
-
-                memmove(chars_back, &backbuffer[j], 4);
-                memmove(chars_in, &datain[k * SIMDBlockSize + j], 4);
-
-                if (chars_in[0] != chars_back[0]
-                    || chars_in[1] != chars_back[1]
-                    || chars_in[2] != chars_back[2]
-                    || chars_in[3] != chars_back[3]) {
-                    printf("bug in simdpack\n");
-                    return -2;
-                }
-            }
-
-            {
-                /*
-                 next part assumes that the data is sorted (uses differential coding)
-                */
-                uint32_t offset = 0;
-                /* we compute the bit width */
-                const uint32_t b1 = simdmaxbitsd1(offset,
-                datain + k * SIMDBlockSize);
-                   /* we read 128 integers at "datain + k * SIMDBlockSize" and
-                  write b1 128-bit vectors at "buffer" */
-                   simdpackwithoutmaskd1(offset, datain + k * SIMDBlockSize, buffer,
-                b1);
-                   /* we read back b1 128-bit vectors at "buffer" and write 128 integers at backbuffer */
-                   simdunpackd1(offset, buffer, backbuffer, b1);
-                for (j = 0; j < SIMDBlockSize; ++j) {
-                    uint8_t chars_back[4];
-                    uint8_t chars_in[4];
-
-                    memmove(chars_back, &backbuffer[j], 4);
-                    memmove(chars_in, &datain[k * SIMDBlockSize + j], 4);
-
-                    if (chars_in[0] != chars_back[0]
-                        || chars_in[1] != chars_back[1]
-                        || chars_in[2] != chars_back[2]
-                        || chars_in[3] != chars_back[3]) {
-                        printf("bug in simdpack\n");
-                        return -3;
-                    }
-                }
-                offset = datain[k * SIMDBlockSize + SIMDBlockSize - 1];
-            }
-        }
-    }
-    free(buffer);
-    free(datain);
-    free(backbuffer);
-    printf("Code looks good.\n");
-    return 0;
-}
--- a/cpp/simdcomp_wrapper.c
+++ b/cpp/simdcomp_wrapper.c
@@ -1,42 +0,0 @@
-#include "simdcomp.h"
-#include "simdcomputil.h"
-
-// assumes datain has a size of 128 uint32
-// and that buffer is large enough to host the data.
-size_t compress_sorted(
-        const uint32_t* datain,
-        uint8_t* output,
-        const uint32_t offset) {
-    const uint32_t b = simdmaxbitsd1(offset, datain);
-    *output++ = b;
-    simdpackwithoutmaskd1(offset, datain, (__m128i *) output,  b);
-    return 1 + b * sizeof(__m128i);
-}
-
-// assumes datain has a size of 128 uint32
-// and that buffer is large enough to host the data.
-size_t uncompress_sorted(
-        const uint8_t* compressed_data, 
-        uint32_t* output, 
-        uint32_t offset) {
-    const uint32_t b = *compressed_data++;
-    simdunpackd1(offset, (__m128i *)compressed_data, output, b);
-    return 1 + b * sizeof(__m128i);
-}
-
-size_t compress_unsorted(
-        const uint32_t* datain,
-        uint8_t* output) {
-    const uint32_t b = maxbits(datain);
-    *output++ = b;
-    simdpackwithoutmask(datain, (__m128i *) output,  b);
-    return 1 + b * sizeof(__m128i);
-}
-
-size_t uncompress_unsorted(
-        const uint8_t* compressed_data, 
-        uint32_t* output) {
-    const uint32_t b = *compressed_data++;
-    simdunpack((__m128i *)compressed_data, output, b);
-    return 1 + b * sizeof(__m128i);
-}
--- a/cpp/streamvbyte/.gitignore
+++ b/cpp/streamvbyte/.gitignore
@@ -1,32 +0,0 @@
-# Object files
-*.o
-*.ko
-*.obj
-*.elf
-
-# Precompiled Headers
-*.gch
-*.pch
-
-# Libraries
-*.lib
-*.a
-*.la
-*.lo
-
-# Shared objects (inc. Windows DLLs)
-*.dll
-*.so
-*.so.*
-*.dylib
-
-# Executables
-*.exe
-*.out
-*.app
-*.i*86
-*.x86_64
-*.hex
-
-# Debug files
-*.dSYM/
--- a/cpp/streamvbyte/.travis.yml
+++ b/cpp/streamvbyte/.travis.yml
@@ -1,7 +0,0 @@
-language: c
-sudo: false
-compiler:
-  - gcc
-  - clang
-
-script: make && ./unit
--- a/cpp/streamvbyte/LICENSE
+++ b/cpp/streamvbyte/LICENSE
@@ -1,202 +0,0 @@
-Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "{}"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright {yyyy} {name of copyright owner}
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-
--- a/cpp/streamvbyte/README.md
+++ b/cpp/streamvbyte/README.md
@@ -1,60 +0,0 @@
-streamvbyte
-===========
-[![Build Status](https://travis-ci.org/lemire/streamvbyte.png)](https://travis-ci.org/lemire/streamvbyte)
-
-StreamVByte is a new integer compression technique that applies SIMD instructions (vectorization) to
-Google's Group Varint approach. The net result is faster than other byte-oriented compression
-techniques.
-
-The approach is patent-free, the code is available under the Apache License.
-
-
-It includes fast differential coding.
-
-It assumes a recent Intel processor (e.g., Haswell or better).
-
-The code should build using most standard-compliant C99 compilers. The provided makefile
-expects a Linux-like system.
-
-
-Usage:
-
-      make
-      ./unit
-
-See example.c for an example.
-
-Short code sample:
-```C
-// suppose that datain is an array of uint32_t integers
-size_t compsize = streamvbyte_encode(datain, N, compressedbuffer); // encoding
-// here the result is stored in compressedbuffer using compsize bytes
-streamvbyte_decode(compressedbuffer, recovdata, N); // decoding (fast)
-```
-
-If the values are sorted, then it might be preferable to use differential coding:
-```C
-// suppose that datain is an array of uint32_t integers
-size_t compsize = streamvbyte_delta_encode(datain, N, compressedbuffer,0); // encoding
-// here the result is stored in compressedbuffer using compsize bytes
-streamvbyte_delta_decode(compressedbuffer, recovdata, N,0); // decoding (fast)
-```
-You have to know how many integers were coded when you decompress. You can store this 
-information along with the compressed stream.
-
-See also
--------
-* SIMDCompressionAndIntersection: A C++ library to compress and intersect sorted lists of integers using SIMD instructions https://github.com/lemire/SIMDCompressionAndIntersection
-* The FastPFOR C++ library : Fast integer compression https://github.com/lemire/FastPFor
-* High-performance dictionary coding https://github.com/lemire/dictionary
-* LittleIntPacker: C library to pack and unpack short arrays of integers as fast as possible https://github.com/lemire/LittleIntPacker
-* The SIMDComp library: A simple C library for compressing lists of integers using binary packing https://github.com/lemire/simdcomp
-* MaskedVByte: Fast decoder for VByte-compressed integers https://github.com/lemire/MaskedVByte
-* CSharpFastPFOR: A C#  integer compression library  https://github.com/Genbox/CSharpFastPFOR
-* JavaFastPFOR: A Java integer compression library https://github.com/lemire/JavaFastPFOR
-* Encoding: Integer Compression Libraries for Go https://github.com/zhenjl/encoding
-* FrameOfReference is a C++ library dedicated to frame-of-reference (FOR) compression: https://github.com/lemire/FrameOfReference
-* libvbyte: A fast implementation for varbyte 32bit/64bit integer compression https://github.com/cruppstahl/libvbyte
-* TurboPFor is a C library that offers lots of interesting optimizations. Well worth checking! (GPL license) https://github.com/powturbo/TurboPFor
-* Oroch is a C++ library that offers a usable API (MIT license) https://github.com/ademakov/Oroch
-
--- a/cpp/streamvbyte/example.c
+++ b/cpp/streamvbyte/example.c
@@ -1,24 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-
-#include "streamvbyte.h"
-
-int main() {
-	int N = 5000;
-	uint32_t * datain = malloc(N * sizeof(uint32_t));
-	uint8_t * compressedbuffer = malloc(N * sizeof(uint32_t));
-	uint32_t * recovdata = malloc(N * sizeof(uint32_t));
-	for (int k = 0; k < N; ++k)
-		datain[k] = 120;
-	size_t compsize = streamvbyte_encode(datain, N, compressedbuffer); // encoding
-	// here the result is stored in compressedbuffer using compsize bytes
-	size_t compsize2 = streamvbyte_decode(compressedbuffer, recovdata,
-					N); // decoding (fast)
-	assert(compsize == compsize2);
-	free(datain);
-	free(compressedbuffer);
-	free(recovdata);
-	printf("Compressed %d integers down to %d bytes.\n",N,(int) compsize);
-	return 0;
-}
--- a/cpp/streamvbyte/include/streamvbyte.h
+++ b/cpp/streamvbyte/include/streamvbyte.h
@@ -1,19 +0,0 @@
-
-#ifndef VARINTDECODE_H_
-#define VARINTDECODE_H_
-#define __STDC_FORMAT_MACROS
-#include <inttypes.h>
-#include <stdint.h>// please use a C99-compatible compiler
-#include <stddef.h>
-
-
-// Encode an array of a given length read from "in" to "out" in StreamVByte format.
-// Returns the number of bytes written.
-size_t streamvbyte_encode(const uint32_t *in, uint32_t length, uint8_t *out);
-
-// Read "length" 32-bit integers in StreamVByte format from "in", storing the result in "out".
-// Returns the number of bytes read.
-size_t streamvbyte_decode(const uint8_t* in, uint32_t* out, uint32_t length);
-
-
-#endif /* VARINTDECODE_H_ */
--- a/cpp/streamvbyte/include/streamvbytedelta.h
+++ b/cpp/streamvbyte/include/streamvbytedelta.h
@@ -1,24 +0,0 @@
-/*
- * streamvbytedelta.h
- *
- *  Created on: Apr 14, 2016
- *      Author: lemire
- */
-
-#ifndef INCLUDE_STREAMVBYTEDELTA_H_
-#define INCLUDE_STREAMVBYTEDELTA_H_
-
-
-// Encode an array of a given length read from "in" to "out" in StreamVByte format.
-// Returns the number of bytes written.
-// this version uses differential coding (coding differences between values) starting at prev (you can often set prev to zero)
-size_t streamvbyte_delta_encode(const uint32_t *in, uint32_t length, uint8_t *out, uint32_t  prev);
-
-// Read "length" 32-bit integers in StreamVByte format from in, storing the result in out.
-// Returns the number of bytes read.
-// this version uses differential coding (coding differences between values) starting at prev (you can often set prev to zero)
-size_t streamvbyte_delta_decode(const uint8_t* in, uint32_t* out, uint32_t length, uint32_t  prev);
-
-
-
-#endif /* INCLUDE_STREAMVBYTEDELTA_H_ */
--- a/cpp/streamvbyte/makefile
+++ b/cpp/streamvbyte/makefile
@@ -1,58 +0,0 @@
-# minimalist makefile
-.SUFFIXES:
-#
-.SUFFIXES: .cpp .o .c .h
-
-CFLAGS = -fPIC -march=native -std=c99 -O3 -Wall -Wextra -pedantic -Wshadow
-LDFLAGS = -shared
-LIBNAME=libstreamvbyte.so.0.0.1
-all:  unit $(LIBNAME)
-test:
-	./unit
-install: $(OBJECTS)
-	cp $(LIBNAME) /usr/local/lib
-	ln -s /usr/local/lib/$(LIBNAME) /usr/local/lib/libstreamvbyte.so
-	ldconfig
-	cp $(HEADERS) /usr/local/include
-
-
-
-HEADERS=./include/streamvbyte.h ./include/streamvbytedelta.h 
-
-uninstall:
-	for h in $(HEADERS) ; do rm  /usr/local/$$h; done
-	rm  /usr/local/lib/$(LIBNAME)
-	rm /usr/local/lib/libstreamvbyte.so
-	ldconfig
-
-
-OBJECTS= streamvbyte.o streamvbytedelta.o
-
-
-
-streamvbytedelta.o: ./src/streamvbytedelta.c $(HEADERS)
-	$(CC) $(CFLAGS) -c ./src/streamvbytedelta.c -Iinclude
-
-
-streamvbyte.o: ./src/streamvbyte.c $(HEADERS)
-	$(CC) $(CFLAGS) -c ./src/streamvbyte.c -Iinclude
-
-
-
-$(LIBNAME): $(OBJECTS)
-	$(CC) $(CFLAGS) -o $(LIBNAME) $(OBJECTS)  $(LDFLAGS)
-
-
-
-
-example: ./example.c    $(HEADERS) $(OBJECTS)
-	$(CC) $(CFLAGS) -o example ./example.c -Iinclude  $(OBJECTS)
-
-unit: ./tests/unit.c    $(HEADERS) $(OBJECTS)
-	$(CC) $(CFLAGS) -o unit ./tests/unit.c -Iinclude  $(OBJECTS)
-
-dynunit: ./tests/unit.c    $(HEADERS) $(LIBNAME)
-	$(CC) $(CFLAGS) -o dynunit ./tests/unit.c -Iinclude  -lstreamvbyte
-
-clean:
-	rm -f unit *.o $(LIBNAME) example
--- a/cpp/streamvbyte/src/streamvbyte.c
+++ b/cpp/streamvbyte/src/streamvbyte.c
@@ -1,495 +0,0 @@
-#include "streamvbyte.h"
-#if defined(_MSC_VER)
-     /* Microsoft C/C++-compatible compiler */
-     #include <intrin.h>
-#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
-     /* GCC-compatible compiler, targeting x86/x86-64 */
-     #include <x86intrin.h>
-#elif defined(__GNUC__) && defined(__ARM_NEON__)
-     /* GCC-compatible compiler, targeting ARM with NEON */
-     #include <arm_neon.h>
-#elif defined(__GNUC__) && defined(__IWMMXT__)
-     /* GCC-compatible compiler, targeting ARM with WMMX */
-     #include <mmintrin.h>
-#elif (defined(__GNUC__) || defined(__xlC__)) && (defined(__VEC__) || defined(__ALTIVEC__))
-     /* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */
-     #include <altivec.h>
-#elif defined(__GNUC__) && defined(__SPE__)
-     /* GCC-compatible compiler, targeting PowerPC with SPE */
-     #include <spe.h>
-#endif
-
-static uint8_t lengthTable[256] = { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9,
-		10, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 6, 7, 8, 9, 7, 8,
-		9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10,
-		11, 12, 10, 11, 12, 13, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10,
-		11, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10,
-		8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11,
-		12, 10, 11, 12, 13, 11, 12, 13, 14, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10,
-		11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12,
-		13, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 9, 10,
-		11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15, 7, 8, 9, 10, 8,
-		9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11, 12,
-		10, 11, 12, 13, 11, 12, 13, 14, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12,
-		13, 14, 12, 13, 14, 15, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15,
-		13, 14, 15, 16 };
-
-static uint8_t shuffleTable[256][16] = { { 0, -1, -1, -1, 1, -1, -1, -1, 2, -1,
-		-1, -1, 3, -1, -1, -1 }, // 1111
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1 },  // 2111
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1 },   // 3111
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1 },    // 4111
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1 },  // 1211
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1 },   // 2211
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1 },    // 3211
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, -1, -1, -1 },     // 4211
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, -1, -1, -1 },   // 1311
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, -1, -1, -1 },    // 2311
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, -1, -1, -1 },     // 3311
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, -1, -1, -1 },      // 4311
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, -1, -1, -1 },    // 1411
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, -1, -1, -1 },     // 2411
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, -1, -1, -1 },      // 3411
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, -1, -1, -1 },       // 4411
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1 },  // 1121
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1 },   // 2121
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1 },    // 3121
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1 },     // 4121
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1 },   // 1221
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1 },    // 2221
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1 },     // 3221
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, -1, -1, -1 },      // 4221
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, -1, -1, -1 },    // 1321
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, -1, -1, -1 },     // 2321
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, -1, -1, -1 },      // 3321
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, -1, -1, -1 },       // 4321
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, -1, -1, -1 },     // 1421
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, -1, -1, -1 },      // 2421
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, -1, -1, -1 },       // 3421
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, -1, -1, -1 },       // 4421
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1 },   // 1131
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1 },    // 2131
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1 },     // 3131
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1 },      // 4131
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1 },    // 1231
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1 },     // 2231
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1 },      // 3231
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, -1, -1, -1 },       // 4231
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, -1, -1, -1 },     // 1331
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, -1, -1, -1 },      // 2331
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, -1, -1, -1 },       // 3331
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, -1, -1, -1 },       // 4331
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, -1, -1 },      // 1431
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, -1, -1, -1 },       // 2431
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, -1, -1, -1 },       // 3431
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, -1, -1, -1 },       // 4431
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1 },    // 1141
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, -1, -1, -1 },     // 2141
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, -1, -1, -1 },      // 3141
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, -1, -1, -1 },       // 4141
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, -1, -1, -1 },     // 1241
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, -1, -1, -1 },      // 2241
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, -1, -1, -1 },       // 3241
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, -1, -1, -1 },       // 4241
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, -1, -1, -1 },      // 1341
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, -1, -1, -1 },       // 2341
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, -1, -1, -1 },       // 3341
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, -1, -1, -1 },       // 4341
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1 },       // 1441
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1, -1 },       // 2441
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, -1 },       // 3441
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1, -1 },       // 4441
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1 },  // 1112
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1 },   // 2112
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1 },    // 3112
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1 },     // 4112
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1 },   // 1212
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1 },    // 2212
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1 },     // 3212
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, -1, -1 },      // 4212
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, -1, -1 },    // 1312
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, -1, -1 },     // 2312
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, -1, -1 },      // 3312
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, -1, -1 },       // 4312
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, -1, -1 },     // 1412
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, -1, -1 },      // 2412
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, -1, -1 },       // 3412
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, -1, -1 },       // 4412
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1 },   // 1122
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1 },    // 2122
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1 },     // 3122
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1 },      // 4122
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1 },    // 1222
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1 },     // 2222
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1 },      // 3222
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, -1, -1 },       // 4222
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, -1, -1 },     // 1322
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, -1, -1 },      // 2322
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, -1, -1 },       // 3322
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, -1, -1 },       // 4322
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, -1, -1 },      // 1422
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, -1, -1 },       // 2422
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, -1, -1 },       // 3422
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, -1, -1 },       // 4422
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1 },    // 1132
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1 },     // 2132
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1 },      // 3132
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1 },       // 4132
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1 },     // 1232
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1 },      // 2232
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1 },       // 3232
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, -1, -1 },       // 4232
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, -1, -1 },      // 1332
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, -1, -1 },       // 2332
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, -1, -1 },       // 3332
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, -1, -1 },       // 4332
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, -1, -1 },       // 1432
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, -1, -1 },       // 2432
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, -1, -1 },       // 3432
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, -1, -1 },       // 4432
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1 },     // 1142
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, -1, -1 },      // 2142
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, -1, -1 },       // 3142
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, -1, -1 },       // 4142
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, -1, -1 },      // 1242
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, -1, -1 },       // 2242
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, -1, -1 },       // 3242
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, -1, -1 },       // 4242
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, -1, -1 },       // 1342
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, -1, -1 },       // 2342
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, -1, -1 },       // 3342
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, -1, -1 },       // 4342
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1 },       // 1442
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1 },       // 2442
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1 },       // 3442
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1 },       // 4442
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1 },   // 1113
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1 },    // 2113
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1 },     // 3113
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1 },      // 4113
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1 },    // 1213
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1 },     // 2213
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1 },      // 3213
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, 9, -1 },       // 4213
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, -1 },     // 1313
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, -1 },      // 2313
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, -1 },       // 3313
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, 10, -1 },       // 4313
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, 8, -1 },      // 1413
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, 9, -1 },       // 2413
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, 10, -1 },       // 3413
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, 11, -1 },       // 4413
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1 },    // 1123
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1 },     // 2123
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1 },      // 3123
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1 },       // 4123
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1 },     // 1223
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1 },      // 2223
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1 },       // 3223
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, 10, -1 },       // 4223
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, -1 },      // 1323
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, -1 },       // 2323
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, -1 },       // 3323
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, 11, -1 },       // 4323
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, 9, -1 },       // 1423
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, 10, -1 },       // 2423
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, 11, -1 },       // 3423
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, 12, -1 },       // 4423
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1 },     // 1133
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1 },      // 2133
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1 },       // 3133
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1 },       // 4133
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1 },      // 1233
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1 },       // 2233
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1 },       // 3233
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, 11, -1 },       // 4233
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, -1 },       // 1333
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, -1 },       // 2333
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1 },       // 3333
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, 12, -1 },       // 4333
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, -1 },       // 1433
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, -1 },       // 2433
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, 12, -1 },       // 3433
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, 13, -1 },       // 4433
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1 },      // 1143
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, -1 },       // 2143
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, 10, -1 },       // 3143
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, 11, -1 },       // 4143
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, 9, -1 },       // 1243
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, 10, -1 },       // 2243
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, 11, -1 },       // 3243
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, 12, -1 },       // 4243
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, 10, -1 },       // 1343
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, 11, -1 },       // 2343
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, -1 },       // 3343
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, 13, -1 },       // 4343
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1 },       // 1443
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1 },       // 2443
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1 },       // 3443
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -1 },       // 4443
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6 },    // 1114
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, 7 },     // 2114
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, 8 },      // 3114
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, 8, 9 },       // 4114
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, 7 },     // 1214
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, 8 },      // 2214
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, 9 },       // 3214
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, 9, 10 },       // 4214
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, 8 },      // 1314
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, 9 },       // 2314
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, 10 },       // 3314
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, 10, 11 },       // 4314
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, 8, 9 },       // 1414
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, 9, 10 },       // 2414
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, 10, 11 },       // 3414
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, 11, 12 },       // 4414
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7 },     // 1124
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, 8 },      // 2124
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, 9 },       // 3124
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, 9, 10 },       // 4124
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, 8 },      // 1224
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, 9 },       // 2224
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, 10 },       // 3224
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, 10, 11 },       // 4224
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, 9 },       // 1324
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, 10 },       // 2324
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, 11 },       // 3324
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, 11, 12 },       // 4324
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, 9, 10 },       // 1424
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, 10, 11 },       // 2424
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, 11, 12 },       // 3424
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, 12, 13 },       // 4424
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8 },      // 1134
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, 9 },       // 2134
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, 10 },       // 3134
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, 10, 11 },       // 4134
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, 9 },       // 1234
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, 10 },       // 2234
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, 11 },       // 3234
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, 11, 12 },       // 4234
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, 10 },       // 1334
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, 11 },       // 2334
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, 12 },       // 3334
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, 12, 13 },       // 4334
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, 11 },       // 1434
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, 12 },       // 2434
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, 12, 13 },       // 3434
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, 13, 14 },       // 4434
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9 },       // 1144
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, 10 },       // 2144
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, 10, 11 },       // 3144
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, 11, 12 },       // 4144
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, 9, 10 },       // 1244
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, 10, 11 },       // 2244
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, 11, 12 },       // 3244
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, 12, 13 },       // 4244
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, 10, 11 },       // 1344
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, 11, 12 },       // 2344
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13 },       // 3344
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, 13, 14 },       // 4344
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 },       // 1444
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 },       // 2444
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 },       // 3444
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }        // 4444
-};
-
-static uint8_t _encode_data(uint32_t val, uint8_t *__restrict__ *dataPtrPtr) {
-	uint8_t *dataPtr = *dataPtrPtr;
-	uint8_t code;
-
-	if (val < (1 << 8)) { // 1 byte
-		*dataPtr = (uint8_t)(val);
-		*dataPtrPtr += 1;
-		code = 0;
-	} else if (val < (1 << 16)) { // 2 bytes
-		*(uint16_t *) dataPtr = (uint16_t)(val);
-		*dataPtrPtr += 2;
-		code = 1;
-	} else if (val < (1 << 24)) { // 3 bytes
-		*(uint16_t *) dataPtr = (uint16_t)(val);
-		*(dataPtr + 2) = (uint8_t)(val >> 16);
-		*dataPtrPtr += 3;
-		code = 2;
-	} else { // 4 bytes
-		*(uint32_t *) dataPtr = val;
-		*dataPtrPtr += 4;
-		code = 3;
-	}
-
-	return code;
-}
-
-static uint8_t *svb_encode_scalar(const uint32_t *in,
-		uint8_t *__restrict__ keyPtr, uint8_t *__restrict__ dataPtr,
-		uint32_t count) {
-	if (count == 0)
-		return dataPtr; // exit immediately if no data
-
-	uint8_t shift = 0; // cycles 0, 2, 4, 6, 0, 2, 4, 6, ...
-	uint8_t key = 0;
-	for (uint32_t c = 0; c < count; c++) {
-		if (shift == 8) {
-			shift = 0;
-			*keyPtr++ = key;
-			key = 0;
-		}
-		uint32_t val = in[c];
-		uint8_t code = _encode_data(val, &dataPtr);
-		key |= code << shift;
-		shift += 2;
-	}
-
-	*keyPtr = key;  // write last key (no increment needed)
-	return dataPtr; // pointer to first unused data byte
-}
-
-// Encode an array of a given length read from "in" to "out" in StreamVByte format.
-// Returns the number of bytes written.
-size_t streamvbyte_encode(const uint32_t *in, uint32_t count, uint8_t *out) {
-	uint8_t *keyPtr = out;
-	uint32_t keyLen = (count + 3) / 4; // 2-bits rounded to full byte
-	uint8_t *dataPtr = keyPtr + keyLen; // variable byte data after all keys
-	return svb_encode_scalar(in, keyPtr, dataPtr, count) - out;
-}
-
-static inline __m128i _decode_avx(uint32_t key,
-		const uint8_t *__restrict__ *dataPtrPtr) {
-	uint8_t len = lengthTable[key];
-	__m128i Data = _mm_loadu_si128((__m128i *) *dataPtrPtr);
-	__m128i Shuf = *(__m128i *) &shuffleTable[key];
-
-	Data = _mm_shuffle_epi8(Data, Shuf);
-	*dataPtrPtr += len;
-	return Data;
-}
-
-static inline void _write_avx(uint32_t *out, __m128i Vec) {
-	_mm_storeu_si128((__m128i *) out, Vec);
-}
-
-static inline uint32_t _decode_data(const uint8_t **dataPtrPtr, uint8_t code) {
-	const uint8_t *dataPtr = *dataPtrPtr;
-	uint32_t val;
-
-	if (code == 0) { // 1 byte
-		val = (uint32_t) * dataPtr;
-		dataPtr += 1;
-	} else if (code == 1) { // 2 bytes
-		val = (uint32_t) * (uint16_t *) dataPtr;
-		dataPtr += 2;
-	} else if (code == 2) { // 3 bytes
-		val = (uint32_t) * (uint16_t *) dataPtr;
-		val |= *(dataPtr + 2) << 16;
-		dataPtr += 3;
-	} else {                      // code == 3
-		val = *(uint32_t *) dataPtr; // 4 bytes
-		dataPtr += 4;
-	}
-
-	*dataPtrPtr = dataPtr;
-	return val;
-}
-static const uint8_t *svb_decode_scalar(uint32_t *outPtr, const uint8_t *keyPtr,
-		const uint8_t *dataPtr, uint32_t count) {
-	if (count == 0)
-		return dataPtr; // no reads or writes if no data
-
-	uint8_t shift = 0;
-	uint32_t key = *keyPtr++;
-	for (uint32_t c = 0; c < count; c++) {
-		if (shift == 8) {
-			shift = 0;
-			key = *keyPtr++;
-		}
-		uint32_t val = _decode_data(&dataPtr, (key >> shift) & 0x3);
-		*outPtr++ = val;
-		shift += 2;
-	}
-
-	return dataPtr; // pointer to first unused byte after end
-}
-
-const uint8_t *svb_decode_avx_simple(uint32_t *out,
-		const uint8_t *__restrict__ keyPtr, const uint8_t *__restrict__ dataPtr,
-		uint64_t count) {
-
-	uint64_t keybytes = count / 4; // number of key bytes
-	__m128i Data;
-	if (keybytes >= 8) {
-
-		int64_t Offset = -(int64_t) keybytes / 8 + 1;
-
-		const uint64_t *keyPtr64 = (const uint64_t *) keyPtr - Offset;
-		uint64_t nextkeys = keyPtr64[Offset];
-		for (; Offset != 0; ++Offset) {
-			uint64_t keys = nextkeys;
-			nextkeys = keyPtr64[Offset + 1];
-
-			Data = _decode_avx((keys & 0xFF), &dataPtr);
-			_write_avx(out, Data);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			_write_avx(out + 4, Data);
-
-			keys >>= 16;
-			Data = _decode_avx((keys & 0xFF), &dataPtr);
-			_write_avx(out + 8, Data);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			_write_avx(out + 12, Data);
-
-			keys >>= 16;
-			Data = _decode_avx((keys & 0xFF), &dataPtr);
-			_write_avx(out + 16, Data);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			_write_avx(out + 20, Data);
-
-			keys >>= 16;
-			Data = _decode_avx((keys & 0xFF), &dataPtr);
-			_write_avx(out + 24, Data);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			_write_avx(out + 28, Data);
-
-			out += 32;
-		}
-		{
-			uint64_t keys = nextkeys;
-
-			Data = _decode_avx((keys & 0xFF), &dataPtr);
-			_write_avx(out, Data);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			_write_avx(out + 4, Data);
-
-			keys >>= 16;
-			Data = _decode_avx((keys & 0xFF), &dataPtr);
-			_write_avx(out + 8, Data);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			_write_avx(out + 12, Data);
-
-			keys >>= 16;
-			Data = _decode_avx((keys & 0xFF), &dataPtr);
-			_write_avx(out + 16, Data);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			_write_avx(out + 20, Data);
-
-			keys >>= 16;
-			Data = _decode_avx((keys & 0xFF), &dataPtr);
-			_write_avx(out + 24, Data);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			_write_avx(out + 28, Data);
-
-			out += 32;
-		}
-	}
-	uint64_t consumedkeys = keybytes - (keybytes & 7);
-	return svb_decode_scalar(out, keyPtr + consumedkeys, dataPtr, count & 31);
-}
-
-// Read "count" 32-bit integers in StreamVByte format from "in", storing the result in "out". Returns the number of bytes read.
-size_t streamvbyte_decode(const uint8_t* in, uint32_t* out, uint32_t count) {
-	if (count == 0)
-		return 0;
-	const uint8_t *keyPtr = in;            // full list of keys is next
-	uint32_t keyLen = ((count + 3) / 4); // 2-bits per key (rounded up)
-	const uint8_t *dataPtr = keyPtr + keyLen;  // data starts at end of keys
-	return svb_decode_avx_simple(out, keyPtr, dataPtr, count) - in;
-
-}
--- a/cpp/streamvbyte/src/streamvbytedelta.c
+++ b/cpp/streamvbyte/src/streamvbytedelta.c
@@ -1,575 +0,0 @@
-#include "streamvbyte.h"
-#if defined(_MSC_VER)
-     /* Microsoft C/C++-compatible compiler */
-     #include <intrin.h>
-#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
-     /* GCC-compatible compiler, targeting x86/x86-64 */
-     #include <x86intrin.h>
-#elif defined(__GNUC__) && defined(__ARM_NEON__)
-     /* GCC-compatible compiler, targeting ARM with NEON */
-     #include <arm_neon.h>
-#elif defined(__GNUC__) && defined(__IWMMXT__)
-     /* GCC-compatible compiler, targeting ARM with WMMX */
-     #include <mmintrin.h>
-#elif (defined(__GNUC__) || defined(__xlC__)) && (defined(__VEC__) || defined(__ALTIVEC__))
-     /* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */
-     #include <altivec.h>
-#elif defined(__GNUC__) && defined(__SPE__)
-     /* GCC-compatible compiler, targeting PowerPC with SPE */
-     #include <spe.h>
-#endif
-
-static uint8_t lengthTable[256] = { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9,
-		10, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 6, 7, 8, 9, 7, 8,
-		9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10,
-		11, 12, 10, 11, 12, 13, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10,
-		11, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10,
-		8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11,
-		12, 10, 11, 12, 13, 11, 12, 13, 14, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10,
-		11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12,
-		13, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 9, 10,
-		11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15, 7, 8, 9, 10, 8,
-		9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11, 12,
-		10, 11, 12, 13, 11, 12, 13, 14, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12,
-		13, 14, 12, 13, 14, 15, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15,
-		13, 14, 15, 16 };
-
-static uint8_t shuffleTable[256][16] = { { 0, -1, -1, -1, 1, -1, -1, -1, 2, -1,
-		-1, -1, 3, -1, -1, -1 }, // 1111
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1 },  // 2111
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1 },   // 3111
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1 },    // 4111
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1 },  // 1211
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1 },   // 2211
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1 },    // 3211
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, -1, -1, -1 },     // 4211
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, -1, -1, -1 },   // 1311
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, -1, -1, -1 },    // 2311
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, -1, -1, -1 },     // 3311
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, -1, -1, -1 },      // 4311
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, -1, -1, -1 },    // 1411
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, -1, -1, -1 },     // 2411
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, -1, -1, -1 },      // 3411
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, -1, -1, -1 },       // 4411
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1 },  // 1121
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1 },   // 2121
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1 },    // 3121
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1 },     // 4121
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1 },   // 1221
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1 },    // 2221
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1 },     // 3221
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, -1, -1, -1 },      // 4221
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, -1, -1, -1 },    // 1321
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, -1, -1, -1 },     // 2321
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, -1, -1, -1 },      // 3321
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, -1, -1, -1 },       // 4321
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, -1, -1, -1 },     // 1421
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, -1, -1, -1 },      // 2421
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, -1, -1, -1 },       // 3421
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, -1, -1, -1 },       // 4421
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1 },   // 1131
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1 },    // 2131
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1 },     // 3131
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1 },      // 4131
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1 },    // 1231
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1 },     // 2231
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1 },      // 3231
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, -1, -1, -1 },       // 4231
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, -1, -1, -1 },     // 1331
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, -1, -1, -1 },      // 2331
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, -1, -1, -1 },       // 3331
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, -1, -1, -1 },       // 4331
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, -1, -1 },      // 1431
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, -1, -1, -1 },       // 2431
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, -1, -1, -1 },       // 3431
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, -1, -1, -1 },       // 4431
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1 },    // 1141
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, -1, -1, -1 },     // 2141
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, -1, -1, -1 },      // 3141
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, -1, -1, -1 },       // 4141
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, -1, -1, -1 },     // 1241
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, -1, -1, -1 },      // 2241
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, -1, -1, -1 },       // 3241
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, -1, -1, -1 },       // 4241
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, -1, -1, -1 },      // 1341
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, -1, -1, -1 },       // 2341
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, -1, -1, -1 },       // 3341
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, -1, -1, -1 },       // 4341
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1 },       // 1441
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1, -1 },       // 2441
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, -1 },       // 3441
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1, -1 },       // 4441
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1 },  // 1112
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1 },   // 2112
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1 },    // 3112
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1 },     // 4112
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1 },   // 1212
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1 },    // 2212
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1 },     // 3212
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, -1, -1 },      // 4212
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, -1, -1 },    // 1312
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, -1, -1 },     // 2312
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, -1, -1 },      // 3312
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, -1, -1 },       // 4312
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, -1, -1 },     // 1412
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, -1, -1 },      // 2412
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, -1, -1 },       // 3412
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, -1, -1 },       // 4412
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1 },   // 1122
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1 },    // 2122
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1 },     // 3122
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1 },      // 4122
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1 },    // 1222
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1 },     // 2222
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1 },      // 3222
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, -1, -1 },       // 4222
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, -1, -1 },     // 1322
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, -1, -1 },      // 2322
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, -1, -1 },       // 3322
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, -1, -1 },       // 4322
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, -1, -1 },      // 1422
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, -1, -1 },       // 2422
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, -1, -1 },       // 3422
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, -1, -1 },       // 4422
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1 },    // 1132
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1 },     // 2132
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1 },      // 3132
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1 },       // 4132
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1 },     // 1232
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1 },      // 2232
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1 },       // 3232
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, -1, -1 },       // 4232
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, -1, -1 },      // 1332
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, -1, -1 },       // 2332
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, -1, -1 },       // 3332
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, -1, -1 },       // 4332
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, -1, -1 },       // 1432
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, -1, -1 },       // 2432
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, -1, -1 },       // 3432
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, -1, -1 },       // 4432
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1 },     // 1142
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, -1, -1 },      // 2142
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, -1, -1 },       // 3142
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, -1, -1 },       // 4142
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, -1, -1 },      // 1242
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, -1, -1 },       // 2242
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, -1, -1 },       // 3242
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, -1, -1 },       // 4242
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, -1, -1 },       // 1342
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, -1, -1 },       // 2342
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, -1, -1 },       // 3342
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, -1, -1 },       // 4342
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1 },       // 1442
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1 },       // 2442
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1 },       // 3442
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1 },       // 4442
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1 },   // 1113
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1 },    // 2113
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1 },     // 3113
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1 },      // 4113
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1 },    // 1213
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1 },     // 2213
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1 },      // 3213
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, 9, -1 },       // 4213
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, -1 },     // 1313
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, -1 },      // 2313
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, -1 },       // 3313
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, 10, -1 },       // 4313
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, 8, -1 },      // 1413
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, 9, -1 },       // 2413
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, 10, -1 },       // 3413
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, 11, -1 },       // 4413
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1 },    // 1123
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1 },     // 2123
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1 },      // 3123
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1 },       // 4123
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1 },     // 1223
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1 },      // 2223
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1 },       // 3223
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, 10, -1 },       // 4223
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, -1 },      // 1323
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, -1 },       // 2323
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, -1 },       // 3323
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, 11, -1 },       // 4323
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, 9, -1 },       // 1423
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, 10, -1 },       // 2423
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, 11, -1 },       // 3423
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, 12, -1 },       // 4423
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1 },     // 1133
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1 },      // 2133
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1 },       // 3133
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1 },       // 4133
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1 },      // 1233
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1 },       // 2233
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1 },       // 3233
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, 11, -1 },       // 4233
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, -1 },       // 1333
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, -1 },       // 2333
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1 },       // 3333
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, 12, -1 },       // 4333
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, -1 },       // 1433
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, -1 },       // 2433
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, 12, -1 },       // 3433
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, 13, -1 },       // 4433
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1 },      // 1143
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, -1 },       // 2143
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, 10, -1 },       // 3143
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, 11, -1 },       // 4143
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, 9, -1 },       // 1243
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, 10, -1 },       // 2243
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, 11, -1 },       // 3243
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, 12, -1 },       // 4243
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, 10, -1 },       // 1343
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, 11, -1 },       // 2343
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, -1 },       // 3343
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, 13, -1 },       // 4343
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1 },       // 1443
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1 },       // 2443
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1 },       // 3443
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -1 },       // 4443
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6 },    // 1114
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, 7 },     // 2114
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, 8 },      // 3114
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, 8, 9 },       // 4114
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, 7 },     // 1214
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, 8 },      // 2214
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, 9 },       // 3214
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, 9, 10 },       // 4214
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, 8 },      // 1314
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, 9 },       // 2314
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, 10 },       // 3314
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, 10, 11 },       // 4314
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, 8, 9 },       // 1414
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, 9, 10 },       // 2414
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, 10, 11 },       // 3414
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, 11, 12 },       // 4414
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7 },     // 1124
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, 8 },      // 2124
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, 9 },       // 3124
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, 9, 10 },       // 4124
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, 8 },      // 1224
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, 9 },       // 2224
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, 10 },       // 3224
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, 10, 11 },       // 4224
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, 9 },       // 1324
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, 10 },       // 2324
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, 11 },       // 3324
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, 11, 12 },       // 4324
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, 9, 10 },       // 1424
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, 10, 11 },       // 2424
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, 11, 12 },       // 3424
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, 12, 13 },       // 4424
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8 },      // 1134
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, 9 },       // 2134
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, 10 },       // 3134
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, 10, 11 },       // 4134
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, 9 },       // 1234
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, 10 },       // 2234
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, 11 },       // 3234
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, 11, 12 },       // 4234
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, 10 },       // 1334
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, 11 },       // 2334
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, 12 },       // 3334
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, 12, 13 },       // 4334
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, 11 },       // 1434
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, 12 },       // 2434
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, 12, 13 },       // 3434
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, 13, 14 },       // 4434
-		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9 },       // 1144
-		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, 10 },       // 2144
-		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, 10, 11 },       // 3144
-		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, 11, 12 },       // 4144
-		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, 9, 10 },       // 1244
-		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, 10, 11 },       // 2244
-		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, 11, 12 },       // 3244
-		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, 12, 13 },       // 4244
-		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, 10, 11 },       // 1344
-		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, 11, 12 },       // 2344
-		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13 },       // 3344
-		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, 13, 14 },       // 4344
-		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 },       // 1444
-		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 },       // 2444
-		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 },       // 3444
-		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }        // 4444
-};
-
-static uint8_t _encode_data(uint32_t val, uint8_t *__restrict__ *dataPtrPtr) {
-	uint8_t *dataPtr = *dataPtrPtr;
-	uint8_t code;
-
-	if (val < (1 << 8)) { // 1 byte
-		*dataPtr = (uint8_t)(val);
-		*dataPtrPtr += 1;
-		code = 0;
-	} else if (val < (1 << 16)) { // 2 bytes
-		*(uint16_t *) dataPtr = (uint16_t)(val);
-		*dataPtrPtr += 2;
-		code = 1;
-	} else if (val < (1 << 24)) { // 3 bytes
-		*(uint16_t *) dataPtr = (uint16_t)(val);
-		*(dataPtr + 2) = (uint8_t)(val >> 16);
-		*dataPtrPtr += 3;
-		code = 2;
-	} else { // 4 bytes
-		*(uint32_t *) dataPtr = val;
-		*dataPtrPtr += 4;
-		code = 3;
-	}
-
-	return code;
-}
-
-static uint8_t *svb_encode_scalar_d1_init(const uint32_t *in,
-		uint8_t *__restrict__ keyPtr, uint8_t *__restrict__ dataPtr,
-		uint32_t count, uint32_t prev) {
-	if (count == 0)
-		return dataPtr; // exit immediately if no data
-
-	uint8_t shift = 0; // cycles 0, 2, 4, 6, 0, 2, 4, 6, ...
-	uint8_t key = 0;
-	for (uint32_t c = 0; c < count; c++) {
-		if (shift == 8) {
-			shift = 0;
-			*keyPtr++ = key;
-			key = 0;
-		}
-		uint32_t val = in[c] - prev;
-		prev = in[c];
-		uint8_t code = _encode_data(val, &dataPtr);
-		key |= code << shift;
-		shift += 2;
-	}
-
-	*keyPtr = key;  // write last key (no increment needed)
-	return dataPtr; // pointer to first unused data byte
-}
-
-size_t streamvbyte_delta_encode(const uint32_t *in, uint32_t count, uint8_t *out,
-		uint32_t prev) {
-	uint8_t *keyPtr = out;         // keys come immediately after 32-bit count
-	uint32_t keyLen = (count + 3) / 4; // 2-bits rounded to full byte
-	uint8_t *dataPtr = keyPtr + keyLen; // variable byte data after all keys
-
-	return svb_encode_scalar_d1_init(in, keyPtr, dataPtr, count, prev) - out;
-
-}
-
-static inline __m128i _decode_avx(uint32_t key, const uint8_t *__restrict__ *dataPtrPtr) {
-	uint8_t len = lengthTable[key];
-	__m128i Data = _mm_loadu_si128((__m128i *) *dataPtrPtr);
-	__m128i Shuf = *(__m128i *) &shuffleTable[key];
-
-	Data = _mm_shuffle_epi8(Data, Shuf);
-	*dataPtrPtr += len;
-
-	return Data;
-}
-#define BroadcastLastXMM 0xFF // bits 0-7 all set to choose highest element
-
-
-
-static inline void _write_avx(uint32_t *out, __m128i Vec) {
-	_mm_storeu_si128((__m128i *) out, Vec);
-}
-
-static __m128i _write_avx_d1(uint32_t *out, __m128i Vec, __m128i Prev) {
-	__m128i Add = _mm_slli_si128(Vec, 4); // Cycle 1: [- A B C] (already done)
-	Prev = _mm_shuffle_epi32(Prev, BroadcastLastXMM); // Cycle 2: [P P P P]
-	Vec = _mm_add_epi32(Vec, Add);                    // Cycle 2: [A AB BC CD]
-	Add = _mm_slli_si128(Vec, 8);                     // Cycle 3: [- - A AB]
-	Vec = _mm_add_epi32(Vec, Prev);                 // Cycle 3: [PA PAB PBC PCD]
-	Vec = _mm_add_epi32(Vec, Add); // Cycle 4: [PA PAB PABC PABCD]
-
-	_write_avx(out, Vec);
-	return Vec;
-}
-
-#ifndef _MSC_VER
-static __m128i High16To32 = {0xFFFF0B0AFFFF0908, 0xFFFF0F0EFFFF0D0C};
-#else
-static __m128i High16To32 = {8,  9,  -1, -1, 10, 11, -1, -1,
-                           12, 13, -1, -1, 14, 15, -1, -1};
-#endif
-
-static inline __m128i _write_16bit_avx_d1(uint32_t *out, __m128i Vec, __m128i Prev) {
-  // vec == [A B C D E F G H] (16 bit values)
-  __m128i Add = _mm_slli_si128(Vec, 2);               // [- A B C D E F G]
-  Prev = _mm_shuffle_epi32(Prev, BroadcastLastXMM); // [P P P P] (32-bit)
-  Vec = _mm_add_epi32(Vec, Add);                    // [A AB BC CD DE FG GH]
-  Add = _mm_slli_si128(Vec, 4);                     // [- - A AB BC CD DE EF]
-  Vec = _mm_add_epi32(Vec, Add);      // [A AB ABC ABCD BCDE CDEF DEFG EFGH]
-  __m128i V1 = _mm_cvtepu16_epi32(Vec); // [A AB ABC ABCD] (32-bit)
-  V1 = _mm_add_epi32(V1, Prev);       // [PA PAB PABC PABCD] (32-bit)
-  __m128i V2 =
-      _mm_shuffle_epi8(Vec, High16To32); // [BCDE CDEF DEFG EFGH] (32-bit)
-  V2 = _mm_add_epi32(V1, V2); // [PABCDE PABCDEF PABCDEFG PABCDEFGH] (32-bit)
-  _write_avx(out, V1);
-  _write_avx(out + 4, V2);
-  return V2;
-}
-
-static inline uint32_t _decode_data(const uint8_t **dataPtrPtr, uint8_t code) {
-	const uint8_t *dataPtr = *dataPtrPtr;
-	uint32_t val;
-
-	if (code == 0) { // 1 byte
-		val = (uint32_t) * dataPtr;
-		dataPtr += 1;
-	} else if (code == 1) { // 2 bytes
-		val = (uint32_t) * (uint16_t *) dataPtr;
-		dataPtr += 2;
-	} else if (code == 2) { // 3 bytes
-		val = (uint32_t) * (uint16_t *) dataPtr;
-		val |= *(dataPtr + 2) << 16;
-		dataPtr += 3;
-	} else {                      // code == 3
-		val = *(uint32_t *) dataPtr; // 4 bytes
-		dataPtr += 4;
-	}
-
-	*dataPtrPtr = dataPtr;
-	return val;
-}
-
-const uint8_t *svb_decode_scalar_d1_init(uint32_t *outPtr, const uint8_t *keyPtr,
-		const uint8_t *dataPtr, uint32_t count,
-                                   uint32_t prev) {
-  if (count == 0)
-    return dataPtr; // no reads or writes if no data
-
-  uint8_t shift = 0;
-  uint32_t key = *keyPtr++;
-
-  for (uint32_t c = 0; c < count; c++) {
-    if (shift == 8) {
-      shift = 0;
-      key = *keyPtr++;
-    }
-    uint32_t val = _decode_data(&dataPtr, (key >> shift) & 0x3);
-    val += prev;
-    *outPtr++ = val;
-    prev = val;
-    shift += 2;
-  }
-
-  return dataPtr; // pointer to first unused byte after end
-}
-
-const uint8_t *svb_decode_avx_d1_init(uint32_t *out, const uint8_t *__restrict__ keyPtr,
-		const uint8_t *__restrict__ dataPtr, uint64_t count, uint32_t prev) {
-	uint64_t keybytes = count / 4; // number of key bytes
-	if (keybytes >= 8) {
-		__m128i Prev = _mm_set1_epi32(prev);
-		__m128i Data;
-
-		int64_t Offset = -(int64_t) keybytes / 8 + 1;
-
-		const uint64_t *keyPtr64 = (const uint64_t *) keyPtr - Offset;
-		uint64_t nextkeys = keyPtr64[Offset];
-		for (; Offset != 0; ++Offset) {
-			uint64_t keys = nextkeys;
-			nextkeys = keyPtr64[Offset + 1];
-			// faster 16-bit delta since we only have 8-bit values
-			if (!keys) { // 32 1-byte ints in a row
-
-				Data = _mm_cvtepu8_epi16(_mm_lddqu_si128((__m128i *) (dataPtr)));
-				Prev = _write_16bit_avx_d1(out, Data, Prev);
-				Data = _mm_cvtepu8_epi16(
-						_mm_lddqu_si128((__m128i *) (dataPtr + 8)));
-				Prev = _write_16bit_avx_d1(out + 8, Data, Prev);
-				Data = _mm_cvtepu8_epi16(
-						_mm_lddqu_si128((__m128i *) (dataPtr + 16)));
-				Prev = _write_16bit_avx_d1(out + 16, Data, Prev);
-				Data = _mm_cvtepu8_epi16(
-						_mm_lddqu_si128((__m128i *) (dataPtr + 24)));
-				Prev = _write_16bit_avx_d1(out + 24, Data, Prev);
-				out += 32;
-				dataPtr += 32;
-				continue;
-			}
-
-			Data = _decode_avx(keys & 0x00FF, &dataPtr);
-			Prev = _write_avx_d1(out, Data, Prev);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			Prev = _write_avx_d1(out + 4, Data, Prev);
-
-			keys >>= 16;
-			Data = _decode_avx((keys & 0x00FF), &dataPtr);
-			Prev = _write_avx_d1(out + 8, Data, Prev);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			Prev = _write_avx_d1(out + 12, Data, Prev);
-
-			keys >>= 16;
-			Data = _decode_avx((keys & 0x00FF), &dataPtr);
-			Prev = _write_avx_d1(out + 16, Data, Prev);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			Prev = _write_avx_d1(out + 20, Data, Prev);
-
-			keys >>= 16;
-			Data = _decode_avx((keys & 0x00FF), &dataPtr);
-			Prev = _write_avx_d1(out + 24, Data, Prev);
-			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-			Prev = _write_avx_d1(out + 28, Data, Prev);
-
-			out += 32;
-		}
-		{
-			uint64_t keys = nextkeys;
-			// faster 16-bit delta since we only have 8-bit values
-			if (!keys) { // 32 1-byte ints in a row
-				Data = _mm_cvtepu8_epi16(_mm_lddqu_si128((__m128i *) (dataPtr)));
-				Prev = _write_16bit_avx_d1(out, Data, Prev);
-				Data = _mm_cvtepu8_epi16(
-						_mm_lddqu_si128((__m128i *) (dataPtr + 8)));
-				Prev = _write_16bit_avx_d1(out + 8, Data, Prev);
-				Data = _mm_cvtepu8_epi16(
-						_mm_lddqu_si128((__m128i *) (dataPtr + 16)));
-				Prev = _write_16bit_avx_d1(out + 16, Data, Prev);
-				Data = _mm_cvtepu8_epi16(
-						_mm_loadl_epi64((__m128i *) (dataPtr + 24)));
-				Prev = _write_16bit_avx_d1(out + 24, Data, Prev);
-				out += 32;
-				dataPtr += 32;
-
-			} else {
-
-				Data = _decode_avx(keys & 0x00FF, &dataPtr);
-				Prev = _write_avx_d1(out, Data, Prev);
-				Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-				Prev = _write_avx_d1(out + 4, Data, Prev);
-
-				keys >>= 16;
-				Data = _decode_avx((keys & 0x00FF), &dataPtr);
-				Prev = _write_avx_d1(out + 8, Data, Prev);
-				Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-				Prev = _write_avx_d1(out + 12, Data, Prev);
-
-				keys >>= 16;
-				Data = _decode_avx((keys & 0x00FF), &dataPtr);
-				Prev = _write_avx_d1(out + 16, Data, Prev);
-				Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-				Prev = _write_avx_d1(out + 20, Data, Prev);
-
-				keys >>= 16;
-				Data = _decode_avx((keys & 0x00FF), &dataPtr);
-				Prev = _write_avx_d1(out + 24, Data, Prev);
-				Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
-				Prev = _write_avx_d1(out + 28, Data, Prev);
-
-				out += 32;
-			}
-		}
-		prev = out[-1];
-	}
-	uint64_t consumedkeys = keybytes - (keybytes & 7);
-	return svb_decode_scalar_d1_init(out, keyPtr + consumedkeys, dataPtr,
-			count & 31, prev);
-}
-
-size_t streamvbyte_delta_decode(const uint8_t* in, uint32_t* out,
-		uint32_t count, uint32_t prev) {
-	uint32_t keyLen = ((count + 3) / 4); // 2-bits per key (rounded up)
-	const uint8_t *keyPtr = in;
-	const uint8_t *dataPtr = keyPtr + keyLen;  // data starts at end of keys
-	return svb_decode_avx_d1_init(out, keyPtr, dataPtr, count, prev) - in;
-}
--- a/cpp/streamvbyte/tests/unit.c
+++ b/cpp/streamvbyte/tests/unit.c
@@ -1,73 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "streamvbyte.h"
-#include "streamvbytedelta.h"
-
-int main() {
-	int N = 4096;
-	uint32_t * datain = malloc(N * sizeof(uint32_t));
-	uint8_t * compressedbuffer = malloc(2 * N * sizeof(uint32_t));
-	uint32_t * recovdata = malloc(N * sizeof(uint32_t));
-
-	for (int length = 0; length <= N;) {
-		printf("length = %d \n", length);
-		for (uint32_t gap = 1; gap <= 387420489; gap *= 3) {
-			for (int k = 0; k < length; ++k)
-				datain[k] = gap;
-			size_t compsize = streamvbyte_encode(datain, length,
-					compressedbuffer);
-			size_t usedbytes = streamvbyte_decode(compressedbuffer, recovdata,
-					length);
-			if (compsize != usedbytes) {
-				printf(
-						"[streamvbyte_decode] code is buggy gap = %d, size mismatch %d %d \n",
-						(int) gap, (int) compsize, (int) usedbytes);
-				return -1;
-			}
-			for (int k = 0; k < length; ++k) {
-				if (recovdata[k] != datain[k]) {
-					printf("[streamvbyte_decode] code is buggy gap = %d\n",
-							(int) gap);
-					return -1;
-				}
-			}
-		}
-
-		printf("Delta \n");
-		for (size_t gap = 1; gap <= 531441; gap *= 3) {
-			for (int k = 0; k < length; ++k)
-				datain[k] = gap * k;
-			size_t compsize = streamvbyte_delta_encode(datain, length,
-					compressedbuffer, 0);
-			size_t usedbytes = streamvbyte_delta_decode(compressedbuffer,
-					recovdata, length, 0);
-			if (compsize != usedbytes) {
-				printf(
-						"[streamvbyte_delta_decode] code is buggy gap = %d, size mismatch %d %d \n",
-						(int) gap, (int) compsize, (int) usedbytes);
-				return -1;
-			}
-			for (int k = 0; k < length; ++k) {
-				if (recovdata[k] != datain[k]) {
-					printf(
-							"[streamvbyte_delta_decode] code is buggy gap = %d\n",
-							(int) gap);
-					return -1;
-				}
-			}
-
-		}
-
-		if (length < 128)
-			++length;
-		else {
-			length *= 2;
-		}
-	}
-	free(datain);
-	free(compressedbuffer);
-	free(recovdata);
-	printf("Code looks good.\n");
-	return 0;
-}
--- a/rustfmt.toml
+++ b/rustfmt.toml
@@ -0,0 +1 @@
+use_try_shorthand = true
--- a/script/build-doc.sh
+++ b/script/build-doc.sh
@@ -1,10 +0,0 @@
-#!/bin/bash
-DEST=target/doc/tantivy/docs/
-mkdir -p $DEST
-
-for f in $(ls docs/*.md)
-do
-    rustdoc $f -o $DEST --markdown-css ../../rustdoc.css --markdown-css style.css
-done
-
-cp docs/*.css $DEST
--- a/script/profile.sh
+++ b/script/profile.sh
@@ -1,5 +0,0 @@
-#/bin/bash
-valgrind --tool=cachegrind target/release/tantivy-bench -i /data/wiki-index -q ./queries.txt -n 3
-valgrind --tool=callgrind target/release/tantivy-bench -i /data/wiki-index -q ./queries.txt -n 3
-
-
--- a/src/collector/chained_collector.rs
+++ b/src/collector/chained_collector.rs
@@ -16,6 +16,10 @@ impl Collector for DoNothingCollector {
    }
    #[inline]
    fn collect(&mut self, _doc: DocId, _score: Score) {}
+    #[inline]
+    fn requires_scoring(&self) -> bool {
+        false
+    }
 }

 /// Zero-cost abstraction used to collect on multiple collectors.
@@ -42,8 +46,8 @@ impl<Left: Collector, Right: Collector> Collector for ChainedCollector<Left, Rig
        segment_local_id: SegmentLocalId,
        segment: &SegmentReader,
    ) -> Result<()> {
-        try!(self.left.set_segment(segment_local_id, segment));
-        try!(self.right.set_segment(segment_local_id, segment));
+        self.left.set_segment(segment_local_id, segment)?;
+        self.right.set_segment(segment_local_id, segment)?;
        Ok(())
    }

@@ -51,6 +55,10 @@ impl<Left: Collector, Right: Collector> Collector for ChainedCollector<Left, Rig
        self.left.collect(doc, score);
        self.right.collect(doc, score);
    }
+
+    fn requires_scoring(&self) -> bool {
+        self.left.requires_scoring() || self.right.requires_scoring()
+    }
 }

 /// Creates a `ChainedCollector`
--- a/src/collector/count_collector.rs
+++ b/src/collector/count_collector.rs
@@ -7,6 +7,7 @@ use SegmentLocalId;

 /// `CountCollector` collector only counts how many
 /// documents match the query.
+#[derive(Default)]
 pub struct CountCollector {
    count: usize,
 }
@@ -19,12 +20,6 @@ impl CountCollector {
    }
 }

-impl Default for CountCollector {
-    fn default() -> CountCollector {
-        CountCollector { count: 0 }
-    }
-}
-
 impl Collector for CountCollector {
    fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> Result<()> {
        Ok(())
@@ -33,23 +28,27 @@ impl Collector for CountCollector {
    fn collect(&mut self, _: DocId, _: Score) {
        self.count += 1;
    }
+
+    fn requires_scoring(&self) -> bool {
+        false
+    }
 }

 #[cfg(test)]
 mod tests {

-    use super::*;
-    use test::Bencher;
-    use collector::Collector;
+    use collector::{Collector, CountCollector};

-    #[bench]
-    fn build_collector(b: &mut Bencher) {
-        b.iter(|| {
-            let mut count_collector = CountCollector::default();
-            for doc in 0..1_000_000 {
-                count_collector.collect(doc, 1f32);
-            }
-            count_collector.count()
-        });
+    #[test]
+    fn test_count_collector() {
+        let mut count_collector = CountCollector::default();
+        assert_eq!(count_collector.count(), 0);
+        count_collector.collect(0u32, 1f32);
+        assert_eq!(count_collector.count(), 1);
+        assert_eq!(count_collector.count(), 1);
+        count_collector.collect(1u32, 1f32);
+        assert_eq!(count_collector.count(), 2);
+        assert!(!count_collector.requires_scoring());
    }
+
 }
--- a/src/collector/facet_collector.rs
+++ b/src/collector/facet_collector.rs
@@ -1,113 +1,637 @@
-use std::cmp::Eq;
-use std::collections::HashMap;
-use std::hash::Hash;
-
+use std::mem;
 use collector::Collector;
-use fastfield::FastFieldReader;
+use fastfield::FacetReader;
 use schema::Field;
+use std::cell::UnsafeCell;
+use schema::Facet;
+use std::collections::BTreeMap;
+use std::collections::BinaryHeap;
+use std::collections::Bound;
+use termdict::TermDictionary;
+use termdict::TermStreamer;
+use termdict::TermStreamerBuilder;
+use std::collections::BTreeSet;
+use termdict::TermMerger;
+use docset::SkipResult;
+use std::{usize, u64};
+use std::iter::Peekable;

 use DocId;
 use Result;
 use Score;
 use SegmentReader;
 use SegmentLocalId;
+use std::cmp::Ordering;

-/// Facet collector  for i64/u64 fast field
-pub struct FacetCollector<T>
-where
-    T: FastFieldReader,
-    T::ValueType: Eq + Hash,
-{
-    counters: HashMap<T::ValueType, u64>,
-    field: Field,
-    ff_reader: Option<T>,
+struct Hit<'a> {
+    count: u64,
+    facet: &'a Facet,
 }

-impl<T> FacetCollector<T>
-where
-    T: FastFieldReader,
-    T::ValueType: Eq + Hash,
-{
-    /// Creates a new facet collector for aggregating a given field.
-    pub fn new(field: Field) -> FacetCollector<T> {
-        FacetCollector {
-            counters: HashMap::new(),
-            field: field,
-            ff_reader: None,
-        }
+impl<'a> Eq for Hit<'a> {}
+
+impl<'a> PartialEq<Hit<'a>> for Hit<'a> {
+    fn eq(&self, other: &Hit) -> bool {
+        self.count == other.count
    }
 }

-impl<T> Collector for FacetCollector<T>
-where
-    T: FastFieldReader,
-    T::ValueType: Eq + Hash,
-{
+impl<'a> PartialOrd<Hit<'a>> for Hit<'a> {
+    fn partial_cmp(&self, other: &Hit) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl<'a> Ord for Hit<'a> {
+    fn cmp(&self, other: &Self) -> Ordering {
+        other.count.cmp(&self.count)
+    }
+}
+
+struct SegmentFacetCounter {
+    pub facet_reader: FacetReader,
+    pub facet_ords: Vec<u64>,
+    pub facet_counts: Vec<u64>,
+}
+
+fn facet_depth(facet_bytes: &[u8]) -> usize {
+    if facet_bytes.is_empty() {
+        0
+    } else {
+        facet_bytes.iter().cloned().filter(|b| *b == 0u8).count() + 1
+    }
+}
+
+/// Collector for faceting
+///
+/// The collector collects all facets. You need to configure it
+/// beforehand with the facet you want to extract.
+///
+/// This is done by calling `.add_facet(...)` with the root of the
+/// facet you want to extract as argument.
+///
+/// Facet counts will only be computed for the facets that are direct children
+/// of such a root facet.
+///
+/// For instance, if your index represents books, your hierarchy of facets
+/// may contain `category`, `language`.
+///
+/// The category facet may include `subcategories`. For instance, a book
+/// could belong to `/category/fiction/fantasy`.
+///
+/// If you request the facet counts for `/category`, the result will be
+/// the breakdown of counts for the direct children of `/category`
+/// (e.g. `/category/fiction`, `/category/biography`, `/category/personal_development`).
+///
+/// Once collection is finished, you can harvest its results in the form
+/// of a `FacetCounts` object, and extract your facet counts from it.
+///
+/// This implementation assumes you are working with a number of facets that
+/// is hundreds of times lower than your number of documents.
+///
+///
+/// ```rust
+/// #[macro_use]
+/// extern crate tantivy;
+/// use tantivy::schema::{Facet, SchemaBuilder, TEXT};
+/// use tantivy::{Index, Result};
+/// use tantivy::collector::FacetCollector;
+/// use tantivy::query::AllQuery;
+///
+/// # fn main() { example().unwrap(); }
+/// fn example() -> Result<()> {
+///     let mut schema_builder = SchemaBuilder::new();
+///
+///     // Facets have their own specific type.
+///     // It is not a bad practice to put all of your
+///     // facet information in the same field.
+///     let facet = schema_builder.add_facet_field("facet");
+///     let title = schema_builder.add_text_field("title", TEXT);
+///     let schema = schema_builder.build();
+///     let index = Index::create_in_ram(schema);
+///     {
+///         let mut index_writer = index.writer(3_000_000)?;
+///         // a document can be associated to any number of facets
+///         index_writer.add_document(doc!(
+///             title => "The Name of the Wind",
+///             facet => Facet::from("/lang/en"),
+///             facet => Facet::from("/category/fiction/fantasy")
+///         ));
+///         index_writer.add_document(doc!(
+///             title => "Dune",
+///             facet => Facet::from("/lang/en"),
+///             facet => Facet::from("/category/fiction/sci-fi")
+///         ));
+///         index_writer.add_document(doc!(
+///             title => "La Vénus d'Ille",
+///             facet => Facet::from("/lang/fr"),
+///             facet => Facet::from("/category/fiction/fantasy"),
+///             facet => Facet::from("/category/fiction/horror")
+///         ));
+///         index_writer.add_document(doc!(
+///             title => "The Diary of a Young Girl",
+///             facet => Facet::from("/lang/en"),
+///             facet => Facet::from("/category/biography")
+///         ));
+///         index_writer.commit().unwrap();
+///     }
+///
+///     index.load_searchers()?;
+///     let searcher = index.searcher();
+///
+///     {
+///         let mut facet_collector = FacetCollector::for_field(facet);
+///         facet_collector.add_facet("/lang");
+///         facet_collector.add_facet("/category");
+///         searcher.search(&AllQuery, &mut facet_collector).unwrap();
+///
+///         // this object contains count aggregate for all of the facets.
+///         let counts = facet_collector.harvest();
+///
+///         // This lists all of the facet counts
+///         let facets: Vec<(&Facet, u64)> = counts
+///             .get("/category")
+///             .collect();
+///         assert_eq!(facets, vec![
+///             (&Facet::from("/category/biography"), 1),
+///             (&Facet::from("/category/fiction"), 3)
+///         ]);
+///     }
+///
+///     {
+///         let mut facet_collector = FacetCollector::for_field(facet);
+///         facet_collector.add_facet("/category/fiction");
+///         searcher.search(&AllQuery, &mut facet_collector).unwrap();
+///
+///         // this object contains count aggregate for all of the facets.
+///         let counts = facet_collector.harvest();
+///
+///         // This lists all of the facet counts
+///         let facets: Vec<(&Facet, u64)> = counts
+///             .get("/category/fiction")
+///             .collect();
+///         assert_eq!(facets, vec![
+///             (&Facet::from("/category/fiction/fantasy"), 2),
+///             (&Facet::from("/category/fiction/horror"), 1),
+///             (&Facet::from("/category/fiction/sci-fi"), 1)
+///         ]);
+///     }
+///
+///     {
+///         let mut facet_collector = FacetCollector::for_field(facet);
+///         facet_collector.add_facet("/category/fiction");
+///         searcher.search(&AllQuery, &mut facet_collector).unwrap();
+///
+///         // this object contains count aggregate for all of the facets.
+///         let counts = facet_collector.harvest();
+///
+///         // This lists all of the facet counts
+///         let facets: Vec<(&Facet, u64)> = counts.top_k("/category/fiction", 1);
+///         assert_eq!(facets, vec![
+///             (&Facet::from("/category/fiction/fantasy"), 2)
+///         ]);
+///     }
+///
+///     Ok(())
+/// }
+/// ```
+pub struct FacetCollector {
+    facet_ords: Vec<u64>,
+    field: Field,
+    ff_reader: Option<UnsafeCell<FacetReader>>,
+    segment_counters: Vec<SegmentFacetCounter>,
+
+    // facet_ord -> collapse facet_id
+    current_segment_collapse_mapping: Vec<usize>,
+    // collapse facet_id -> count
+    current_segment_counts: Vec<u64>,
+    // collapse facet_id -> facet_ord
+    current_collapse_facet_ords: Vec<u64>,
+
+    facets: BTreeSet<Facet>,
+}
+
+fn skip<'a, I: Iterator<Item = &'a Facet>>(
+    target: &[u8],
+    collapse_it: &mut Peekable<I>,
+) -> SkipResult {
+    loop {
+        match collapse_it.peek() {
+            Some(facet_bytes) => match facet_bytes.encoded_bytes().cmp(target) {
+                Ordering::Less => {}
+                Ordering::Greater => {
+                    return SkipResult::OverStep;
+                }
+                Ordering::Equal => {
+                    return SkipResult::Reached;
+                }
+            },
+            None => {
+                return SkipResult::End;
+            }
+        }
+        collapse_it.next();
+    }
+}
+
+impl FacetCollector {
+    /// Create a facet collector to collect the facets
+    /// from a specific facet `Field`.
+    ///
+    /// This function does not check whether the field
+    /// is of the proper type.
+    pub fn for_field(field: Field) -> FacetCollector {
+        FacetCollector {
+            facet_ords: Vec::with_capacity(255),
+            segment_counters: Vec::new(),
+            field,
+            ff_reader: None,
+            facets: BTreeSet::new(),
+
+            current_segment_collapse_mapping: Vec::new(),
+            current_collapse_facet_ords: Vec::new(),
+            current_segment_counts: Vec::new(),
+        }
+    }
+
+    /// Adds a facet for which we want to record counts.
+    ///
+    /// Adding facet `Facet::from("/country")` for instance,
+    /// will record the counts of all of the direct children of the facet country
+    /// (e.g. `/country/FR`, `/country/UK`).
+    ///
+    /// Adding two facets within which one is the prefix of the other is forbidden.
+    /// If you need the correct number of unique documents for two such facets,
+    /// just add them in separate `FacetCollector`.
+    pub fn add_facet<T>(&mut self, facet_from: T)
+    where
+        Facet: From<T>,
+    {
+        let facet = Facet::from(facet_from);
+        for old_facet in &self.facets {
+            assert!(
+                !old_facet.is_prefix_of(&facet),
+                "Tried to add a facet which is a descendant of an already added facet."
+            );
+            assert!(
+                !facet.is_prefix_of(old_facet),
+                "Tried to add a facet which is an ancestor of an already added facet."
+            );
+        }
+        self.facets.insert(facet);
+    }
+
+    fn set_collapse_mapping(&mut self, facet_reader: &FacetReader) {
+        self.current_segment_collapse_mapping.clear();
+        self.current_collapse_facet_ords.clear();
+        self.current_segment_counts.clear();
+        let mut collapse_facet_it = self.facets.iter().peekable();
+        self.current_collapse_facet_ords.push(0);
+        let mut facet_streamer = facet_reader.facet_dict().range().into_stream();
+        if !facet_streamer.advance() {
+            return;
+        }
+        'outer: loop {
+            // At the beginning of this loop, facet_streamer
+            // is positioned on a term that has not been processed yet.
+            let skip_result = skip(facet_streamer.key(), &mut collapse_facet_it);
+            match skip_result {
+                SkipResult::Reached => {
+                    // we reach a facet we decided to collapse.
+                    let collapse_depth = facet_depth(facet_streamer.key());
+                    let mut collapsed_id = 0;
+                    self.current_segment_collapse_mapping.push(0);
+                    while facet_streamer.advance() {
+                        let depth = facet_depth(facet_streamer.key());
+                        if depth <= collapse_depth {
+                            continue 'outer;
+                        }
+                        if depth == collapse_depth + 1 {
+                            collapsed_id = self.current_collapse_facet_ords.len();
+                            self.current_collapse_facet_ords
+                                .push(facet_streamer.term_ord());
+                            self.current_segment_collapse_mapping.push(collapsed_id);
+                        } else {
+                            self.current_segment_collapse_mapping.push(collapsed_id);
+                        }
+                    }
+                    break;
+                }
+                SkipResult::End | SkipResult::OverStep => {
+                    self.current_segment_collapse_mapping.push(0);
+                    if !facet_streamer.advance() {
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    fn finalize_segment(&mut self) {
+        if self.ff_reader.is_some() {
+            self.segment_counters.push(SegmentFacetCounter {
+                facet_reader: self.ff_reader.take().unwrap().into_inner(),
+                facet_ords: mem::replace(&mut self.current_collapse_facet_ords, Vec::new()),
+                facet_counts: mem::replace(&mut self.current_segment_counts, Vec::new()),
+            });
+        }
+    }
+
+    /// Returns the results of the collection.
+    ///
+    /// This method does not just return the counters,
+    /// it also translates the facet ordinals of the last segment.
+    pub fn harvest(mut self) -> FacetCounts {
+        self.finalize_segment();
+
+        let collapsed_facet_ords: Vec<&[u64]> = self.segment_counters
+            .iter()
+            .map(|segment_counter| &segment_counter.facet_ords[..])
+            .collect();
+        let collapsed_facet_counts: Vec<&[u64]> = self.segment_counters
+            .iter()
+            .map(|segment_counter| &segment_counter.facet_counts[..])
+            .collect();
+
+        let facet_streams = self.segment_counters
+            .iter()
+            .map(|seg_counts| seg_counts.facet_reader.facet_dict().range().into_stream())
+            .collect::<Vec<_>>();
+
+        let mut facet_merger = TermMerger::new(facet_streams);
+        let mut facet_counts = BTreeMap::new();
+
+        while facet_merger.advance() {
+            let count = facet_merger
+                .current_kvs()
+                .iter()
+                .map(|it| {
+                    let seg_ord = it.segment_ord;
+                    let term_ord = it.streamer.term_ord();
+                    collapsed_facet_ords[seg_ord]
+                        .binary_search(&term_ord)
+                        .map(|collapsed_term_id| {
+                            if collapsed_term_id == 0 {
+                                0
+                            } else {
+                                collapsed_facet_counts[seg_ord][collapsed_term_id]
+                            }
+                        })
+                        .unwrap_or(0)
+                })
+                .sum();
+            if count > 0u64 {
+                let bytes = facet_merger.key().to_owned();
+                facet_counts.insert(Facet::from_encoded(bytes), count);
+            }
+        }
+        FacetCounts { facet_counts }
+    }
+}
+
+impl Collector for FacetCollector {
    fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
-        self.ff_reader = Some(reader.get_fast_field_reader(self.field)?);
+        self.finalize_segment();
+        let facet_reader = reader.facet_reader(self.field)?;
+        self.set_collapse_mapping(&facet_reader);
+        self.current_segment_counts
+            .resize(self.current_collapse_facet_ords.len(), 0);
+        self.ff_reader = Some(UnsafeCell::new(facet_reader));
        Ok(())
    }

    fn collect(&mut self, doc: DocId, _: Score) {
-        let val = self.ff_reader
-            .as_ref()
-            .expect("collect() was called before set_segment. This should never happen.")
-            .get(doc);
-        *(self.counters.entry(val).or_insert(0)) += 1;
+        let facet_reader: &mut FacetReader = unsafe {
+            &mut *self.ff_reader
+                .as_ref()
+                .expect("collect() was called before set_segment. This should never happen.")
+                .get()
+        };
+        facet_reader.facet_ords(doc, &mut self.facet_ords);
+        let mut previous_collapsed_ord: usize = usize::MAX;
+        for &facet_ord in &self.facet_ords {
+            let collapsed_ord = self.current_segment_collapse_mapping[facet_ord as usize];
+            self.current_segment_counts[collapsed_ord] += if collapsed_ord == previous_collapsed_ord
+            {
+                0
+            } else {
+                1
+            };
+            previous_collapsed_ord = collapsed_ord;
+        }
+    }
+
+    fn requires_scoring(&self) -> bool {
+        false
+    }
+}
+
+/// Intermediary result of the `FacetCollector` that stores
+/// the facet counts for all the segments.
+pub struct FacetCounts {
+    facet_counts: BTreeMap<Facet, u64>,
+}
+
+impl FacetCounts {
+    #[allow(needless_lifetimes)] //< compiler fails if we remove the lifetime
+    pub fn get<'a, T>(&'a self, facet_from: T) -> impl Iterator<Item = (&'a Facet, u64)>
+    where
+        Facet: From<T>,
+    {
+        let facet = Facet::from(facet_from);
+        let left_bound = Bound::Excluded(facet.clone());
+        let right_bound = if facet.is_root() {
+            Bound::Unbounded
+        } else {
+            let mut facet_after_bytes = facet.encoded_bytes().to_owned();
+            facet_after_bytes.push(1u8);
+            let facet_after = Facet::from_encoded(facet_after_bytes);
+            Bound::Excluded(facet_after)
+        };
+
+        self.facet_counts
+            .range((left_bound, right_bound))
+            .map(|(facet, count)| (facet, *count))
+    }
+
+    /// Returns the (at most) `k` direct children of `facet` with the highest
+    /// counts, sorted by decreasing count.
+    pub fn top_k<T>(&self, facet: T, k: usize) -> Vec<(&Facet, u64)>
+    where
+        Facet: From<T>,
+    {
+        let mut heap = BinaryHeap::with_capacity(k);
+        let mut it = self.get(facet);
+        // `Hit` orders by reversed count, so the heap's head is the lowest count kept.
+        for (facet, count) in (&mut it).take(k) {
+            heap.push(Hit { count, facet });
+        }
+        let mut lowest_count: u64 = heap.peek().map(|hit| hit.count).unwrap_or(u64::MIN);
+        for (facet, count) in it {
+            if count > lowest_count {
+                if let Some(mut head) = heap.peek_mut() {
+                    *head = Hit { count, facet };
+                }
+                // The heap re-sifts once `head` is dropped: re-read the real minimum
+                // instead of assuming the freshly inserted count is the lowest.
+                lowest_count = heap.peek().map(|hit| hit.count).unwrap_or(u64::MIN);
+            }
+        }
+        let sorted_hits = heap.into_sorted_vec().into_iter();
+        sorted_hits.map(|hit| (hit.facet, hit.count)).collect()
     }
 }

 #[cfg(test)]
 mod tests {
-
-    use collector::{chain, FacetCollector};
-    use query::QueryParser;
-    use fastfield::{I64FastFieldReader, U64FastFieldReader};
-    use schema::{self, FAST, STRING};
-    use Index;
+    use test::Bencher;
+    use core::Index;
+    use schema::{Document, Facet, SchemaBuilder};
+    use query::AllQuery;
+    use super::{FacetCollector, FacetCounts};
+    use std::iter;
+    use schema::Field;
+    use rand::{thread_rng, Rng};

    #[test]
-    // create 10 documents, set num field value to 0 or 1 for even/odd ones
-    // make sure we have facet counters correctly filled
-    fn test_facet_collector_results() {
-        let mut schema_builder = schema::SchemaBuilder::new();
-        let num_field_i64 = schema_builder.add_i64_field("num_i64", FAST);
-        let num_field_u64 = schema_builder.add_u64_field("num_u64", FAST);
-        let text_field = schema_builder.add_text_field("text", STRING);
+    fn test_facet_collector_drilldown() {
+        let mut schema_builder = SchemaBuilder::new();
+        let facet_field = schema_builder.add_facet_field("facet");
        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);

-        let index = Index::create_in_ram(schema.clone());
-
-        {
-            let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
-            {
-                for i in 0u64..10u64 {
-                    index_writer.add_document(doc!(
-                        num_field_i64 => ((i as i64) % 3i64) as i64,
-                        num_field_u64 => (i % 2u64) as u64,
-                        text_field => "text"
-                    ));
-                }
-            }
-            assert_eq!(index_writer.commit().unwrap(), 10u64);
+        let mut index_writer = index.writer(3_000_000).unwrap();
+        let num_facets: usize = 3 * 4 * 5;
+        let facets: Vec<Facet> = (0..num_facets)
+            .map(|mut n| {
+                let top = n % 3;
+                n /= 3;
+                let mid = n % 4;
+                n /= 4;
+                let leaf = n % 5;
+                Facet::from(&format!("/top{}/mid{}/leaf{}", top, mid, leaf))
+            })
+            .collect();
+        for i in 0..num_facets * 10 {
+            let mut doc = Document::new();
+            doc.add_facet(facet_field, facets[i % num_facets].clone());
+            index_writer.add_document(doc);
        }
-
+        index_writer.commit().unwrap();
        index.load_searchers().unwrap();
        let searcher = index.searcher();
-        let mut ffvf_i64: FacetCollector<I64FastFieldReader> = FacetCollector::new(num_field_i64);
-        let mut ffvf_u64: FacetCollector<U64FastFieldReader> = FacetCollector::new(num_field_u64);

+        let mut facet_collector = FacetCollector::for_field(facet_field);
+        facet_collector.add_facet(Facet::from("/top1"));
+        searcher.search(&AllQuery, &mut facet_collector).unwrap();
+
+        let counts: FacetCounts = facet_collector.harvest();
        {
-            // perform the query
-            let mut facet_collectors = chain().push(&mut ffvf_i64).push(&mut ffvf_u64);
-            let query_parser = QueryParser::for_index(&index, vec![text_field]);
-            let query = query_parser.parse_query("text:text").unwrap();
-            query.search(&searcher, &mut facet_collectors).unwrap();
+            let facets: Vec<(String, u64)> = counts
+                .get("/top1")
+                .map(|(facet, count)| (facet.to_string(), count))
+                .collect();
+            assert_eq!(
+                facets,
+                [
+                    ("/top1/mid0", 50),
+                    ("/top1/mid1", 50),
+                    ("/top1/mid2", 50),
+                    ("/top1/mid3", 50),
+                ].iter()
+                    .map(|&(facet_str, count)| (String::from(facet_str), count))
+                    .collect::<Vec<_>>()
+            );
        }
+    }

-        assert_eq!(ffvf_u64.counters[&0], 5);
-        assert_eq!(ffvf_u64.counters[&1], 5);
-        assert_eq!(ffvf_i64.counters[&0], 4);
-        assert_eq!(ffvf_i64.counters[&1], 3);
+    #[test]
+    #[should_panic(expected = "Tried to add a facet which is a descendant of \
+                               an already added facet.")]
+    fn test_misused_facet_collector() {
+        let mut facet_collector = FacetCollector::for_field(Field(0));
+        facet_collector.add_facet(Facet::from("/country"));
+        facet_collector.add_facet(Facet::from("/country/europe"));
+    }
+
+    #[test]
+    fn test_non_used_facet_collector() {
+        let mut facet_collector = FacetCollector::for_field(Field(0));
+        facet_collector.add_facet(Facet::from("/country"));
+        facet_collector.add_facet(Facet::from("/countryeurope"));
+    }
+
+    #[test]
+    fn test_facet_collector_topk() {
+        let mut schema_builder = SchemaBuilder::new();
+        let facet_field = schema_builder.add_facet_field("facet");
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+
+        let mut docs: Vec<Document> = vec![("a", 10), ("b", 100), ("c", 7), ("d", 12), ("e", 21)]
+            .into_iter()
+            .flat_map(|(c, count)| {
+                let facet = Facet::from(&format!("/facet_{}", c));
+                let doc = doc!(facet_field => facet);
+                iter::repeat(doc).take(count)
+            })
+            .collect();
+        thread_rng().shuffle(&mut docs[..]);
+
+        let mut index_writer = index.writer(3_000_000).unwrap();
+        for doc in docs {
+            index_writer.add_document(doc);
+        }
+        index_writer.commit().unwrap();
+        index.load_searchers().unwrap();
+
+        let searcher = index.searcher();
+
+        let mut facet_collector = FacetCollector::for_field(facet_field);
+        facet_collector.add_facet("/");
+        searcher.search(&AllQuery, &mut facet_collector).unwrap();
+
+        let counts: FacetCounts = facet_collector.harvest();
+        {
+            let facets: Vec<(&Facet, u64)> = counts.top_k("/", 3);
+            assert_eq!(
+                facets,
+                vec![
+                    (&Facet::from("/facet_b"), 100),
+                    (&Facet::from("/facet_e"), 21),
+                    (&Facet::from("/facet_d"), 12),
+                ]
+            );
+        }
+    }
+
+    #[bench]
+    fn bench_facet_collector(b: &mut Bencher) {
+        let mut schema_builder = SchemaBuilder::new();
+        let facet_field = schema_builder.add_facet_field("facet");
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+
+        let mut docs = vec![];
+        for val in 0..50 {
+            let facet = Facet::from(&format!("/facet_{}", val));
+            for _ in 0..val * val {
+                docs.push(doc!(facet_field=>facet.clone()));
+            }
+        }
+        // 40425 docs
+        thread_rng().shuffle(&mut docs[..]);
+
+        let mut index_writer = index.writer(3_000_000).unwrap();
+        for doc in docs {
+            index_writer.add_document(doc);
+        }
+        index_writer.commit().unwrap();
+        index.load_searchers().unwrap();
+
+        b.iter(|| {
+            let searcher = index.searcher();
+            let mut facet_collector = FacetCollector::for_field(facet_field);
+            searcher.search(&AllQuery, &mut facet_collector).unwrap();
+        });
    }
 }
--- a/src/collector/int_facet_collector.rs
+++ b/src/collector/int_facet_collector.rs
@@ -0,0 +1,123 @@
+use std::cmp::Eq;
+use std::collections::HashMap;
+use std::hash::Hash;
+
+use collector::Collector;
+use fastfield::FastFieldReader;
+use schema::Field;
+
+use DocId;
+use Result;
+use Score;
+use SegmentReader;
+use SegmentLocalId;
+
+
+/// Facet collector for i64/u64 fast fields
+pub struct IntFacetCollector<T>
+where
+    T: FastFieldReader,
+    T::ValueType: Eq + Hash,
+{
+    counters: HashMap<T::ValueType, u64>,
+    field: Field,
+    ff_reader: Option<T>,
+}
+
+
+impl<T> IntFacetCollector<T>
+where
+    T: FastFieldReader,
+    T::ValueType: Eq + Hash,
+{
+    /// Creates a new facet collector for aggregating a given field.
+    pub fn new(field: Field) -> IntFacetCollector<T> {
+        IntFacetCollector {
+            counters: HashMap::new(),
+            field: field,
+            ff_reader: None,
+        }
+    }
+}
+
+
+impl<T> Collector for IntFacetCollector<T>
+where
+    T: FastFieldReader,
+    T::ValueType: Eq + Hash,
+{
+    fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
+        self.ff_reader = Some(reader.get_fast_field_reader(self.field)?);
+        Ok(())
+    }
+
+    fn collect(&mut self, doc: DocId, _: Score) {
+        let val = self.ff_reader
+            .as_ref()
+            .expect(
+                "collect() was called before set_segment. \
+                This should never happen.",
+            )
+            .get(doc);
+        *(self.counters.entry(val).or_insert(0)) += 1;
+    }
+}
+
+
+
+#[cfg(test)]
+mod tests {
+
+    use collector::{chain, IntFacetCollector};
+    use query::QueryParser;
+    use fastfield::{I64FastFieldReader, U64FastFieldReader};
+    use schema::{self, FAST, STRING};
+    use Index;
+
+    #[test]
+    // create 10 documents, set num field value to 0 or 1 for even/odd ones
+    // make sure we have facet counters correctly filled
+    fn test_facet_collector_results() {
+
+        let mut schema_builder = schema::SchemaBuilder::new();
+        let num_field_i64 = schema_builder.add_i64_field("num_i64", FAST);
+        let num_field_u64 = schema_builder.add_u64_field("num_u64", FAST);
+        let text_field = schema_builder.add_text_field("text", STRING);
+        let schema = schema_builder.build();
+
+        let index = Index::create_in_ram(schema.clone());
+
+        {
+            let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
+            {
+                for i in 0u64..10u64 {
+                    index_writer.add_document(doc!(
+                        num_field_i64 => ((i as i64) % 3i64) as i64,
+                        num_field_u64 => (i % 2u64) as u64,
+                        text_field => "text"
+                    ));
+                }
+            }
+            assert_eq!(index_writer.commit().unwrap(), 10u64);
+        }
+
+        index.load_searchers().unwrap();
+        let searcher = index.searcher();
+        let mut ffvf_i64: IntFacetCollector<I64FastFieldReader> = IntFacetCollector::new(num_field_i64);
+        let mut ffvf_u64: IntFacetCollector<U64FastFieldReader> = IntFacetCollector::new(num_field_u64);
+
+        {
+            // perform the query
+            let mut facet_collectors = chain().push(&mut ffvf_i64).push(&mut ffvf_u64);
+            let mut query_parser = QueryParser::for_index(index, vec![text_field]);
+            let query = query_parser.parse_query("text:text").unwrap();
+            query.search(&searcher, &mut facet_collectors).unwrap();
+        }
+
+        assert_eq!(ffvf_u64.counters[&0], 5);
+        assert_eq!(ffvf_u64.counters[&1], 5);
+        assert_eq!(ffvf_i64.counters[&0], 4);
+        assert_eq!(ffvf_i64.counters[&1], 3);
+
+    }
+}
--- a/src/collector/mod.rs
+++ b/src/collector/mod.rs
@@ -62,6 +62,9 @@ pub trait Collector {
    ) -> Result<()>;
    /// The query pushes the scored document to the collector via this method.
    fn collect(&mut self, doc: DocId, score: Score);
+
+    /// Returns true iff the collector requires to compute scores for documents.
+    fn requires_scoring(&self) -> bool;
 }

 impl<'a, C: Collector> Collector for &'a mut C {
@@ -74,7 +77,11 @@ impl<'a, C: Collector> Collector for &'a mut C {
    }
    /// The query pushes the scored document to the collector via this method.
    fn collect(&mut self, doc: DocId, score: Score) {
-        (*self).collect(doc, score);
+        C::collect(self, doc, score)
+    }
+
+    fn requires_scoring(&self) -> bool {
+        C::requires_scoring(self)
    }
 }

@@ -87,7 +94,6 @@ pub mod tests {
    use Score;
    use core::SegmentReader;
    use SegmentLocalId;
-    use fastfield::U64FastFieldReader;
    use fastfield::FastFieldReader;
    use schema::Field;

@@ -99,6 +105,7 @@ pub mod tests {
        offset: DocId,
        segment_max_doc: DocId,
        docs: Vec<DocId>,
+        scores: Vec<Score>,
    }

    impl TestCollector {
@@ -106,14 +113,19 @@ pub mod tests {
        pub fn docs(self) -> Vec<DocId> {
            self.docs
        }
+
+        pub fn scores(self) -> Vec<Score> {
+            self.scores
+        }
    }

    impl Default for TestCollector {
        fn default() -> TestCollector {
            TestCollector {
-                docs: Vec::new(),
                offset: 0,
                segment_max_doc: 0,
+                docs: Vec::new(),
+                scores: Vec::new(),
            }
        }
    }
@@ -125,8 +137,13 @@ pub mod tests {
            Ok(())
        }

-        fn collect(&mut self, doc: DocId, _score: Score) {
+        fn collect(&mut self, doc: DocId, score: Score) {
            self.docs.push(doc + self.offset);
+            self.scores.push(score);
+        }
+
+        fn requires_scoring(&self) -> bool {
+            true
        }
    }

@@ -137,14 +154,14 @@ pub mod tests {
    pub struct FastFieldTestCollector {
        vals: Vec<u64>,
        field: Field,
-        ff_reader: Option<U64FastFieldReader>,
+        ff_reader: Option<FastFieldReader<u64>>,
    }

    impl FastFieldTestCollector {
        pub fn for_field(field: Field) -> FastFieldTestCollector {
            FastFieldTestCollector {
                vals: Vec::new(),
-                field: field,
+                field,
                ff_reader: None,
            }
        }
@@ -156,7 +173,7 @@ pub mod tests {

    impl Collector for FastFieldTestCollector {
        fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
-            self.ff_reader = Some(reader.get_fast_field_reader(self.field)?);
+            self.ff_reader = Some(reader.fast_field_reader(self.field)?);
            Ok(())
        }

@@ -164,6 +181,9 @@ pub mod tests {
            let val = self.ff_reader.as_ref().unwrap().get(doc);
            self.vals.push(val);
        }
+        fn requires_scoring(&self) -> bool {
+            false
+        }
    }

    #[bench]
--- a/src/collector/multi_collector.rs
+++ b/src/collector/multi_collector.rs
@@ -16,9 +16,7 @@ pub struct MultiCollector<'a> {
 impl<'a> MultiCollector<'a> {
    /// Constructor
    pub fn from(collectors: Vec<&'a mut Collector>) -> MultiCollector {
-        MultiCollector {
-            collectors: collectors,
-        }
+        MultiCollector { collectors }
    }
 }

@@ -29,7 +27,7 @@ impl<'a> Collector for MultiCollector<'a> {
        segment: &SegmentReader,
    ) -> Result<()> {
        for collector in &mut self.collectors {
-            try!(collector.set_segment(segment_local_id, segment));
+            collector.set_segment(segment_local_id, segment)?;
        }
        Ok(())
    }
@@ -39,6 +37,11 @@ impl<'a> Collector for MultiCollector<'a> {
            collector.collect(doc, score);
        }
    }
+    fn requires_scoring(&self) -> bool {
+        self.collectors
+            .iter()
+            .any(|collector| collector.requires_scoring())
+    }
 }

 #[cfg(test)]
--- a/src/collector/top_collector.rs
+++ b/src/collector/top_collector.rs
@@ -60,7 +60,7 @@ impl TopCollector {
            panic!("Limit must be strictly greater than 0.");
        }
        TopCollector {
-            limit: limit,
+            limit,
            heap: BinaryHeap::with_capacity(limit),
            segment_id: 0,
        }
@@ -119,12 +119,16 @@ impl Collector for TopCollector {
            }
        } else {
            let wrapped_doc = GlobalScoredDoc {
-                score: score,
+                score,
                doc_address: DocAddress(self.segment_id, doc),
            };
            self.heap.push(wrapped_doc);
        }
    }
+
+    fn requires_scoring(&self) -> bool {
+        true
+    }
 }

 #[cfg(test)]
--- a/src/common/bitpacker.rs
+++ b/src/common/bitpacker.rs
@@ -3,65 +3,37 @@ use std::io;
 use common::serialize::BinarySerializable;
 use std::mem;
 use std::ops::Deref;
+use std::ptr;

-/// Computes the number of bits that will be used for bitpacking.
-///
-/// In general the target is the minimum number of bits
-/// required to express the amplitude given in argument.
-///
-/// e.g. If the amplitude is 10, we can store all ints on simply 4bits.
-///
-/// The logic is slightly more convoluted here as for optimization
-/// reasons, we want to ensure that a value spawns over at most 8 bytes
-/// of aligns bytes.
-///
-/// Spanning over 9 bytes is possible for instance, if we do
-/// bitpacking with an amplitude of 63 bits.
-/// In this case, the second int will start on bit
-/// 63 (which belongs to byte 7) and ends at byte 15;
-/// Hence 9 bytes (from byte 7 to byte 15 included).
-///
-/// To avoid this, we force the number of bits to 64bits
-/// when the result is greater than `64-8 = 56 bits`.
-///
-/// Note that this only affects rare use cases spawning over
-/// a very large range of values. Even in this case, it results
-/// in an extra cost of at most 12% compared to the optimal
-/// number of bits.
-pub fn compute_num_bits(amplitude: u64) -> u8 {
-    let amplitude = (64u32 - amplitude.leading_zeros()) as u8;
-    if amplitude <= 64 - 8 {
-        amplitude
-    } else {
-        64
-    }
-}
-
-pub struct BitPacker {
+pub(crate) struct BitPacker {
    mini_buffer: u64,
    mini_buffer_written: usize,
-    num_bits: usize,
 }

 impl BitPacker {
-    pub fn new(num_bits: usize) -> BitPacker {
+    pub fn new() -> BitPacker {
        BitPacker {
            mini_buffer: 0u64,
            mini_buffer_written: 0,
-            num_bits,
        }
    }

-    pub fn write<TWrite: Write>(&mut self, val: u64, output: &mut TWrite) -> io::Result<()> {
+    pub fn write<TWrite: Write>(
+        &mut self,
+        val: u64,
+        num_bits: u8,
+        output: &mut TWrite,
+    ) -> io::Result<()> {
        let val_u64 = val as u64;
-        if self.mini_buffer_written + self.num_bits > 64 {
+        let num_bits = num_bits as usize;
+        if self.mini_buffer_written + num_bits > 64 {
            self.mini_buffer |= val_u64.wrapping_shl(self.mini_buffer_written as u32);
            self.mini_buffer.serialize(output)?;
            self.mini_buffer = val_u64.wrapping_shr((64 - self.mini_buffer_written) as u32);
-            self.mini_buffer_written = self.mini_buffer_written + (self.num_bits as usize) - 64;
+            self.mini_buffer_written = self.mini_buffer_written + num_bits - 64;
        } else {
            self.mini_buffer |= val_u64 << self.mini_buffer_written;
-            self.mini_buffer_written += self.num_bits;
+            self.mini_buffer_written += num_bits;
            if self.mini_buffer_written == 64 {
                self.mini_buffer.serialize(output)?;
                self.mini_buffer_written = 0;
@@ -71,7 +43,7 @@ impl BitPacker {
        Ok(())
    }

-    pub(crate) fn flush<TWrite: Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
+    pub fn flush<TWrite: Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
        if self.mini_buffer_written > 0 {
            let num_bytes = (self.mini_buffer_written + 7) / 8;
            let arr: [u8; 8] = unsafe { mem::transmute::<u64, [u8; 8]>(self.mini_buffer) };
@@ -89,6 +61,7 @@ impl BitPacker {
    }
 }

+#[derive(Clone)]
 pub struct BitUnpacker<Data>
 where
    Data: Deref<Target = [u8]>,
@@ -102,14 +75,14 @@ impl<Data> BitUnpacker<Data>
 where
    Data: Deref<Target = [u8]>,
 {
-    pub fn new(data: Data, num_bits: usize) -> BitUnpacker<Data> {
+    pub fn new(data: Data, num_bits: u8) -> BitUnpacker<Data> {
        let mask: u64 = if num_bits == 64 {
            !0u64
        } else {
            (1u64 << num_bits) - 1u64
        };
        BitUnpacker {
-            num_bits,
+            num_bits: num_bits as usize,
            mask,
            data,
        }
@@ -117,7 +90,7 @@ where

    pub fn get(&self, idx: usize) -> u64 {
        if self.num_bits == 0 {
-            return 0;
+            return 0u64;
        }
        let data: &[u8] = &*self.data;
        let num_bits = self.num_bits;
@@ -133,28 +106,32 @@ where
                addr + 8 <= data.len(),
                "The fast field field should have been padded with 7 bytes."
            );
-            let val_unshifted_unmasked: u64 = unsafe { *(data[addr..].as_ptr() as *const u64) };
+            let val_unshifted_unmasked: u64 = unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) };
            let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
-            (val_shifted & mask)
+            val_shifted & mask
        } else {
            let val_unshifted_unmasked: u64 = if addr + 8 <= data.len() {
-                unsafe { *(data[addr..].as_ptr() as *const u64) }
+                unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) }
            } else {
                let mut buffer = [0u8; 8];
                for i in addr..data.len() {
                    buffer[i - addr] += data[i];
                }
-                unsafe { *(buffer[..].as_ptr() as *const u64) }
+                unsafe { ptr::read_unaligned(buffer[..].as_ptr() as *const u64) }
            };
-            let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
-            (val_shifted & mask)
+            let val_shifted = val_unshifted_unmasked >> (bit_shift as u64);
+            val_shifted & mask
        }
    }

+    /// Reads a range of values from the fast field.
+    ///
+    /// The range of values read is from
+    /// `[start..start + output.len()[`
    pub fn get_range(&self, start: u32, output: &mut [u64]) {
        if self.num_bits == 0 {
            for val in output.iter_mut() {
-                *val = 0;
+                *val = 0u64;
            }
        } else {
            let data: &[u8] = &*self.data;
@@ -164,7 +141,7 @@ where
            for output_val in output.iter_mut() {
                let addr = addr_in_bits >> 3;
                let bit_shift = addr_in_bits & 7;
-                let val_unshifted_unmasked: u64 = unsafe { *(data[addr..].as_ptr() as *const u64) };
+                let val_unshifted_unmasked: u64 = unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) };
                let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
                *output_val = val_shifted & mask;
                addr_in_bits += num_bits;
@@ -175,37 +152,25 @@ where

 #[cfg(test)]
 mod test {
-    use super::{compute_num_bits, BitPacker, BitUnpacker};
+    use super::{BitPacker, BitUnpacker};

-    #[test]
-    fn test_compute_num_bits() {
-        assert_eq!(compute_num_bits(1), 1u8);
-        assert_eq!(compute_num_bits(0), 0u8);
-        assert_eq!(compute_num_bits(2), 2u8);
-        assert_eq!(compute_num_bits(3), 2u8);
-        assert_eq!(compute_num_bits(4), 3u8);
-        assert_eq!(compute_num_bits(255), 8u8);
-        assert_eq!(compute_num_bits(256), 9u8);
-        assert_eq!(compute_num_bits(5_000_000_000), 33u8);
-    }
-
-    fn create_fastfield_bitpacker(len: usize, num_bits: usize) -> (BitUnpacker<Vec<u8>>, Vec<u64>) {
+    fn create_fastfield_bitpacker(len: usize, num_bits: u8) -> (BitUnpacker<Vec<u8>>, Vec<u64>) {
        let mut data = Vec::new();
-        let mut bitpacker = BitPacker::new(num_bits);
-        let max_val: u64 = (1 << num_bits) - 1;
+        let mut bitpacker = BitPacker::new();
+        let max_val: u64 = (1u64 << num_bits as u64) - 1u64;
        let vals: Vec<u64> = (0u64..len as u64)
            .map(|i| if max_val == 0 { 0 } else { i % max_val })
            .collect();
        for &val in &vals {
-            bitpacker.write(val, &mut data).unwrap();
+            bitpacker.write(val, num_bits, &mut data).unwrap();
        }
        bitpacker.close(&mut data).unwrap();
-        assert_eq!(data.len(), (num_bits * len + 7) / 8 + 7);
+        assert_eq!(data.len(), ((num_bits as usize) * len + 7) / 8 + 7);
        let bitunpacker = BitUnpacker::new(data, num_bits);
        (bitunpacker, vals)
    }

-    fn test_bitpacker_util(len: usize, num_bits: usize) {
+    fn test_bitpacker_util(len: usize, num_bits: u8) {
        let (bitunpacker, vals) = create_fastfield_bitpacker(len, num_bits);
        for (i, val) in vals.iter().enumerate() {
            assert_eq!(bitunpacker.get(i), *val);
--- a/src/common/bitset.rs
+++ b/src/common/bitset.rs
@@ -0,0 +1,389 @@
+use std::fmt;
+use std::u64;
+
+#[derive(Clone, Copy, Eq, PartialEq)]
+pub(crate) struct TinySet(u64);
+
+impl fmt::Debug for TinySet {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        self.into_iter().collect::<Vec<u32>>().fmt(f)
+    }
+}
+
+pub struct TinySetIterator(TinySet);
+impl Iterator for TinySetIterator {
+    type Item = u32;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0.pop_lowest()
+    }
+}
+
+impl IntoIterator for TinySet {
+    type Item = u32;
+    type IntoIter = TinySetIterator;
+    fn into_iter(self) -> Self::IntoIter {
+        TinySetIterator(self)
+    }
+}
+
+impl TinySet {
+    /// Returns an empty `TinySet`.
+    pub fn empty() -> TinySet {
+        TinySet(0u64)
+    }
+
+    /// Returns the complement of the set in `[0, 64[`.
+    fn complement(&self) -> TinySet {
+        TinySet(!self.0)
+    }
+
+    /// Returns true iff the `TinySet` contains the element `el`.
+    pub fn contains(&self, el: u32) -> bool {
+        !self.intersect(TinySet::singleton(el)).is_empty()
+    }
+
+    /// Returns the intersection of `self` and `other`
+    pub fn intersect(&self, other: TinySet) -> TinySet {
+        TinySet(self.0 & other.0)
+    }
+
+    /// Creates a new `TinySet` containing only one element
+    /// within `[0; 64[`
+    #[inline(always)]
+    pub fn singleton(el: u32) -> TinySet {
+        TinySet(1u64 << u64::from(el))
+    }
+
+    /// Insert a new element within [0..64[
+    #[inline(always)]
+    pub fn insert(self, el: u32) -> TinySet {
+        self.union(TinySet::singleton(el))
+    }
+
+    /// Inserts `el` (within `[0..64[`) in place. Returns true iff the set changed.
+    #[inline(always)]
+    pub fn insert_mut(&mut self, el: u32) -> bool {
+        let old = *self;
+        *self = old.insert(el);
+        old != *self
+    }
+
+    /// Returns the union of two tinysets
+    #[inline(always)]
+    pub fn union(self, other: TinySet) -> TinySet {
+        TinySet(self.0 | other.0)
+    }
+
+    /// Returns true iff the `TinySet` is empty.
+    #[inline(always)]
+    pub fn is_empty(&self) -> bool {
+        self.0 == 0u64
+    }
+
+    /// Returns the lowest element in the `TinySet`
+    /// and removes it.
+    #[inline(always)]
+    pub fn pop_lowest(&mut self) -> Option<u32> {
+        if self.is_empty() {
+            None
+        } else {
+            let lowest = self.0.trailing_zeros() as u32;
+            self.0 ^= TinySet::singleton(lowest).0;
+            Some(lowest)
+        }
+    }
+
+    /// Returns a `TinySet` that contains all values in
+    /// `[0, upper_bound[` (the upper bound is excluded).
+    ///
+    /// `upper_bound` is assumed to be strictly lower than 64 (it is taken modulo 64).
+    pub fn range_lower(upper_bound: u32) -> TinySet {
+        TinySet((1u64 << u64::from(upper_bound % 64u32)) - 1u64)
+    }
+
+    /// Returns a `TinySet` that contains all values greater
+    /// or equal to the given limit, included. (and up to 63)
+    ///
+    /// The limit is assumed to be strictly lower than 64.
+    pub fn range_greater_or_equal(from_included: u32) -> TinySet {
+        TinySet::range_lower(from_included).complement()
+    }
+
+    pub fn clear(&mut self) {
+        self.0 = 0u64;
+    }
+
+    pub fn len(&self) -> u32 {
+        self.0.count_ones()
+    }
+}
+
+#[derive(Clone)]
+pub struct BitSet {
+    tinysets: Box<[TinySet]>,
+    len: usize, //< Technically it should be u32, but we
+    // count multiple inserts.
+    // `usize` guards us from overflow.
+    max_value: u32,
+}
+
+fn num_buckets(max_val: u32) -> u32 {
+    max_val / 64u32 + (max_val % 64u32 != 0) as u32 // ceil(max_val / 64), overflow-free
+}
+
+impl BitSet {
+    /// Create a new `BitSet` that may contain elements
+    /// within `[0, max_val[`.
+    pub fn with_max_value(max_value: u32) -> BitSet {
+        let num_buckets = num_buckets(max_value);
+        let tinybisets = vec![TinySet::empty(); num_buckets as usize].into_boxed_slice();
+        BitSet {
+            tinysets: tinybisets,
+            len: 0,
+            max_value,
+        }
+    }
+
+    /// Removes all elements from the `BitSet` and resets its length to 0.
+    pub fn clear(&mut self) {
+        self.tinysets.iter_mut().for_each(|tinyset| *tinyset = TinySet::empty());
+        // The cached count must follow, otherwise `len()` keeps its stale value.
+        self.len = 0;
+    }
+
+    /// Returns the number of elements in the `BitSet`.
+    pub fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Adds `el` to the `BitSet`; `len` only grows when `el` was absent.
+    pub fn insert(&mut self, el: u32) {
+        // No check is made that `el` is below `max_value`.
+        let bucket = (el / 64u32) as usize;
+        let bit_in_bucket = el % 64u32;
+        let newly_inserted = self.tinysets[bucket].insert_mut(bit_in_bucket);
+        if newly_inserted {
+            // Re-inserting an existing element must not be counted twice.
+            self.len += 1;
+        }
+    }
+
+    /// Returns true iff the elements is in the `BitSet`.
+    pub fn contains(&self, el: u32) -> bool {
+        self.tinyset(el / 64u32).contains(el % 64)
+    }
+
+    /// Returns the index of the first non-empty bucket whose index is
+    /// greater than or equal to `bucket`, or `None` if there is none.
+    ///
+    /// Reminder: the tiny set with the bucket `bucket`, represents the
+    /// elements from `bucket * 64` to `(bucket+1) * 64` (excluded).
+    pub(crate) fn first_non_empty_bucket(&self, bucket: u32) -> Option<u32> {
+        self.tinysets[bucket as usize..]
+            .iter()
+            .cloned()
+            .position(|tinyset| !tinyset.is_empty())
+            .map(|delta_bucket| bucket + delta_bucket as u32)
+    }
+
+    pub fn max_value(&self) -> u32 {
+        self.max_value
+    }
+
+    /// Returns the tiny bitset representing the
+    /// the set restricted to the number range from
+    /// `bucket * 64` to `(bucket + 1) * 64`.
+    pub(crate) fn tinyset(&self, bucket: u32) -> TinySet {
+        self.tinysets[bucket as usize]
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    extern crate test;
+    use tests;
+    use std::collections::HashSet;
+    use super::BitSet;
+    use super::TinySet;
+    use tests::generate_nonunique_unsorted;
+    use std::collections::BTreeSet;
+    use query::BitSetDocSet;
+    use docset::DocSet;
+
+    #[test]
+    fn test_tiny_set() {
+        assert!(TinySet::empty().is_empty());
+        {
+            let mut u = TinySet::empty().insert(1u32);
+            assert_eq!(u.pop_lowest(), Some(1u32));
+            assert!(u.pop_lowest().is_none())
+        }
+        {
+            let mut u = TinySet::empty().insert(1u32).insert(1u32);
+            assert_eq!(u.pop_lowest(), Some(1u32));
+            assert!(u.pop_lowest().is_none())
+        }
+        {
+            let mut u = TinySet::empty().insert(2u32);
+            assert_eq!(u.pop_lowest(), Some(2u32));
+            u.insert_mut(1u32);
+            assert_eq!(u.pop_lowest(), Some(1u32));
+            assert!(u.pop_lowest().is_none());
+        }
+        {
+            let mut u = TinySet::empty().insert(63u32);
+            assert_eq!(u.pop_lowest(), Some(63u32));
+            assert!(u.pop_lowest().is_none());
+        }
+    }
+
+    #[test]
+    fn test_bitset() {
+        let test_against_hashset = |els: &[u32], max_value: u32| {
+            let mut hashset: HashSet<u32> = HashSet::new();
+            let mut bitset = BitSet::with_max_value(max_value);
+            for &el in els {
+                assert!(el < max_value);
+                hashset.insert(el);
+                bitset.insert(el);
+            }
+            for el in 0..max_value {
+                assert_eq!(hashset.contains(&el), bitset.contains(el));
+            }
+            assert_eq!(bitset.max_value(), max_value);
+        };
+
+        test_against_hashset(&[], 0);
+        test_against_hashset(&[], 1);
+        test_against_hashset(&[0u32], 1);
+        test_against_hashset(&[0u32], 100);
+        test_against_hashset(&[1u32, 2u32], 4);
+        test_against_hashset(&[99u32], 100);
+        test_against_hashset(&[63u32], 64);
+        test_against_hashset(&[62u32, 63u32], 64);
+    }
+
+    #[test]
+    fn test_bitset_large() {
+        let arr = generate_nonunique_unsorted(1_000_000, 50_000);
+        let mut btreeset: BTreeSet<u32> = BTreeSet::new();
+        let mut bitset = BitSet::with_max_value(1_000_000);
+        for el in arr {
+            btreeset.insert(el);
+            bitset.insert(el);
+        }
+        for i in 0..1_000_000 {
+            assert_eq!(btreeset.contains(&i), bitset.contains(i));
+        }
+        assert_eq!(btreeset.len(), bitset.len());
+        let mut bitset_docset = BitSetDocSet::from(bitset);
+        for el in btreeset.into_iter() {
+            bitset_docset.advance();
+            assert_eq!(bitset_docset.doc(), el);
+        }
+        assert!(!bitset_docset.advance());
+    }
+
+    #[test]
+    fn test_bitset_num_buckets() {
+        use super::num_buckets;
+        assert_eq!(num_buckets(0u32), 0);
+        assert_eq!(num_buckets(1u32), 1);
+        assert_eq!(num_buckets(64u32), 1);
+        assert_eq!(num_buckets(65u32), 2);
+        assert_eq!(num_buckets(128u32), 2);
+        assert_eq!(num_buckets(129u32), 3);
+    }
+
+    #[test]
+    fn test_tinyset_range() {
+        assert_eq!(
+            TinySet::range_lower(3).into_iter().collect::<Vec<u32>>(),
+            [0, 1, 2]
+        );
+        assert!(TinySet::range_lower(0).is_empty());
+        assert_eq!(
+            TinySet::range_lower(63).into_iter().collect::<Vec<u32>>(),
+            (0u32..63u32).collect::<Vec<_>>()
+        );
+        assert_eq!(
+            TinySet::range_lower(1).into_iter().collect::<Vec<u32>>(),
+            [0]
+        );
+        assert_eq!(
+            TinySet::range_lower(2).into_iter().collect::<Vec<u32>>(),
+            [0, 1]
+        );
+        assert_eq!(
+            TinySet::range_greater_or_equal(3)
+                .into_iter()
+                .collect::<Vec<u32>>(),
+            (3u32..64u32).collect::<Vec<_>>()
+        );
+    }
+
+    #[test]
+    fn test_bitset_len() {
+        let mut bitset = BitSet::with_max_value(1_000);
+        assert_eq!(bitset.len(), 0);
+        bitset.insert(3u32);
+        assert_eq!(bitset.len(), 1);
+        bitset.insert(103u32);
+        assert_eq!(bitset.len(), 2);
+        bitset.insert(3u32);
+        assert_eq!(bitset.len(), 2);
+        bitset.insert(103u32);
+        assert_eq!(bitset.len(), 2);
+        bitset.insert(104u32);
+        assert_eq!(bitset.len(), 3);
+    }
+
+    #[test]
+    fn test_bitset_clear() {
+        let mut bitset = BitSet::with_max_value(1_000);
+        let els = tests::sample(1_000, 0.01f32);
+        for &el in &els {
+            bitset.insert(el);
+        }
+        assert!(els.iter().all(|el| bitset.contains(*el)));
+        bitset.clear();
+        for el in 0u32..1000u32 {
+            assert!(!bitset.contains(el));
+        }
+    }
+
+    #[bench]
+    fn bench_tinyset_pop(b: &mut test::Bencher) {
+        b.iter(|| {
+            let mut tinyset = TinySet::singleton(test::black_box(31u32));
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+            tinyset.pop_lowest();
+        });
+    }
+
+    #[bench]
+    fn bench_tinyset_sum(b: &mut test::Bencher) {
+        let tiny_set = TinySet::empty().insert(10u32).insert(14u32).insert(21u32);
+        b.iter(|| {
+            assert_eq!(test::black_box(tiny_set).into_iter().sum::<u32>(), 45u32);
+        });
+    }
+
+    #[bench]
+    fn bench_tinyarr_sum(b: &mut test::Bencher) {
+        let v = [10u32, 14u32, 21u32];
+        b.iter(|| test::black_box(v).iter().cloned().sum::<u32>());
+    }
+
+    #[bench]
+    fn bench_bitset_initialize(b: &mut test::Bencher) {
+        b.iter(|| BitSet::with_max_value(1_000_000));
+    }
+
+}
--- a/src/common/composite_file.rs
+++ b/src/common/composite_file.rs
@@ -4,14 +4,43 @@ use std::collections::HashMap;
 use schema::Field;
 use common::VInt;
 use directory::WritePtr;
-use std::io;
+use std::io::{self, Read};
 use directory::ReadOnlySource;
 use common::BinarySerializable;

+#[derive(Eq, PartialEq, Hash, Copy, Ord, PartialOrd, Clone, Debug)]
+pub struct FileAddr {
+    field: Field,
+    idx: usize,
+}
+
+impl FileAddr {
+    fn new(field: Field, idx: usize) -> FileAddr {
+        FileAddr { field, idx }
+    }
+}
+
+impl BinarySerializable for FileAddr {
+    /// Writes the field, followed by the index encoded as a `VInt`.
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
+        self.field.serialize(writer)?;
+        VInt(self.idx as u64).serialize(writer)
+    }
+
+    /// Reads back a `FileAddr` in the order `serialize` wrote it:
+    /// the field first, then the `VInt`-encoded index.
+    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
+        let field = Field::deserialize(reader)?;
+        let VInt(raw_idx) = VInt::deserialize(reader)?;
+        let idx = raw_idx as usize;
+        Ok(FileAddr::new(field, idx))
+    }
+}
+
 /// A `CompositeWrite` is used to write a `CompositeFile`.
 pub struct CompositeWrite<W = WritePtr> {
    write: CountingWriter<W>,
-    offsets: HashMap<Field, usize>,
+    offsets: HashMap<FileAddr, usize>,
 }

 impl<W: Write> CompositeWrite<W> {
@@ -26,9 +55,15 @@ impl<W: Write> CompositeWrite<W> {

    /// Start writing a new field.
    pub fn for_field(&mut self, field: Field) -> &mut CountingWriter<W> {
+        self.for_field_with_idx(field, 0)
+    }
+
+    /// Start writing a new field.
+    pub fn for_field_with_idx(&mut self, field: Field, idx: usize) -> &mut CountingWriter<W> {
        let offset = self.write.written_bytes();
-        assert!(!self.offsets.contains_key(&field));
-        self.offsets.insert(field, offset);
+        let file_addr = FileAddr::new(field, idx);
+        assert!(!self.offsets.contains_key(&file_addr));
+        self.offsets.insert(file_addr, offset);
        &mut self.write
    }

@@ -42,16 +77,16 @@ impl<W: Write> CompositeWrite<W> {

        let mut offset_fields: Vec<_> = self.offsets
            .iter()
-            .map(|(field, offset)| (offset, field))
+            .map(|(file_addr, offset)| (*offset, *file_addr))
            .collect();

        offset_fields.sort();

        let mut prev_offset = 0;
-        for (offset, field) in offset_fields {
+        for (offset, file_addr) in offset_fields {
            VInt((offset - prev_offset) as u64).serialize(&mut self.write)?;
-            field.serialize(&mut self.write)?;
-            prev_offset = *offset;
+            file_addr.serialize(&mut self.write)?;
+            prev_offset = offset;
        }

        let footer_len = (self.write.written_bytes() - footer_offset) as u32;
@@ -70,7 +105,7 @@ impl<W: Write> CompositeWrite<W> {
 #[derive(Clone)]
 pub struct CompositeFile {
    data: ReadOnlySource,
-    offsets_index: HashMap<Field, (usize, usize)>,
+    offsets_index: HashMap<FileAddr, (usize, usize)>,
 }

 impl CompositeFile {
@@ -86,7 +121,7 @@ impl CompositeFile {
        let mut footer_buffer = footer_data.as_slice();
        let num_fields = VInt::deserialize(&mut footer_buffer)?.0 as usize;

-        let mut fields = vec![];
+        let mut file_addrs = vec![];
        let mut offsets = vec![];

        let mut field_index = HashMap::new();
@@ -94,16 +129,16 @@ impl CompositeFile {
        let mut offset = 0;
        for _ in 0..num_fields {
            offset += VInt::deserialize(&mut footer_buffer)?.0 as usize;
-            let field = Field::deserialize(&mut footer_buffer)?;
+            let file_addr = FileAddr::deserialize(&mut footer_buffer)?;
            offsets.push(offset);
-            fields.push(field);
+            file_addrs.push(file_addr);
        }
        offsets.push(footer_start);
        for i in 0..num_fields {
-            let field = fields[i];
+            let file_addr = file_addrs[i];
            let start_offset = offsets[i];
            let end_offset = offsets[i + 1];
-            field_index.insert(field, (start_offset, end_offset));
+            field_index.insert(file_addr, (start_offset, end_offset));
        }

        Ok(CompositeFile {
@@ -124,8 +159,14 @@ impl CompositeFile {
    /// Returns the `ReadOnlySource` associated
    /// to a given `Field` and stored in a `CompositeFile`.
    pub fn open_read(&self, field: Field) -> Option<ReadOnlySource> {
+        self.open_read_with_idx(field, 0)
+    }
+
+    /// Returns the `ReadOnlySource` associated
+    /// to a given `Field` and stored in a `CompositeFile`.
+    pub fn open_read_with_idx(&self, field: Field, idx: usize) -> Option<ReadOnlySource> {
        self.offsets_index
-            .get(&field)
+            .get(&FileAddr { field, idx, })
            .map(|&(from, to)| self.data.slice(from, to))
    }
 }
--- a/src/common/mod.rs
+++ b/src/common/mod.rs
@@ -1,22 +1,59 @@
-mod serialize;
-mod timer;
+mod serialize;
 mod vint;
 mod counting_writer;
 mod composite_file;
 pub mod bitpacker;
+mod bitset;

 pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
-pub use self::serialize::BinarySerializable;
-pub use self::timer::Timing;
-pub use self::timer::TimerTree;
-pub use self::timer::OpenTimer;
+pub use self::serialize::{BinarySerializable, FixedSize};
 pub use self::vint::VInt;
 pub use self::counting_writer::CountingWriter;
+pub use self::bitset::BitSet;
+pub(crate) use self::bitset::TinySet;
+pub use byteorder::LittleEndian as Endianness;

 use std::io;

+/// Computes the number of bits that will be used for bitpacking.
+///
+/// In general the target is the minimum number of bits
+/// required to express the amplitude given in argument.
+///
+/// e.g. If the amplitude is 10, we can store all ints on simply 4 bits.
+///
+/// The logic is slightly more convoluted here as, for optimization
+/// reasons, we want to ensure that a value spans over at most
+/// 8 aligned bytes.
+///
+/// Spanning over 9 bytes is possible for instance, if we do
+/// bitpacking with an amplitude of 63 bits.
+/// In this case, the second int will start on bit
+/// 63 (which belongs to byte 7) and ends at byte 15;
+/// Hence 9 bytes (from byte 7 to byte 15 included).
+///
+/// To avoid this, we force the number of bits to 64 bits
+/// when the result is greater than `64 - 8 = 56` bits.
+///
+/// Note that this only affects rare use cases spanning over
+/// a very large range of values. Even in this case, it results
+/// in an extra cost of at most 12% compared to the optimal
+/// number of bits.
+pub(crate) fn compute_num_bits(n: u64) -> u8 {
+    let min_bits = (64u32 - n.leading_zeros()) as u8;
+    // Widths above 56 bits could straddle 9 bytes: fall back to full 64 bits.
+    if min_bits > 64 - 8 {
+        return 64;
+    }
+    min_bits
+}
+
+pub(crate) fn is_power_of_2(n: usize) -> bool {
+    n.is_power_of_two() // std helper: true iff `n == 2^k`, hence false for 0
+}
+
 /// Create a default io error given a string.
-pub fn make_io_err(msg: String) -> io::Error {
+pub(crate) fn make_io_err(msg: String) -> io::Error {
    io::Error::new(io::ErrorKind::Other, msg)
 }

@@ -65,9 +102,10 @@ pub fn u64_to_i64(val: u64) -> i64 {
 }

 #[cfg(test)]
-mod test {
+pub(crate) mod test {

-    use super::{i64_to_u64, u64_to_i64};
+    use super::{compute_num_bits, i64_to_u64, u64_to_i64};
+    pub use super::serialize::test::fixed_size_test;

    fn test_i64_converter_helper(val: i64) {
        assert_eq!(u64_to_i64(i64_to_u64(val)), val);
@@ -84,4 +122,16 @@ mod test {
            test_i64_converter_helper(i);
        }
    }
+
+    #[test]
+    fn test_compute_num_bits() {
+        assert_eq!(compute_num_bits(1), 1u8);
+        assert_eq!(compute_num_bits(0), 0u8);
+        assert_eq!(compute_num_bits(2), 2u8);
+        assert_eq!(compute_num_bits(3), 2u8);
+        assert_eq!(compute_num_bits(4), 3u8);
+        assert_eq!(compute_num_bits(255), 8u8);
+        assert_eq!(compute_num_bits(256), 9u8);
+        assert_eq!(compute_num_bits(5_000_000_000), 33u8);
+    }
 }
--- a/src/common/serialize.rs
+++ b/src/common/serialize.rs
@@ -1,16 +1,25 @@
 use byteorder::{ReadBytesExt, WriteBytesExt};
-use byteorder::LittleEndian as Endianness;
+use common::Endianness;
 use std::fmt;
 use std::io::Write;
 use std::io::Read;
 use std::io;
 use common::VInt;

+/// Trait for a simple binary serialization.
 pub trait BinarySerializable: fmt::Debug + Sized {
+    /// Serialize
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()>;
+    /// Deserialize
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self>;
 }

+/// `FixedSize` marks a `BinarySerializable` as
+/// always serializing to the same size.
+pub trait FixedSize: BinarySerializable {
+    const SIZE_IN_BYTES: usize;
+}
+
 impl BinarySerializable for () {
    fn serialize<W: Write>(&self, _: &mut W) -> io::Result<()> {
        Ok(())
@@ -20,6 +29,10 @@ impl BinarySerializable for () {
    }
 }

+impl FixedSize for () {
+    const SIZE_IN_BYTES: usize = 0;
+}
+
 impl<T: BinarySerializable> BinarySerializable for Vec<T> {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        VInt(self.len() as u64).serialize(writer)?;
@@ -59,6 +72,10 @@ impl BinarySerializable for u32 {
    }
 }

+impl FixedSize for u32 {
+    const SIZE_IN_BYTES: usize = 4;
+}
+
 impl BinarySerializable for u64 {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u64::<Endianness>(*self)
@@ -68,6 +85,10 @@ impl BinarySerializable for u64 {
    }
 }

+impl FixedSize for u64 {
+    const SIZE_IN_BYTES: usize = 8;
+}
+
 impl BinarySerializable for i64 {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_i64::<Endianness>(*self)
@@ -77,6 +98,10 @@ impl BinarySerializable for i64 {
    }
 }

+impl FixedSize for i64 {
+    const SIZE_IN_BYTES: usize = 8;
+}
+
 impl BinarySerializable for u8 {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u8(*self)
@@ -86,6 +111,10 @@ impl BinarySerializable for u8 {
    }
 }

+impl FixedSize for u8 {
+    const SIZE_IN_BYTES: usize = 1;
+}
+
 impl BinarySerializable for String {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        let data: &[u8] = self.as_bytes();
@@ -104,63 +133,78 @@ impl BinarySerializable for String {
 }

 #[cfg(test)]
-mod test {
+pub mod test {

    use common::VInt;
    use super::*;

-    fn serialize_test<T: BinarySerializable + Eq>(v: T, num_bytes: usize) {
+    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
+        let mut buffer = Vec::new();
+        O::default().serialize(&mut buffer).unwrap();
+        assert_eq!(buffer.len(), O::SIZE_IN_BYTES);
+    }
+
+    fn serialize_test<T: BinarySerializable + Eq>(v: T) -> usize {
        let mut buffer: Vec<u8> = Vec::new();
-        if num_bytes != 0 {
-            v.serialize(&mut buffer).unwrap();
-            assert_eq!(buffer.len(), num_bytes);
-        } else {
-            v.serialize(&mut buffer).unwrap();
-        }
+        v.serialize(&mut buffer).unwrap();
+        let num_bytes = buffer.len();
        let mut cursor = &buffer[..];
        let deser = T::deserialize(&mut cursor).unwrap();
        assert_eq!(deser, v);
+        num_bytes
    }

    #[test]
    fn test_serialize_u8() {
-        serialize_test(3u8, 1);
-        serialize_test(5u8, 1);
+        fixed_size_test::<u8>();
    }

    #[test]
    fn test_serialize_u32() {
-        serialize_test(3u32, 4);
-        serialize_test(5u32, 4);
-        serialize_test(u32::max_value(), 4);
+        fixed_size_test::<u32>();
+        assert_eq!(4, serialize_test(3u32));
+        assert_eq!(4, serialize_test(5u32));
+        assert_eq!(4, serialize_test(u32::max_value()));
+    }
+
+    #[test]
+    fn test_serialize_i64() {
+        fixed_size_test::<i64>();
+    }
+
+    #[test]
+    fn test_serialize_u64() {
+        fixed_size_test::<u64>();
    }

    #[test]
    fn test_serialize_string() {
-        serialize_test(String::from(""), 1);
-        serialize_test(String::from("ぽよぽよ"), 1 + 3 * 4);
-        serialize_test(String::from("富士さん見える。"), 1 + 3 * 8);
+        assert_eq!(serialize_test(String::from("")), 1);
+        assert_eq!(serialize_test(String::from("ぽよぽよ")), 1 + 3 * 4);
+        assert_eq!(
+            serialize_test(String::from("富士さん見える。")),
+            1 + 3 * 8
+        );
    }

    #[test]
    fn test_serialize_vec() {
-        let v: Vec<u8> = Vec::new();
-        serialize_test(v, 1);
-        serialize_test(vec![1u32, 3u32], 1 + 4 * 2);
+        assert_eq!(serialize_test(Vec::<u8>::new()), 1);
+        assert_eq!(serialize_test(vec![1u32, 3u32]), 1 + 4 * 2);
    }

    #[test]
    fn test_serialize_vint() {
        for i in 0..10_000 {
-            serialize_test(VInt(i as u64), 0);
+            serialize_test(VInt(i as u64));
        }
-        serialize_test(VInt(7u64), 1);
-        serialize_test(VInt(127u64), 1);
-        serialize_test(VInt(128u64), 2);
-        serialize_test(VInt(129u64), 2);
-        serialize_test(VInt(1234u64), 2);
-        serialize_test(VInt(16_383), 2);
-        serialize_test(VInt(16_384), 3);
-        serialize_test(VInt(u64::max_value()), 10);
+        assert_eq!(serialize_test(VInt(7u64)), 1);
+        assert_eq!(serialize_test(VInt(127u64)), 1);
+        assert_eq!(serialize_test(VInt(128u64)), 2);
+        assert_eq!(serialize_test(VInt(129u64)), 2);
+        assert_eq!(serialize_test(VInt(1234u64)), 2);
+        assert_eq!(serialize_test(VInt(16_383u64)), 2);
+        assert_eq!(serialize_test(VInt(16_384u64)), 3);
+        assert_eq!(serialize_test(VInt(u64::max_value())), 10);
    }
 }
--- a/src/common/timer.rs
+++ b/src/common/timer.rs
@@ -1,99 +0,0 @@
-use time::PreciseTime;
-
-pub struct OpenTimer<'a> {
-    name: &'static str,
-    timer_tree: &'a mut TimerTree,
-    start: PreciseTime,
-    depth: u32,
-}
-
-impl<'a> OpenTimer<'a> {
-    /// Starts timing a new named subtask
-    ///
-    /// The timer is stopped automatically
-    /// when the `OpenTimer` is dropped.
-    pub fn open(&mut self, name: &'static str) -> OpenTimer {
-        OpenTimer {
-            name: name,
-            timer_tree: self.timer_tree,
-            start: PreciseTime::now(),
-            depth: self.depth + 1,
-        }
-    }
-}
-
-impl<'a> Drop for OpenTimer<'a> {
-    fn drop(&mut self) {
-        self.timer_tree.timings.push(Timing {
-            name: self.name,
-            duration: self.start
-                .to(PreciseTime::now())
-                .num_microseconds()
-                .unwrap(),
-            depth: self.depth,
-        });
-    }
-}
-
-/// Timing recording
-#[derive(Debug, Serialize)]
-pub struct Timing {
-    name: &'static str,
-    duration: i64,
-    depth: u32,
-}
-
-/// Timer tree
-#[derive(Debug, Serialize)]
-pub struct TimerTree {
-    timings: Vec<Timing>,
-}
-
-impl TimerTree {
-    /// Returns the total time elapsed in microseconds
-    pub fn total_time(&self) -> i64 {
-        self.timings.last().unwrap().duration
-    }
-
-    /// Open a new named subtask
-    pub fn open(&mut self, name: &'static str) -> OpenTimer {
-        OpenTimer {
-            name: name,
-            timer_tree: self,
-            start: PreciseTime::now(),
-            depth: 0,
-        }
-    }
-}
-
-impl Default for TimerTree {
-    fn default() -> TimerTree {
-        TimerTree {
-            timings: Vec::new(),
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-
-    use super::*;
-
-    #[test]
-    fn test_timer() {
-        let mut timer_tree = TimerTree::default();
-        {
-            let mut a = timer_tree.open("a");
-            {
-                let mut ab = a.open("b");
-                {
-                    let _abc = ab.open("c");
-                }
-                {
-                    let _abd = ab.open("d");
-                }
-            }
-        }
-        assert_eq!(timer_tree.timings.len(), 4);
-    }
-}
--- a/src/common/vint.rs
+++ b/src/common/vint.rs
@@ -11,6 +11,10 @@ impl VInt {
    pub fn val(&self) -> u64 {
        self.0
    }
+
+    pub fn deserialize_u64<R: Read>(reader: &mut R) -> io::Result<u64> {
+        VInt::deserialize(reader).map(|vint| vint.0)
+    }
 }

 impl BinarySerializable for VInt {
--- a/src/compression/mod.rs
+++ b/src/compression/mod.rs
@@ -3,39 +3,97 @@

 mod stream;

+pub const COMPRESSION_BLOCK_SIZE: usize = 128;
+const COMPRESSED_BLOCK_MAX_SIZE: usize = COMPRESSION_BLOCK_SIZE * 4 + 1;
+
 pub use self::stream::CompressedIntStream;

-#[cfg(not(feature = "simdcompression"))]
-mod pack {
-    mod compression_pack_nosimd;
-    pub use self::compression_pack_nosimd::{BlockDecoder, BlockEncoder};
-}

-#[cfg(feature = "simdcompression")]
-mod pack {
-    mod compression_pack_simd;
-    pub use self::compression_pack_simd::{BlockDecoder, BlockEncoder};
-}
+use bitpacking::{BitPacker, BitPacker4x};

-pub use self::pack::{BlockDecoder, BlockEncoder};
-
-#[cfg(any(not(feature = "simdcompression"), target_env = "msvc"))]
-mod vint {
-    mod compression_vint_nosimd;
-    pub(crate) use self::compression_vint_nosimd::*;
-}
-
-#[cfg(all(feature = "simdcompression", not(target_env = "msvc")))]
-mod vint {
-    mod compression_vint_simd;
-    pub(crate) use self::compression_vint_simd::*;
-}

 /// Returns the size in bytes of a compressed block, given `num_bits`.
 pub fn compressed_block_size(num_bits: u8) -> usize {
-    1 + (num_bits as usize) * 16
+    1 + (num_bits as usize) * COMPRESSION_BLOCK_SIZE / 8
 }

+pub struct BlockEncoder {
+    bitpacker: BitPacker4x,
+    pub output: [u8; COMPRESSED_BLOCK_MAX_SIZE],
+    pub output_len: usize,
+}
+
+impl BlockEncoder {
+    pub fn new() -> BlockEncoder {
+        BlockEncoder {
+            bitpacker: BitPacker4x::new(),
+            output: [0u8; COMPRESSED_BLOCK_MAX_SIZE],
+            output_len: 0,
+        }
+    }
+
+    pub fn compress_block_sorted(&mut self, block: &[u32], offset: u32) -> &[u8] {
+        let num_bits = self.bitpacker.num_bits_sorted(offset, block);
+        self.output[0] = num_bits;
+        let written_size = 1 + self.bitpacker.compress_sorted(offset, block, &mut self.output[1..], num_bits);
+        &self.output[..written_size]
+    }
+
+    pub fn compress_block_unsorted(&mut self, block: &[u32]) -> &[u8] {
+        let num_bits = self.bitpacker.num_bits(block);
+        self.output[0] = num_bits;
+        let written_size = 1 + self.bitpacker.compress(block, &mut self.output[1..], num_bits);
+        &self.output[..written_size]
+    }
+}
+
+
+pub struct BlockDecoder {
+    bitpacker: BitPacker4x,
+    pub output: [u32; COMPRESSION_BLOCK_SIZE + 1],
+    pub output_len: usize,
+}
+
+impl BlockDecoder {
+    pub fn new() -> BlockDecoder {
+        BlockDecoder::with_val(0u32)
+    }
+
+    pub fn with_val(val: u32) -> BlockDecoder {
+        let mut output = [val; COMPRESSION_BLOCK_SIZE + 1];
+        output[COMPRESSION_BLOCK_SIZE] = 0u32;
+        BlockDecoder {
+            bitpacker: BitPacker4x::new(),
+            output,
+            output_len: 0,
+        }
+    }
+    
+    pub fn uncompress_block_sorted(&mut self, compressed_data: &[u8], offset: u32) -> usize {
+        let num_bits = compressed_data[0];
+        self.output_len = COMPRESSION_BLOCK_SIZE;
+        1 + self.bitpacker.decompress_sorted(offset, &compressed_data[1..], &mut self.output, num_bits)
+    }
+
+    pub fn uncompress_block_unsorted<'a>(&mut self, compressed_data: &'a [u8]) -> usize {
+        let num_bits = compressed_data[0];
+        self.output_len = COMPRESSION_BLOCK_SIZE;
+        1 + self.bitpacker.decompress(&compressed_data[1..], &mut self.output, num_bits)
+    }
+
+    #[inline]
+    pub fn output_array(&self) -> &[u32] {
+        &self.output[..self.output_len]
+    }
+
+    #[inline]
+    pub fn output(&self, idx: usize) -> u32 {
+        self.output[idx]
+    }
+}
+
+mod vint;
+
 pub trait VIntEncoder {
    /// Compresses an array of `u32` integers,
    /// using [delta-encoding](https://en.wikipedia.org/wiki/Delta_encoding)
@@ -112,8 +170,6 @@ impl VIntDecoder for BlockDecoder {
    }
 }

-pub const COMPRESSION_BLOCK_SIZE: usize = 128;
-
 #[cfg(test)]
 pub mod tests {

--- a/src/compression/pack/compression_pack_nosimd.rs
+++ b/src/compression/pack/compression_pack_nosimd.rs
@@ -1,142 +0,0 @@
-use common::bitpacker::compute_num_bits;
-use common::bitpacker::{BitPacker, BitUnpacker};
-use common::CountingWriter;
-use std::cmp;
-use std::io::Write;
-use super::super::COMPRESSION_BLOCK_SIZE;
-
-const COMPRESSED_BLOCK_MAX_SIZE: usize = COMPRESSION_BLOCK_SIZE * 4 + 1;
-
-pub fn compress_sorted(vals: &mut [u32], output: &mut [u8], offset: u32) -> usize {
-    let mut max_delta = 0;
-    {
-        let mut local_offset = offset;
-        for i in 0..COMPRESSION_BLOCK_SIZE {
-            let val = vals[i];
-            let delta = val - local_offset;
-            max_delta = cmp::max(max_delta, delta);
-            vals[i] = delta;
-            local_offset = val;
-        }
-    }
-    let mut counting_writer = CountingWriter::wrap(output);
-    let num_bits = compute_num_bits(max_delta as u64);
-    counting_writer.write_all(&[num_bits]).unwrap();
-
-    let mut bit_packer = BitPacker::new(num_bits as usize);
-    for val in vals {
-        bit_packer.write(*val as u64, &mut counting_writer).unwrap();
-    }
-    counting_writer.written_bytes()
-}
-
-pub struct BlockEncoder {
-    pub output: [u8; COMPRESSED_BLOCK_MAX_SIZE],
-    pub output_len: usize,
-    input_buffer: [u32; COMPRESSION_BLOCK_SIZE],
-}
-
-impl BlockEncoder {
-    pub fn new() -> BlockEncoder {
-        BlockEncoder {
-            output: [0u8; COMPRESSED_BLOCK_MAX_SIZE],
-            output_len: 0,
-            input_buffer: [0u32; COMPRESSION_BLOCK_SIZE],
-        }
-    }
-
-    pub fn compress_block_sorted(&mut self, vals: &[u32], offset: u32) -> &[u8] {
-        self.input_buffer.clone_from_slice(vals);
-        let compressed_size = compress_sorted(&mut self.input_buffer, &mut self.output, offset);
-        &self.output[..compressed_size]
-    }
-
-    pub fn compress_block_unsorted(&mut self, vals: &[u32]) -> &[u8] {
-        let compressed_size = {
-            let output: &mut [u8] = &mut self.output;
-            let max = vals.iter()
-                .cloned()
-                .max()
-                .expect("compress unsorted called with an empty array");
-            let num_bits = compute_num_bits(max as u64);
-            let mut counting_writer = CountingWriter::wrap(output);
-            counting_writer.write_all(&[num_bits]).unwrap();
-            let mut bit_packer = BitPacker::new(num_bits as usize);
-            for val in vals {
-                bit_packer.write(*val as u64, &mut counting_writer).unwrap();
-            }
-            for _ in vals.len()..COMPRESSION_BLOCK_SIZE {
-                bit_packer
-                    .write(vals[0] as u64, &mut counting_writer)
-                    .unwrap();
-            }
-            bit_packer.flush(&mut counting_writer).expect(
-                "Flushing the bitpacking \
-                 in an in RAM buffer should never fail",
-            );
-            // we avoid writing "closing", because we
-            // do not want 7 bytes of padding here.
-            counting_writer.written_bytes()
-        };
-        &self.output[..compressed_size]
-    }
-}
-
-pub struct BlockDecoder {
-    pub output: [u32; COMPRESSED_BLOCK_MAX_SIZE],
-    pub output_len: usize,
-}
-
-impl BlockDecoder {
-    pub fn new() -> BlockDecoder {
-        BlockDecoder::with_val(0u32)
-    }
-
-    pub fn with_val(val: u32) -> BlockDecoder {
-        BlockDecoder {
-            output: [val; COMPRESSED_BLOCK_MAX_SIZE],
-            output_len: 0,
-        }
-    }
-
-    pub fn uncompress_block_sorted<'a>(
-        &mut self,
-        compressed_data: &'a [u8],
-        mut offset: u32,
-    ) -> usize {
-        let consumed_size = {
-            let num_bits = compressed_data[0];
-            let bit_unpacker = BitUnpacker::new(&compressed_data[1..], num_bits as usize);
-            for i in 0..COMPRESSION_BLOCK_SIZE {
-                let delta = bit_unpacker.get(i);
-                let val = offset + delta as u32;
-                self.output[i] = val;
-                offset = val;
-            }
-            1 + (num_bits as usize * COMPRESSION_BLOCK_SIZE + 7) / 8
-        };
-        self.output_len = COMPRESSION_BLOCK_SIZE;
-        consumed_size
-    }
-
-    pub fn uncompress_block_unsorted<'a>(&mut self, compressed_data: &'a [u8]) -> usize {
-        let num_bits = compressed_data[0];
-        let bit_unpacker = BitUnpacker::new(&compressed_data[1..], num_bits as usize);
-        for i in 0..COMPRESSION_BLOCK_SIZE {
-            self.output[i] = bit_unpacker.get(i) as u32;
-        }
-        let consumed_size = 1 + (num_bits as usize * COMPRESSION_BLOCK_SIZE + 7) / 8;
-        self.output_len = COMPRESSION_BLOCK_SIZE;
-        consumed_size
-    }
-
-    #[inline]
-    pub fn output_array(&self) -> &[u32] {
-        &self.output[..self.output_len]
-    }
-
-    #[inline]
-    pub fn output(&self, idx: usize) -> u32 {
-        self.output[idx]
-    }
-}
--- a/src/compression/pack/compression_pack_simd.rs
+++ b/src/compression/pack/compression_pack_simd.rs
@@ -1,116 +0,0 @@
-use super::super::COMPRESSION_BLOCK_SIZE;
-
-const COMPRESSED_BLOCK_MAX_SIZE: usize = COMPRESSION_BLOCK_SIZE * 4 + 1;
-
-mod simdcomp {
-    use libc::size_t;
-
-    extern "C" {
-        pub fn compress_sorted(data: *const u32, output: *mut u8, offset: u32) -> size_t;
-
-        pub fn uncompress_sorted(
-            compressed_data: *const u8,
-            output: *mut u32,
-            offset: u32,
-        ) -> size_t;
-
-        pub fn compress_unsorted(data: *const u32, output: *mut u8) -> size_t;
-
-        pub fn uncompress_unsorted(compressed_data: *const u8, output: *mut u32) -> size_t;
-    }
-}
-
-fn compress_sorted(vals: &[u32], output: &mut [u8], offset: u32) -> usize {
-    unsafe { simdcomp::compress_sorted(vals.as_ptr(), output.as_mut_ptr(), offset) }
-}
-
-fn uncompress_sorted(compressed_data: &[u8], output: &mut [u32], offset: u32) -> usize {
-    unsafe { simdcomp::uncompress_sorted(compressed_data.as_ptr(), output.as_mut_ptr(), offset) }
-}
-
-fn compress_unsorted(vals: &[u32], output: &mut [u8]) -> usize {
-    unsafe { simdcomp::compress_unsorted(vals.as_ptr(), output.as_mut_ptr()) }
-}
-
-fn uncompress_unsorted(compressed_data: &[u8], output: &mut [u32]) -> usize {
-    unsafe { simdcomp::uncompress_unsorted(compressed_data.as_ptr(), output.as_mut_ptr()) }
-}
-
-pub struct BlockEncoder {
-    pub output: [u8; COMPRESSED_BLOCK_MAX_SIZE],
-    pub output_len: usize,
-}
-
-impl BlockEncoder {
-    pub fn new() -> BlockEncoder {
-        BlockEncoder {
-            output: [0u8; COMPRESSED_BLOCK_MAX_SIZE],
-            output_len: 0,
-        }
-    }
-
-    pub fn compress_block_sorted(&mut self, vals: &[u32], offset: u32) -> &[u8] {
-        let compressed_size = compress_sorted(vals, &mut self.output, offset);
-        &self.output[..compressed_size]
-    }
-
-    pub fn compress_block_unsorted(&mut self, vals: &[u32]) -> &[u8] {
-        let compressed_size = compress_unsorted(vals, &mut self.output);
-        &self.output[..compressed_size]
-    }
-}
-
-pub struct BlockDecoder {
-    pub output: [u32; COMPRESSED_BLOCK_MAX_SIZE],
-    pub output_len: usize,
-}
-
-impl BlockDecoder {
-    pub fn new() -> BlockDecoder {
-        BlockDecoder::with_val(0u32)
-    }
-
-    pub fn with_val(val: u32) -> BlockDecoder {
-        BlockDecoder {
-            output: [val; COMPRESSED_BLOCK_MAX_SIZE],
-            output_len: 0,
-        }
-    }
-
-    pub fn uncompress_block_sorted(&mut self, compressed_data: &[u8], offset: u32) -> usize {
-        let consumed_size = uncompress_sorted(compressed_data, &mut self.output, offset);
-        self.output_len = COMPRESSION_BLOCK_SIZE;
-        consumed_size
-    }
-
-    pub fn uncompress_block_unsorted<'a>(&mut self, compressed_data: &'a [u8]) -> usize {
-        let consumed_size = uncompress_unsorted(compressed_data, &mut self.output);
-        self.output_len = COMPRESSION_BLOCK_SIZE;
-        consumed_size
-    }
-
-    #[inline]
-    pub fn output_array(&self) -> &[u32] {
-        &self.output[..self.output_len]
-    }
-
-    #[inline]
-    pub fn output(&self, idx: usize) -> u32 {
-        self.output[idx]
-    }
-}
-
-#[cfg(test)]
-mod tests {
-
-    use super::BlockEncoder;
-
-    #[test]
-    fn test_all_docs_compression_len() {
-        let data: Vec<u32> = (0u32..128u32).collect();
-        let mut encoder = BlockEncoder::new();
-        let compressed = encoder.compress_block_sorted(&data, 0u32);
-        assert_eq!(compressed.len(), 17);
-    }
-
-}
--- a/src/compression/stream.rs
+++ b/src/compression/stream.rs
@@ -11,7 +11,12 @@ use directory::{ReadOnlySource, SourceRead};
 /// decompressing blocks that are not required.
 pub struct CompressedIntStream {
    buffer: SourceRead,
+
    block_decoder: BlockDecoder,
+    cached_addr: usize, // address of the currently decoded block
+    cached_next_addr: usize, // address following the currently decoded block
+
+    addr: usize, // address of the block associated to the current position
    inner_offset: usize,
 }

@@ -21,34 +26,47 @@ impl CompressedIntStream {
        CompressedIntStream {
            buffer: SourceRead::from(source),
            block_decoder: BlockDecoder::new(),
-            inner_offset: COMPRESSION_BLOCK_SIZE,
+            cached_addr: usize::max_value(),
+            cached_next_addr: usize::max_value(),
+
+            addr: 0,
+            inner_offset: 0,
        }
    }

-    /// Fills a buffer with the next `output.len()` integers,
-    /// and advance the stream by that many els.
+    /// Loads the block at the given address and returns the address of the
+    /// following block.
+    pub fn read_block(&mut self, addr: usize) -> usize {
+        if self.cached_addr == addr {
+            // we are already on this block.
+            // no need to read.
+            self.cached_next_addr
+        } else {
+            let next_addr = addr + self.block_decoder.uncompress_block_unsorted(self.buffer.slice_from(addr));
+            self.cached_addr = addr;
+            self.cached_next_addr = next_addr;
+            next_addr
+        }
+    }
+
+    /// Fills a buffer with the next `output.len()` integers.
+    /// This does not consume / advance the stream.
    pub fn read(&mut self, output: &mut [u32]) {
+        let mut cursor = self.addr;
+        let mut inner_offset = self.inner_offset;
        let mut num_els: usize = output.len();
-        let mut start: usize = 0;
+        let mut start = 0;
        loop {
-            let available = COMPRESSION_BLOCK_SIZE - self.inner_offset;
-            if num_els >= available {
-                if available > 0 {
-                    let uncompressed_block =
-                        &self.block_decoder.output_array()[self.inner_offset..];
-                    output[start..][..available].clone_from_slice(uncompressed_block);
-                }
-                num_els -= available;
-                start += available;
-                let num_consumed_bytes = self.block_decoder
-                    .uncompress_block_unsorted(self.buffer.as_ref());
-                self.buffer.advance(num_consumed_bytes);
-                self.inner_offset = 0;
+            cursor = self.read_block(cursor);
+            let block = &self.block_decoder.output_array()[inner_offset..];
+            let block_len = block.len();
+            if num_els >= block_len {
+                output[start..start + block_len].clone_from_slice(&block);
+                start += block_len;
+                num_els -= block_len;
+                inner_offset = 0;
            } else {
-                let uncompressed_block = &self.block_decoder.output_array()
-                    [self.inner_offset..self.inner_offset + num_els];
-                output[start..][..num_els].clone_from_slice(uncompressed_block);
-                self.inner_offset += num_els;
+                output[start..].clone_from_slice(&block[..num_els]);
                break;
            }
        }
@@ -58,23 +76,22 @@ impl CompressedIntStream {
    ///
    /// If a full block is skipped, calling
    /// `.skip(...)` will avoid decompressing it.
+    ///
+    /// May panic if the end of the stream is reached.
    pub fn skip(&mut self, mut skip_len: usize) {
-        let available = COMPRESSION_BLOCK_SIZE - self.inner_offset;
-        if available >= skip_len {
-            self.inner_offset += skip_len;
-        } else {
-            skip_len -= available;
-            // entirely skip decompressing some blocks.
-            while skip_len >= COMPRESSION_BLOCK_SIZE {
-                skip_len -= COMPRESSION_BLOCK_SIZE;
-                let num_bits: u8 = self.buffer.as_ref()[0];
+        loop {
+            let available = COMPRESSION_BLOCK_SIZE - self.inner_offset;
+            if available >= skip_len {
+                self.inner_offset += skip_len;
+                break;
+            } else {
+                skip_len -= available;
+                // entirely skip decompressing some blocks.
+                let num_bits: u8 = self.buffer.get(self.addr);
                let block_len = compressed_block_size(num_bits);
-                self.buffer.advance(block_len);
+                self.addr += block_len;
+                self.inner_offset = 0;
            }
-            let num_consumed_bytes = self.block_decoder
-                .uncompress_block_unsorted(self.buffer.as_ref());
-            self.buffer.advance(num_consumed_bytes);
-            self.inner_offset = skip_len;
        }
    }
 }
@@ -91,7 +108,7 @@ pub mod tests {
    fn create_stream_buffer() -> ReadOnlySource {
        let mut buffer: Vec<u8> = vec![];
        let mut encoder = BlockEncoder::new();
-        let vals: Vec<u32> = (0u32..1_025u32).collect();
+        let vals: Vec<u32> = (0u32..1152u32).collect();
        for chunk in vals.chunks(COMPRESSION_BLOCK_SIZE) {
            let compressed_block = encoder.compress_block_unsorted(chunk);
            let num_bits = compressed_block[0];
@@ -113,13 +130,24 @@ pub mod tests {
        stream.read(&mut block[0..2]);
        assert_eq!(block[0], 0);
        assert_eq!(block[1], 1);
+
+        // reading does not consume the stream
+        stream.read(&mut block[0..2]);
+        assert_eq!(block[0], 0);
+        assert_eq!(block[1], 1);
+        stream.skip(2);
+
        stream.skip(5);
        stream.read(&mut block[0..3]);
+        stream.skip(3);
+
        assert_eq!(block[0], 7);
        assert_eq!(block[1], 8);
        assert_eq!(block[2], 9);
        stream.skip(500);
        stream.read(&mut block[0..3]);
+        stream.skip(3);
+
        assert_eq!(block[0], 510);
        assert_eq!(block[1], 511);
        assert_eq!(block[2], 512);
--- a/src/compression/vint/compression_vint_nosimd.rs
+++ b/src/compression/vint/compression_vint_nosimd.rs
--- a/src/compression/vint/compression_vint_simd.rs
+++ b/src/compression/vint/compression_vint_simd.rs
@@ -1,72 +0,0 @@
-mod streamvbyte {
-
-    use libc::size_t;
-
-    extern "C" {
-        pub fn streamvbyte_delta_encode(
-            data: *const u32,
-            num_els: u32,
-            output: *mut u8,
-            offset: u32,
-        ) -> size_t;
-
-        pub fn streamvbyte_delta_decode(
-            compressed_data: *const u8,
-            output: *mut u32,
-            num_els: u32,
-            offset: u32,
-        ) -> size_t;
-
-        pub fn streamvbyte_encode(data: *const u32, num_els: u32, output: *mut u8) -> size_t;
-
-        pub fn streamvbyte_decode(
-            compressed_data: *const u8,
-            output: *mut u32,
-            num_els: usize,
-        ) -> size_t;
-    }
-}
-
-#[inline(always)]
-pub(crate) fn compress_sorted<'a>(input: &[u32], output: &'a mut [u8], offset: u32) -> &'a [u8] {
-    let compress_length = unsafe {
-        streamvbyte::streamvbyte_delta_encode(
-            input.as_ptr(),
-            input.len() as u32,
-            output.as_mut_ptr(),
-            offset,
-        )
-    };
-    &output[..compress_length]
-}
-
-#[inline(always)]
-pub(crate) fn compress_unsorted<'a>(input: &[u32], output: &'a mut [u8]) -> &'a [u8] {
-    let compress_length = unsafe {
-        streamvbyte::streamvbyte_encode(input.as_ptr(), input.len() as u32, output.as_mut_ptr())
-    };
-    &output[..compress_length]
-}
-
-#[inline(always)]
-pub(crate) fn uncompress_sorted<'a>(
-    compressed_data: &'a [u8],
-    output: &mut [u32],
-    offset: u32,
-) -> usize {
-    unsafe {
-        streamvbyte::streamvbyte_delta_decode(
-            compressed_data.as_ptr(),
-            output.as_mut_ptr(),
-            output.len() as u32,
-            offset,
-        )
-    }
-}
-
-#[inline(always)]
-pub(crate) fn uncompress_unsorted<'a>(compressed_data: &'a [u8], output: &mut [u32]) -> usize {
-    unsafe {
-        streamvbyte::streamvbyte_decode(compressed_data.as_ptr(), output.as_mut_ptr(), output.len())
-    }
-}
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -6,7 +6,11 @@ use std::sync::Arc;
 use std::borrow::BorrowMut;
 use std::fmt;
 use core::SegmentId;
-use directory::{Directory, MmapDirectory, RAMDirectory};
+
+
+#[cfg(feature="mmap")]
+use directory::MmapDirectory;
+use directory::{Directory, RAMDirectory};
 use indexer::index_writer::open_index_writer;
 use core::searcher::Searcher;
 use std::convert::From;
@@ -61,6 +65,7 @@ impl Index {
    /// The index will use the `MMapDirectory`.
    ///
    /// If a previous index was in this directory, then its meta file will be destroyed.
+    #[cfg(feature="mmap")]
    pub fn create<P: AsRef<Path>>(directory_path: P, schema: Schema) -> Result<Index> {
        let mmap_directory = MmapDirectory::open(directory_path)?;
        let directory = ManagedDirectory::new(mmap_directory)?;
@@ -80,6 +85,8 @@ impl Index {
    ///
    /// The temp directory is only used for testing the `MmapDirectory`.
    /// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`.
+    #[cfg(feature="mmap")]
+    #[cfg(test)]
    pub fn create_from_tempdir(schema: Schema) -> Result<Index> {
        let mmap_directory = MmapDirectory::create_from_tempdir()?;
        let directory = ManagedDirectory::new(mmap_directory)?;
@@ -107,6 +114,7 @@ impl Index {
    }

    /// Opens a new directory from an index path.
+    #[cfg(feature="mmap")]
    pub fn open<P: AsRef<Path>>(directory_path: P) -> Result<Index> {
        let mmap_directory = MmapDirectory::open(directory_path)?;
        let directory = ManagedDirectory::new(mmap_directory)?;
@@ -114,6 +122,13 @@ impl Index {
        Index::create_from_metas(directory, &metas)
    }

+    /// Opens the index stored in `directory`: wraps it in a `ManagedDirectory` and loads its metas.
+    pub fn open_directory<TDirectory: Directory>(directory: TDirectory) -> Result<Index> {
+        let directory = ManagedDirectory::new(directory)?;
+        let metas = load_metas(&directory)?;
+        Index::create_from_metas(directory, &metas)
+    }
+
    /// Reads the index meta file from the directory.
    pub fn load_metas(&self) -> Result<IndexMeta> {
        load_metas(self.directory())
--- a/src/core/index_meta.rs
+++ b/src/core/index_meta.rs
@@ -1,5 +1,7 @@
 use schema::Schema;
 use core::SegmentMeta;
+use std::fmt;
+use serde_json;

 /// Meta information about the `Index`.
 ///
@@ -9,12 +11,13 @@ use core::SegmentMeta;
 /// * the index `docstamp`
 /// * the schema
 ///
-#[derive(Clone, Debug, Serialize, Deserialize)]
+#[derive(Clone, Serialize, Deserialize)]
 pub struct IndexMeta {
    pub segments: Vec<SegmentMeta>,
    pub schema: Schema,
    pub opstamp: u64,
-    #[serde(skip_serializing_if = "Option::is_none")] pub payload: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub payload: Option<String>,
 }

 impl IndexMeta {
@@ -28,6 +31,17 @@ impl IndexMeta {
    }
 }

+impl fmt::Debug for IndexMeta {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "{}",
+            serde_json::ser::to_string(self)
+                .expect("JSON serialization for IndexMeta should never fail.")
+        )
+    }
+}
+
 #[cfg(test)]
 mod tests {

--- a/src/core/inverted_index_reader.rs
+++ b/src/core/inverted_index_reader.rs
@@ -4,10 +4,10 @@ use postings::{BlockSegmentPostings, SegmentPostings};
 use postings::TermInfo;
 use schema::IndexRecordOption;
 use schema::Term;
-use std::cmp;
-use fastfield::DeleteBitSet;
-use schema::Schema;
 use compression::CompressedIntStream;
+use postings::FreqReadingOption;
+use common::BinarySerializable;
+use schema::FieldType;

 /// The inverted index reader is in charge of accessing
 /// the inverted index associated to a specific field.
@@ -26,30 +26,47 @@ pub struct InvertedIndexReader {
    termdict: TermDictionaryImpl,
    postings_source: ReadOnlySource,
    positions_source: ReadOnlySource,
-    delete_bitset: DeleteBitSet,
-    schema: Schema,
+    record_option: IndexRecordOption,
+    total_num_tokens: u64
 }

 impl InvertedIndexReader {
    pub(crate) fn new(
-        termdict_source: ReadOnlySource,
+        termdict: TermDictionaryImpl,
        postings_source: ReadOnlySource,
        positions_source: ReadOnlySource,
-        delete_bitset: DeleteBitSet,
-        schema: Schema,
+        record_option: IndexRecordOption,
    ) -> InvertedIndexReader {
+        let total_num_tokens_data = postings_source.slice(0, 8);
+        let mut total_num_tokens_cursor = total_num_tokens_data.as_slice();
+        let total_num_tokens = u64::deserialize(&mut total_num_tokens_cursor).unwrap_or(0u64);
        InvertedIndexReader {
-            termdict: TermDictionaryImpl::from_source(termdict_source),
-            postings_source,
+            termdict,
+            postings_source: postings_source.slice_from(8),
            positions_source,
-            delete_bitset,
-            schema,
+            record_option,
+            total_num_tokens
+        }
+    }
+
+    /// Creates an empty `InvertedIndexReader` object, which
+    /// contains no terms at all.
+    pub fn empty(field_type: FieldType) -> InvertedIndexReader {
+        let record_option = field_type
+            .get_index_record_option()
+            .unwrap_or(IndexRecordOption::Basic);
+        InvertedIndexReader {
+            termdict:    TermDictionaryImpl::empty(field_type),
+            postings_source: ReadOnlySource::empty(),
+            positions_source: ReadOnlySource::empty(),
+            record_option,
+            total_num_tokens: 0u64
        }
    }

    /// Returns the term info associated with the term.
    pub fn get_term_info(&self, term: &Term) -> Option<TermInfo> {
-        self.termdict.get(term.as_slice())
+        self.termdict.get(term.value_bytes())
    }

    /// Return the term dictionary datastructure.
@@ -86,15 +103,19 @@ impl InvertedIndexReader {
    pub fn read_block_postings_from_terminfo(
        &self,
        term_info: &TermInfo,
-        option: IndexRecordOption,
+        requested_option: IndexRecordOption,
    ) -> BlockSegmentPostings {
        let offset = term_info.postings_offset as usize;
        let postings_data = self.postings_source.slice_from(offset);
-        let has_freq = option.has_freq();
+        let freq_reading_option = match (self.record_option, requested_option) {
+            (IndexRecordOption::Basic, _) => FreqReadingOption::NoFreq,
+            (_, IndexRecordOption::Basic) => FreqReadingOption::SkipFreq,
+            (_, _) => FreqReadingOption::ReadFreq,
+        };
        BlockSegmentPostings::from_data(
            term_info.doc_freq as usize,
            SourceRead::from(postings_data),
-            has_freq,
+            freq_reading_option,
        )
    }

@@ -108,7 +129,6 @@ impl InvertedIndexReader {
        option: IndexRecordOption,
    ) -> SegmentPostings {
        let block_postings = self.read_block_postings_from_terminfo(term_info, option);
-        let delete_bitset = self.delete_bitset.clone();
        let position_stream = {
            if option.has_positions() {
                let position_offset = term_info.positions_offset;
@@ -120,9 +140,17 @@ impl InvertedIndexReader {
                None
            }
        };
-        SegmentPostings::from_block_postings(block_postings, delete_bitset, position_stream)
+        SegmentPostings::from_block_postings(block_postings, position_stream)
    }

+    /// Returns the total number of tokens recorded for all documents
+    /// (including deleted documents).
+    pub fn total_num_tokens(&self) -> u64 {
+        self.total_num_tokens
+    }
+
+
+
    /// Returns the segment postings associated with the term, and with the given option,
    /// or `None` if the term has never been encountered and indexed.
    ///
@@ -134,19 +162,23 @@ impl InvertedIndexReader {
    /// `TextIndexingOptions` that does not index position will return a `SegmentPostings`
    /// with `DocId`s and frequencies.
    pub fn read_postings(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings> {
-        let field = term.field();
-        let field_entry = self.schema.get_field_entry(field);
        let term_info = get!(self.get_term_info(term));
-        let maximum_option = get!(field_entry.field_type().get_index_record_option());
-        let best_effort_option = cmp::min(maximum_option, option);
-        Some(self.read_postings_from_terminfo(&term_info, best_effort_option))
+        Some(self.read_postings_from_terminfo(&term_info, option))
    }

+    pub(crate) fn read_postings_no_deletes(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings> {
+        let term_info = get!(self.get_term_info(term));
+        Some(self.read_postings_from_terminfo(&term_info, option))
+    }
+
+
    /// Returns the number of documents containing the term.
    pub fn doc_freq(&self, term: &Term) -> u32 {
-        match self.get_term_info(term) {
-            Some(term_info) => term_info.doc_freq,
-            None => 0,
-        }
+        self.get_term_info(term)
+            .map(|term_info| term_info.doc_freq)
+            .unwrap_or(0u32)
    }
 }
+
+
+
--- a/src/core/searcher.rs
+++ b/src/core/searcher.rs
@@ -2,9 +2,7 @@ use Result;
 use core::SegmentReader;
 use schema::Document;
 use collector::Collector;
-use common::TimerTree;
 use query::Query;
-use DocId;
 use DocAddress;
 use schema::{Field, Term};
 use termdict::{TermDictionary, TermMerger};
@@ -33,20 +31,20 @@ impl Searcher {
    }

    /// Returns the overall number of documents in the index.
-    pub fn num_docs(&self) -> DocId {
+    pub fn num_docs(&self) -> u64 {
        self.segment_readers
            .iter()
-            .map(|segment_reader| segment_reader.num_docs())
-            .fold(0u32, |acc, val| acc + val)
+            .map(|segment_reader| segment_reader.num_docs() as u64)
+            .sum::<u64>()
    }

    /// Return the overall number of documents containing
    /// the given term.
-    pub fn doc_freq(&self, term: &Term) -> u32 {
+    pub fn doc_freq(&self, term: &Term) -> u64 {
        self.segment_readers
            .iter()
-            .map(|segment_reader| segment_reader.inverted_index(term.field()).doc_freq(term))
-            .fold(0u32, |acc, val| acc + val)
+            .map(|segment_reader| segment_reader.inverted_index(term.field()).doc_freq(term) as u64)
+            .sum::<u64>()
    }

    /// Return the list of segment readers
@@ -60,7 +58,7 @@ impl Searcher {
    }

    /// Runs a query on the segment readers wrapped by the searcher
-    pub fn search<C: Collector>(&self, query: &Query, collector: &mut C) -> Result<TimerTree> {
+    pub fn search<C: Collector>(&self, query: &Query, collector: &mut C) -> Result<()> {
        query.search(self, collector)
    }

--- a/src/core/segment_reader.rs
+++ b/src/core/segment_reader.rs
@@ -8,7 +8,6 @@ use core::SegmentMeta;
 use fastfield::{self, FastFieldNotAvailableError};
 use fastfield::DeleteBitSet;
 use store::StoreReader;
-use directory::ReadOnlySource;
 use schema::Document;
 use DocId;
 use std::sync::Arc;
@@ -17,8 +16,16 @@ use common::CompositeFile;
 use std::fmt;
 use core::InvertedIndexReader;
 use schema::Field;
-use fastfield::{FastFieldReader, U64FastFieldReader};
+use schema::FieldType;
+use error::ErrorKind;
+use termdict::TermDictionaryImpl;
+use fastfield::FacetReader;
+use fastfield::FastFieldReader;
 use schema::Schema;
+use termdict::TermDictionary;
+use fastfield::{FastValue, MultiValueIntFastFieldReader};
+use schema::Cardinality;
+use fieldnorm::FieldNormReader;

 /// Entry point to access all of the datastructures of the `Segment`
 ///
@@ -31,6 +38,8 @@ use schema::Schema;
 /// The segment reader has a very low memory footprint,
 /// as close to all of the memory data is mmapped.
 ///
+///
+/// TODO fix not decoding docfreq
 #[derive(Clone)]
 pub struct SegmentReader {
    inv_idx_reader_cache: Arc<RwLock<HashMap<Field, Arc<InvertedIndexReader>>>>,
@@ -45,7 +54,7 @@ pub struct SegmentReader {
    fieldnorms_composite: CompositeFile,

    store_reader: StoreReader,
-    delete_bitset: DeleteBitSet,
+    delete_bitset_opt: Option<DeleteBitSet>,
    schema: Schema,
 }

@@ -70,7 +79,14 @@ impl SegmentReader {
    /// Return the number of documents that have been
    /// deleted in the segment.
    pub fn num_deleted_docs(&self) -> DocId {
-        self.delete_bitset.len() as DocId
+        self.delete_bitset()
+            .map(|delete_set| delete_set.len() as DocId)
+            .unwrap_or(0u32)
+    }
+
+    /// Returns true iff some of the documents of the segment have been deleted.
+    pub fn has_deletes(&self) -> bool {
+        self.delete_bitset().is_some()
    }

    /// Accessor to a segment's fast field reader given a field.
@@ -83,21 +99,69 @@ impl SegmentReader {
    ///
    /// # Panics
    /// May panic if the index is corrupted.
-    pub fn get_fast_field_reader<TFastFieldReader: FastFieldReader>(
+    pub fn fast_field_reader<Item: FastValue>(
        &self,
        field: Field,
-    ) -> fastfield::Result<TFastFieldReader> {
+    ) -> fastfield::Result<FastFieldReader<Item>> {
        let field_entry = self.schema.get_field_entry(field);
-        if !TFastFieldReader::is_enabled(field_entry.field_type()) {
+        if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::SingleValue)
+            {
+                self.fast_fields_composite
+                    .open_read(field)
+                    .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
+                    .map(FastFieldReader::open)
+            } else {
            Err(FastFieldNotAvailableError::new(field_entry))
-        } else {
-            self.fast_fields_composite
-                .open_read(field)
-                .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
-                .map(TFastFieldReader::open)
        }
    }

+    /// Accessor to the `MultiValueIntFastFieldReader` associated to a given `Field`.
+    /// May panic if the field is not a multivalued fastfield of the type `Item`.
+    pub fn multi_fast_field_reader<Item: FastValue>(
+        &self,
+        field: Field,
+    ) -> fastfield::Result<MultiValueIntFastFieldReader<Item>> {
+        let field_entry = self.schema.get_field_entry(field);
+        if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::MultiValues)
+            {
+                let idx_reader = self.fast_fields_composite
+                    .open_read_with_idx(field, 0)
+                    .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
+                    .map(FastFieldReader::open)?;
+                let vals_reader = self.fast_fields_composite
+                    .open_read_with_idx(field, 1)
+                    .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
+                    .map(FastFieldReader::open)?;
+                Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader))
+            } else {
+            Err(FastFieldNotAvailableError::new(field_entry))
+        }
+    }
+
+    /// Accessor to the `FacetReader` associated to a given `Field`.
+    /// Returns an error if the field is not a hierarchical facet, or if its
+    /// multi-valued fast field or term dictionary cannot be opened in this segment.
+    pub fn facet_reader(&self, field: Field) -> Result<FacetReader> {
+        let field_entry = self.schema.get_field_entry(field);
+        if field_entry.field_type() != &FieldType::HierarchicalFacet {
+            return Err(ErrorKind::InvalidArgument(format!(
+                "The field {:?} is not a hierarchical facet.",
+                field_entry
+            )).into());
+        }
+        let term_ords_reader = self.multi_fast_field_reader(field)?;
+        let termdict_source = self.termdict_composite.open_read(field).ok_or_else(|| {
+            ErrorKind::InvalidArgument(format!(
+                "The field \"{}\" is a hierarchical facet \
+                 but this segment does not seem to have the field term \
+                 dictionary.",
+                field_entry.name()
+            ))
+        })?;
+        let termdict = TermDictionaryImpl::from_source(termdict_source);
+        Ok(FacetReader::new(term_ords_reader, termdict))
+    }
+
    /// Accessor to the segment's `Field norms`'s reader.
    ///
    /// Field norms are the length (in tokens) of the fields.
@@ -106,10 +170,15 @@ impl SegmentReader {
    ///
    /// They are simply stored as a fast field, serialized in
    /// the `.fieldnorm` file of the segment.
-    pub fn get_fieldnorms_reader(&self, field: Field) -> Option<U64FastFieldReader> {
-        self.fieldnorms_composite
-            .open_read(field)
-            .map(U64FastFieldReader::open)
+    pub fn get_fieldnorms_reader(&self, field: Field) -> FieldNormReader {
+        if let Some(fieldnorm_source) = self.fieldnorms_composite
+            .open_read(field) {
+            FieldNormReader::open(fieldnorm_source)
+        } else {
+            let field_name = self.schema.get_field_name(field);
+            // `open_read` returned `None`: the field norm composite file has no entry for this field.
+            panic!("Field norm not found for field {:?}. Was it marked as indexed during indexing?", field_name);
+        }
    }

    /// Accessor to the segment's `StoreReader`.
@@ -142,12 +211,13 @@ impl SegmentReader {
        let fieldnorms_data = segment.open_read(SegmentComponent::FIELDNORMS)?;
        let fieldnorms_composite = CompositeFile::open(&fieldnorms_data)?;

-        let delete_bitset = if segment.meta().has_deletes() {
-            let delete_data = segment.open_read(SegmentComponent::DELETE)?;
-            DeleteBitSet::open(delete_data)
-        } else {
-            DeleteBitSet::empty()
-        };
+        let delete_bitset_opt =
+            if segment.meta().has_deletes() {
+                let delete_data = segment.open_read(SegmentComponent::DELETE)?;
+                Some(DeleteBitSet::open(delete_data))
+            } else {
+                None
+            };

        let schema = segment.schema();
        Ok(SegmentReader {
@@ -159,13 +229,15 @@ impl SegmentReader {
            fieldnorms_composite,
            segment_id: segment.id(),
            store_reader,
-            delete_bitset,
+            delete_bitset_opt,
            positions_composite,
            schema,
        })
    }

    /// Returns a field reader associated to the field given in argument.
+    /// If the field was not present in the index during indexing time,
+    /// the InvertedIndexReader is empty.
    ///
    /// The field reader is in charge of iterating through the
    /// term dictionary associated to a specific field,
@@ -176,27 +248,43 @@ impl SegmentReader {
            .expect("Lock poisoned. This should never happen")
            .get(&field)
        {
-            Arc::clone(inv_idx_reader);
+            return Arc::clone(inv_idx_reader);
+        }
+        let field_entry = self.schema.get_field_entry(field);
+        let field_type = field_entry.field_type();
+        let record_option_opt = field_type.get_index_record_option();
+
+        if record_option_opt.is_none() {
+            panic!("Field {:?} does not seem indexed.", field_entry.name());
        }

-        let termdict_source: ReadOnlySource = self.termdict_composite
-            .open_read(field)
-            .expect("Index corrupted. Failed to open field term dictionary in composite file.");
+        let record_option = record_option_opt.unwrap();

-        let postings_source = self.postings_composite
+        let postings_source_opt = self.postings_composite.open_read(field);
+
+        if postings_source_opt.is_none() {
+            // no documents in the segment contained this field.
+            // As a result, no data is associated to the inverted index.
+            //
+            // Returns an empty inverted index.
+            return Arc::new(InvertedIndexReader::empty(field_type.clone()));
+        }
+
+        let postings_source = postings_source_opt.unwrap();
+
+        let termdict_source = self.termdict_composite
            .open_read(field)
-            .expect("Index corrupted. Failed to open field postings in composite file.");
+            .expect("Failed to open field term dictionary in composite file. Is the field indexed");

        let positions_source = self.positions_composite
            .open_read(field)
            .expect("Index corrupted. Failed to open field positions in composite file.");

        let inv_idx_reader = Arc::new(InvertedIndexReader::new(
-            termdict_source,
+            TermDictionaryImpl::from_source(termdict_source),
            postings_source,
            positions_source,
-            self.delete_bitset.clone(),
-            self.schema.clone(),
+            record_option,
        ));

        // by releasing the lock in between, we may end up opening the inverting index
@@ -224,14 +312,16 @@ impl SegmentReader {

    /// Returns the bitset representing
    /// the documents that have been deleted.
-    pub fn delete_bitset(&self) -> &DeleteBitSet {
-        &self.delete_bitset
+    pub fn delete_bitset(&self) -> Option<&DeleteBitSet> {
+        self.delete_bitset_opt.as_ref()
    }

    /// Returns true iff the `doc` is marked
    /// as deleted.
    pub fn is_deleted(&self, doc: DocId) -> bool {
-        self.delete_bitset.is_deleted(doc)
+        self.delete_bitset()
+            .map(|delete_set| delete_set.is_deleted(doc))
+            .unwrap_or(false)
    }
 }

--- a/src/datastruct/skip/mod.rs
+++ b/src/datastruct/skip/mod.rs
@@ -9,12 +9,12 @@ pub use self::skiplist::SkipList;
 #[cfg(test)]
 mod tests {

-    use super::*;
+    use super::{SkipList, SkipListBuilder};

    #[test]
    fn test_skiplist() {
        let mut output: Vec<u8> = Vec::new();
-        let mut skip_list_builder: SkipListBuilder<u32> = SkipListBuilder::new(10);
+        let mut skip_list_builder: SkipListBuilder<u32> = SkipListBuilder::new(8);
        skip_list_builder.insert(2, &3).unwrap();
        skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
        let mut skip_list: SkipList<u32> = SkipList::from(output.as_slice());
@@ -24,7 +24,7 @@ mod tests {
    #[test]
    fn test_skiplist2() {
        let mut output: Vec<u8> = Vec::new();
-        let skip_list_builder: SkipListBuilder<u32> = SkipListBuilder::new(10);
+        let skip_list_builder: SkipListBuilder<u32> = SkipListBuilder::new(8);
        skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
        let mut skip_list: SkipList<u32> = SkipList::from(output.as_slice());
        assert_eq!(skip_list.next(), None);
@@ -71,7 +71,7 @@ mod tests {
    #[test]
    fn test_skiplist5() {
        let mut output: Vec<u8> = Vec::new();
-        let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(3);
+        let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(4);
        skip_list_builder.insert(2, &()).unwrap();
        skip_list_builder.insert(3, &()).unwrap();
        skip_list_builder.insert(5, &()).unwrap();
@@ -103,7 +103,7 @@ mod tests {
    #[test]
    fn test_skiplist7() {
        let mut output: Vec<u8> = Vec::new();
-        let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(3);
+        let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(4);
        for i in 0..1000 {
            skip_list_builder.insert(i, &()).unwrap();
        }
@@ -121,35 +121,48 @@ mod tests {
    #[test]
    fn test_skiplist8() {
        let mut output: Vec<u8> = Vec::new();
-        let mut skip_list_builder: SkipListBuilder<u32> = SkipListBuilder::new(10);
+        let mut skip_list_builder: SkipListBuilder<u64> = SkipListBuilder::new(8);
        skip_list_builder.insert(2, &3).unwrap();
        skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
-        assert_eq!(output.len(), 13);
+        assert_eq!(output.len(), 11);
        assert_eq!(output[0], 1u8 + 128u8);
    }

    #[test]
    fn test_skiplist9() {
        let mut output: Vec<u8> = Vec::new();
-        let mut skip_list_builder: SkipListBuilder<u32> = SkipListBuilder::new(3);
-        for i in 0..9 {
+        let mut skip_list_builder: SkipListBuilder<u64> = SkipListBuilder::new(4);
+        for i in 0..4 * 4 * 4 {
            skip_list_builder.insert(i, &i).unwrap();
        }
        skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
-        assert_eq!(output.len(), 117);
-        assert_eq!(output[0], 3u8 + 128u8);
+        assert_eq!(output.len(), 774);
+        assert_eq!(output[0], 4u8 + 128u8);
    }

    #[test]
    fn test_skiplist10() {
        // checking that void gets serialized to nothing.
        let mut output: Vec<u8> = Vec::new();
-        let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(3);
-        for i in 0..9 {
+        let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(4);
+        for i in 0..((4 * 4 * 4) - 1) {
            skip_list_builder.insert(i, &()).unwrap();
        }
        skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
-        assert_eq!(output.len(), 81);
+        assert_eq!(output.len(), 230);
+        assert_eq!(output[0], 128u8 + 3u8);
+    }
+
+    #[test]
+    fn test_skiplist11() {
+        // checking that void gets serialized to nothing.
+        let mut output: Vec<u8> = Vec::new();
+        let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(4);
+        for i in 0..(4 * 4) {
+            skip_list_builder.insert(i, &()).unwrap();
+        }
+        skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
+        assert_eq!(output.len(), 65);
        assert_eq!(output[0], 128u8 + 3u8);
    }

--- a/src/datastruct/skip/skiplist.rs
+++ b/src/datastruct/skip/skiplist.rs
@@ -1,6 +1,5 @@
-use common::BinarySerializable;
+use common::{BinarySerializable, VInt};
 use std::marker::PhantomData;
-use DocId;
 use std::cmp::max;

 static EMPTY: [u8; 0] = [];
@@ -8,21 +7,20 @@ static EMPTY: [u8; 0] = [];
 struct Layer<'a, T> {
    data: &'a [u8],
    cursor: &'a [u8],
-    next_id: DocId,
+    next_id: Option<u64>,
    _phantom_: PhantomData<T>,
 }

 impl<'a, T: BinarySerializable> Iterator for Layer<'a, T> {
-    type Item = (DocId, T);
+    type Item = (u64, T);

-    fn next(&mut self) -> Option<(DocId, T)> {
-        if self.next_id == u32::max_value() {
-            None
-        } else {
+    fn next(&mut self) -> Option<(u64, T)> {
+        if let Some(cur_id) = self.next_id {
            let cur_val = T::deserialize(&mut self.cursor).unwrap();
-            let cur_id = self.next_id;
-            self.next_id = u32::deserialize(&mut self.cursor).unwrap_or(u32::max_value());
+            self.next_id = VInt::deserialize_u64(&mut self.cursor).ok();
            Some((cur_id, cur_val))
+        } else {
+            None
        }
    }
 }
@@ -30,7 +28,7 @@ impl<'a, T: BinarySerializable> Iterator for Layer<'a, T> {
 impl<'a, T: BinarySerializable> From<&'a [u8]> for Layer<'a, T> {
    fn from(data: &'a [u8]) -> Layer<'a, T> {
        let mut cursor = data;
-        let next_id = u32::deserialize(&mut cursor).unwrap_or(u32::max_value());
+        let next_id = VInt::deserialize_u64(&mut cursor).ok();
        Layer {
            data,
            cursor,
@@ -45,14 +43,14 @@ impl<'a, T: BinarySerializable> Layer<'a, T> {
        Layer {
            data: &EMPTY,
            cursor: &EMPTY,
-            next_id: DocId::max_value(),
+            next_id: None,
            _phantom_: PhantomData,
        }
    }

    fn seek_offset(&mut self, offset: usize) {
        self.cursor = &self.data[offset..];
-        self.next_id = u32::deserialize(&mut self.cursor).unwrap_or(u32::max_value());
+        self.next_id = VInt::deserialize_u64(&mut self.cursor).ok();
    }

    // Returns the last element (key, val)
@@ -60,54 +58,61 @@ impl<'a, T: BinarySerializable> Layer<'a, T> {
    //
    // If there is no such element anymore,
    // returns None.
-    fn seek(&mut self, doc_id: DocId) -> Option<(DocId, T)> {
-        let mut val = None;
-        while self.next_id < doc_id {
-            match self.next() {
-                None => {
-                    break;
-                }
-                v => {
-                    val = v;
+    //
+    // If the element exists, it will be returned
+    // at the next call to `.next()`.
+    fn seek(&mut self, key: u64) -> Option<(u64, T)> {
+        let mut result: Option<(u64, T)> = None;
+        loop {
+            if let Some(next_id) = self.next_id {
+                if next_id < key {
+                    if let Some(v) = self.next() {
+                        result = Some(v);
+                        continue;
+                    }
                }
            }
+            return result;
        }
-        val
    }
 }

 pub struct SkipList<'a, T: BinarySerializable> {
    data_layer: Layer<'a, T>,
-    skip_layers: Vec<Layer<'a, u32>>,
+    skip_layers: Vec<Layer<'a, u64>>,
 }

 impl<'a, T: BinarySerializable> Iterator for SkipList<'a, T> {
-    type Item = (DocId, T);
+    type Item = (u64, T);

-    fn next(&mut self) -> Option<(DocId, T)> {
+    fn next(&mut self) -> Option<(u64, T)> {
        self.data_layer.next()
    }
 }

 impl<'a, T: BinarySerializable> SkipList<'a, T> {
-    pub fn seek(&mut self, doc_id: DocId) -> Option<(DocId, T)> {
-        let mut next_layer_skip: Option<(DocId, u32)> = None;
+    pub fn seek(&mut self, key: u64) -> Option<(u64, T)> {
+        let mut next_layer_skip: Option<(u64, u64)> = None;
        for skip_layer in &mut self.skip_layers {
            if let Some((_, offset)) = next_layer_skip {
                skip_layer.seek_offset(offset as usize);
            }
-            next_layer_skip = skip_layer.seek(doc_id);
+            next_layer_skip = skip_layer.seek(key);
        }
        if let Some((_, offset)) = next_layer_skip {
            self.data_layer.seek_offset(offset as usize);
        }
-        self.data_layer.seek(doc_id)
+        self.data_layer.seek(key)
    }
 }

 impl<'a, T: BinarySerializable> From<&'a [u8]> for SkipList<'a, T> {
    fn from(mut data: &'a [u8]) -> SkipList<'a, T> {
-        let offsets: Vec<u32> = Vec::deserialize(&mut data).unwrap();
+        let offsets: Vec<u64> = Vec::<VInt>::deserialize(&mut data)
+            .unwrap()
+            .into_iter()
+            .map(|el| el.0)
+            .collect();
        let num_layers = offsets.len();
        let layers_data: &[u8] = data;
        let data_layer: Layer<'a, T> = if num_layers == 0 {
--- a/src/datastruct/skip/skiplist_builder.rs
+++ b/src/datastruct/skip/skiplist_builder.rs
@@ -1,13 +1,11 @@
 use std::io::Write;
-use common::BinarySerializable;
+use common::{BinarySerializable, VInt, is_power_of_2};
 use std::marker::PhantomData;
-use DocId;
 use std::io;

 struct LayerBuilder<T: BinarySerializable> {
-    period: usize,
+    period_mask: usize,
    buffer: Vec<u8>,
-    remaining: usize,
    len: usize,
    _phantom_: PhantomData<T>,
 }
@@ -23,34 +21,33 @@ impl<T: BinarySerializable> LayerBuilder<T> {
    }

    fn with_period(period: usize) -> LayerBuilder<T> {
+        assert!(is_power_of_2(period), "The period has to be a power of 2.");
        LayerBuilder {
-            period,
+            period_mask: (period - 1),
            buffer: Vec::new(),
-            remaining: period,
            len: 0,
            _phantom_: PhantomData,
        }
    }

-    fn insert(&mut self, doc_id: DocId, value: &T) -> io::Result<Option<(DocId, u32)>> {
-        self.remaining -= 1;
+    fn insert(&mut self, key: u64, value: &T) -> io::Result<Option<(u64, u64)>> {
        self.len += 1;
-        let offset = self.written_size() as u32;
-        doc_id.serialize(&mut self.buffer)?;
+        let offset = self.written_size() as u64;
+        VInt(key).serialize(&mut self.buffer)?;
        value.serialize(&mut self.buffer)?;
-        Ok(if self.remaining == 0 {
-            self.remaining = self.period;
-            Some((doc_id, offset))
+        let emit_skip_info = (self.period_mask & self.len) == 0;
+        if emit_skip_info {
+            Ok(Some((key, offset)))
        } else {
-            None
-        })
+            Ok(None)
+        }
    }
 }

 pub struct SkipListBuilder<T: BinarySerializable> {
    period: usize,
    data_layer: LayerBuilder<T>,
-    skip_layers: Vec<LayerBuilder<u32>>,
+    skip_layers: Vec<LayerBuilder<u64>>,
 }

 impl<T: BinarySerializable> SkipListBuilder<T> {
@@ -62,7 +59,7 @@ impl<T: BinarySerializable> SkipListBuilder<T> {
        }
    }

-    fn get_skip_layer(&mut self, layer_id: usize) -> &mut LayerBuilder<u32> {
+    fn get_skip_layer(&mut self, layer_id: usize) -> &mut LayerBuilder<u64> {
        if layer_id == self.skip_layers.len() {
            let layer_builder = LayerBuilder::with_period(self.period);
            self.skip_layers.push(layer_builder);
@@ -70,9 +67,9 @@ impl<T: BinarySerializable> SkipListBuilder<T> {
        &mut self.skip_layers[layer_id]
    }

-    pub fn insert(&mut self, doc_id: DocId, dest: &T) -> io::Result<()> {
+    pub fn insert(&mut self, key: u64, dest: &T) -> io::Result<()> {
        let mut layer_id = 0;
-        let mut skip_pointer = self.data_layer.insert(doc_id, dest)?;
+        let mut skip_pointer = self.data_layer.insert(key, dest)?;
        loop {
            skip_pointer = match skip_pointer {
                Some((skip_doc_id, skip_offset)) => self.get_skip_layer(layer_id)
@@ -86,13 +83,11 @@ impl<T: BinarySerializable> SkipListBuilder<T> {
    }

    pub fn write<W: Write>(self, output: &mut W) -> io::Result<()> {
-        let mut size: u32 = 0;
-        let mut layer_sizes: Vec<u32> = Vec::new();
-        size += self.data_layer.buffer.len() as u32;
-        layer_sizes.push(size);
+        let mut size: u64 = self.data_layer.buffer.len() as u64;
+        let mut layer_sizes = vec![VInt(size)];
        for layer in self.skip_layers.iter().rev() {
-            size += layer.buffer.len() as u32;
-            layer_sizes.push(size);
+            size += layer.buffer.len() as u64;
+            layer_sizes.push(VInt(size));
        }
        layer_sizes.serialize(output)?;
        self.data_layer.write(output)?;
--- a/src/datastruct/stacker/hashmap.rs
+++ b/src/datastruct/stacker/hashmap.rs
@@ -1,5 +1,6 @@
 use std::iter;
 use std::mem;
+use postings::UnorderedTermId;
 use super::heap::{BytesRef, Heap, HeapAllocable};

 mod murmurhash2 {
@@ -101,7 +102,7 @@ impl KeyValue {
 /// the computation of the hash of the key twice,
 /// or copying the key as long as there is no insert.
 ///
-pub struct HashMap<'a> {
+pub struct TermHashMap<'a> {
    table: Box<[KeyValue]>,
    heap: &'a Heap,
    mask: usize,
@@ -116,7 +117,11 @@ struct QuadraticProbing {

 impl QuadraticProbing {
    fn compute(hash: usize, mask: usize) -> QuadraticProbing {
-        QuadraticProbing { hash, i: 0, mask }
+        QuadraticProbing {
+            hash,
+            i: 0,
+            mask,
+        }
    }

    #[inline]
@@ -126,11 +131,11 @@ impl QuadraticProbing {
    }
 }

-impl<'a> HashMap<'a> {
-    pub fn new(num_bucket_power_of_2: usize, heap: &'a Heap) -> HashMap<'a> {
+impl<'a> TermHashMap<'a> {
+    pub fn new(num_bucket_power_of_2: usize, heap: &'a Heap) -> TermHashMap<'a> {
        let table_size = 1 << num_bucket_power_of_2;
        let table: Vec<KeyValue> = iter::repeat(KeyValue::default()).take(table_size).collect();
-        HashMap {
+        TermHashMap {
            table: table.into_boxed_slice(),
            heap,
            mask: table_size - 1,
@@ -153,22 +158,25 @@ impl<'a> HashMap<'a> {
        (key_bytes, expull_addr)
    }

-    pub fn set_bucket(&mut self, hash: u32, key_bytes_ref: BytesRef, bucket: usize) {
+    pub fn set_bucket(&mut self, hash: u32, key_value_addr: BytesRef, bucket: usize) {
        self.occupied.push(bucket);
        self.table[bucket] = KeyValue {
-            key_value_addr: key_bytes_ref,
-            hash,
+            key_value_addr, hash
        };
    }

-    pub fn iter<'b: 'a>(&'b self) -> impl Iterator<Item = (&'a [u8], u32)> + 'b {
+    pub fn iter<'b: 'a>(&'b self) -> impl Iterator<Item = (&'a [u8], u32, UnorderedTermId)> + 'b {
        self.occupied.iter().cloned().map(move |bucket: usize| {
            let kv = self.table[bucket];
-            self.get_key_value(kv.key_value_addr)
+            let (key, offset) = self.get_key_value(kv.key_value_addr);
+            (key, offset, bucket as UnorderedTermId)
        })
    }

-    pub fn get_or_create<S: AsRef<[u8]>, V: HeapAllocable>(&mut self, key: S) -> &mut V {
+    pub fn get_or_create<S: AsRef<[u8]>, V: HeapAllocable>(
+        &mut self,
+        key: S,
+    ) -> (UnorderedTermId, &mut V) {
        let key_bytes: &[u8] = key.as_ref();
        let hash = murmurhash2::murmurhash2(key.as_ref());
        let mut probe = self.probe(hash);
@@ -180,11 +188,14 @@ impl<'a> HashMap<'a> {
                let (addr, val): (u32, &mut V) = self.heap.allocate_object();
                assert_eq!(addr, key_bytes_ref.addr() + 2 + key_bytes.len() as u32);
                self.set_bucket(hash, key_bytes_ref, bucket);
-                return val;
+                return (bucket as UnorderedTermId, val);
            } else if kv.hash == hash {
                let (stored_key, expull_addr): (&[u8], u32) = self.get_key_value(kv.key_value_addr);
                if stored_key == key_bytes {
-                    return self.heap.get_mut_ref(expull_addr);
+                    return (
+                        bucket as UnorderedTermId,
+                        self.heap.get_mut_ref(expull_addr),
+                    );
                }
            }
        }
@@ -225,33 +236,33 @@ mod tests {
    #[test]
    fn test_hash_map() {
        let heap = Heap::with_capacity(2_000_000);
-        let mut hash_map: HashMap = HashMap::new(18, &heap);
+        let mut hash_map: TermHashMap = TermHashMap::new(18, &heap);
        {
-            let v: &mut TestValue = hash_map.get_or_create("abc");
+            let v: &mut TestValue = hash_map.get_or_create("abc").1;
            assert_eq!(v.val, 0u32);
            v.val = 3u32;
        }
        {
-            let v: &mut TestValue = hash_map.get_or_create("abcd");
+            let v: &mut TestValue = hash_map.get_or_create("abcd").1;
            assert_eq!(v.val, 0u32);
            v.val = 4u32;
        }
        {
-            let v: &mut TestValue = hash_map.get_or_create("abc");
+            let v: &mut TestValue = hash_map.get_or_create("abc").1;
            assert_eq!(v.val, 3u32);
        }
        {
-            let v: &mut TestValue = hash_map.get_or_create("abcd");
+            let v: &mut TestValue = hash_map.get_or_create("abcd").1;
            assert_eq!(v.val, 4u32);
        }
        let mut iter_values = hash_map.iter();
        {
-            let (_, addr) = iter_values.next().unwrap();
+            let (_, addr, _) = iter_values.next().unwrap();
            let val: &TestValue = heap.get_ref(addr);
            assert_eq!(val.val, 3u32);
        }
        {
-            let (_, addr) = iter_values.next().unwrap();
+            let (_, addr, _) = iter_values.next().unwrap();
            let val: &TestValue = heap.get_ref(addr);
            assert_eq!(val.val, 4u32);
        }
--- a/src/datastruct/stacker/mod.rs
+++ b/src/datastruct/stacker/mod.rs
@@ -4,7 +4,7 @@ mod expull;

 pub use self::heap::{Heap, HeapAllocable};
 pub use self::expull::ExpUnrolledLinkedList;
-pub use self::hashmap::HashMap;
+pub use self::hashmap::TermHashMap;

 #[test]
 fn test_unrolled_linked_list() {
@@ -16,15 +16,15 @@ fn test_unrolled_linked_list() {
        ks.push(2);
        ks.push(3);
        for k in (1..5).map(|k| k * 100) {
-            let mut hashmap: HashMap = HashMap::new(10, &heap);
+            let mut hashmap: TermHashMap = TermHashMap::new(10, &heap);
            for j in 0..k {
                for i in 0..500 {
-                    let v: &mut ExpUnrolledLinkedList = hashmap.get_or_create(i.to_string());
+                    let v: &mut ExpUnrolledLinkedList = hashmap.get_or_create(i.to_string()).1;
                    v.push(i * j, &heap);
                }
            }
            let mut map_addr: collections::HashMap<Vec<u8>, u32> = collections::HashMap::new();
-            for (key, addr) in hashmap.iter() {
+            for (key, addr, _) in hashmap.iter() {
                map_addr.insert(Vec::from(key), addr);
            }

--- a/src/directory/managed_directory.rs
+++ b/src/directory/managed_directory.rs
@@ -282,6 +282,7 @@ impl Clone for ManagedDirectory {
 mod tests {

    use super::*;
+    #[cfg(feature="mmap")]
    use directory::MmapDirectory;
    use std::path::Path;
    use std::io::Write;
@@ -293,6 +294,7 @@ mod tests {
    }

    #[test]
+    #[cfg(feature="mmap")]
    fn test_managed_directory() {
        let tempdir = TempDir::new("index").unwrap();
        let tempdir_path = PathBuf::from(tempdir.path());
@@ -341,6 +343,7 @@ mod tests {
    }

    #[test]
+    #[cfg(feature="mmap")]
    fn test_managed_directory_gc_while_mmapped() {
        let tempdir = TempDir::new("index").unwrap();
        let tempdir_path = PathBuf::from(tempdir.path());
@@ -370,6 +373,7 @@ mod tests {
    }

    #[test]
+    #[cfg(feature="mmap")]
    fn test_managed_directory_protect() {
        let tempdir = TempDir::new("index").unwrap();
        let tempdir_path = PathBuf::from(tempdir.path());
--- a/src/directory/mmap_directory.rs
+++ b/src/directory/mmap_directory.rs
@@ -20,19 +20,17 @@ use std::sync::Arc;
 use std::sync::RwLock;
 use tempdir::TempDir;

-
 /// Returns None iff the file exists, can be read, but is empty (and hence
 /// cannot be mmapped).
 ///
-fn open_mmap(full_path: &PathBuf) -> result::Result<Option<MmapReadOnly>, OpenReadError> {
-    let file = File::open(&full_path)
-        .map_err(|e| {
-            if e.kind() == io::ErrorKind::NotFound {
-                OpenReadError::FileDoesNotExist(full_path.clone())
-            } else {
-                OpenReadError::IOError(IOError::with_path(full_path.to_owned(), e))
-            }
-        })?;
+fn open_mmap(full_path: &Path) -> result::Result<Option<MmapReadOnly>, OpenReadError> {
+    let file = File::open(full_path).map_err(|e| {
+        if e.kind() == io::ErrorKind::NotFound {
+            OpenReadError::FileDoesNotExist(full_path.to_owned())
+        } else {
+            OpenReadError::IOError(IOError::with_path(full_path.to_owned(), e))
+        }
+    })?;

    let meta_data = file.metadata()
        .map_err(|e| IOError::with_path(full_path.to_owned(), e))?;
@@ -44,9 +42,7 @@ fn open_mmap(full_path: &PathBuf) -> result::Result<Option<MmapReadOnly>, OpenRe
    }
    MmapReadOnly::open(&file)
        .map(Some)
-        .map_err(|e| {
-            From::from(IOError::with_path(full_path.to_owned(), e))
-        })
+        .map_err(|e| From::from(IOError::with_path(full_path.to_owned(), e)))
 }

 #[derive(Default, Clone, Debug, Serialize, Deserialize)]
@@ -79,7 +75,6 @@ impl Default for MmapCache {
 }

 impl MmapCache {
-    
    /// Removes a `MmapReadOnly` entry from the mmap cache.
    fn discard_from_cache(&mut self, full_path: &Path) -> bool {
        self.cache.remove(full_path).is_some()
@@ -93,23 +88,23 @@ impl MmapCache {
        }
    }

-    fn get_mmap(&mut self, full_path: PathBuf) -> Result<Option<MmapReadOnly>, OpenReadError> {
-        Ok(match self.cache.entry(full_path.clone()) {
-               HashMapEntry::Occupied(occupied_entry) => {
-                   let mmap = occupied_entry.get();
-                   self.counters.hit += 1;
-                   Some(mmap.clone())
-               }
-               HashMapEntry::Vacant(vacant_entry) => {
-                   self.counters.miss += 1;
-                   if let Some(mmap) = open_mmap(&full_path)? {
-                       vacant_entry.insert(mmap.clone());
-                       Some(mmap)
-                   } else {
-                       None
-                   }
-               }
-           })
+    fn get_mmap(&mut self, full_path: &Path) -> Result<Option<MmapReadOnly>, OpenReadError> {
+        Ok(match self.cache.entry(full_path.to_owned()) {
+            HashMapEntry::Occupied(occupied_entry) => {
+                let mmap = occupied_entry.get();
+                self.counters.hit += 1;
+                Some(mmap.clone())
+            }
+            HashMapEntry::Vacant(vacant_entry) => {
+                self.counters.miss += 1;
+                if let Some(mmap) = open_mmap(full_path)? {
+                    vacant_entry.insert(mmap.clone());
+                    Some(mmap)
+                } else {
+                    None
+                }
+            }
+        })
    }
 }

@@ -257,9 +252,9 @@ impl Directory for MmapDirectory {
        })?;

        Ok(mmap_cache
-           .get_mmap(full_path)?
-           .map(ReadOnlySource::Mmap)
-           .unwrap_or_else(|| ReadOnlySource::Anonymous(SharedVecSlice::empty())))
+            .get_mmap(&full_path)?
+            .map(ReadOnlySource::Mmap)
+            .unwrap_or_else(|| ReadOnlySource::Anonymous(SharedVecSlice::empty())))
    }

    fn open_write(&mut self, path: &Path) -> Result<WritePtr, OpenWriteError> {
@@ -292,20 +287,19 @@ impl Directory for MmapDirectory {
        Ok(BufWriter::new(Box::new(writer)))
    }

-
    /// Any entry associated to the path in the mmap will be
    /// removed before the file is deleted.
    fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
        debug!("Deleting file {:?}", path);
        let full_path = self.resolve_path(path);
-        let mut mmap_cache = self.mmap_cache
-            .write()
-            .map_err(|_| {
-                         let msg = format!("Failed to acquired write lock \
-                                            on mmap cache while deleting {:?}",
-                                           path);
-                         IOError::with_path(path.to_owned(), make_io_err(msg))
-                     })?;
+        let mut mmap_cache = self.mmap_cache.write().map_err(|_| {
+            let msg = format!(
+                "Failed to acquired write lock \
+                 on mmap cache while deleting {:?}",
+                path
+            );
+            IOError::with_path(path.to_owned(), make_io_err(msg))
+        })?;
        mmap_cache.discard_from_cache(path);

        // Removing the entry in the MMap cache.
@@ -414,9 +408,11 @@ mod tests {
                assert_eq!(mmap_directory.get_cache_info().mmapped.len(), num_paths);
            }
            for (i, path) in paths.iter().enumerate() {
-                println!("delete paths {:?}", path);
                mmap_directory.delete(path).unwrap();
-                assert_eq!(mmap_directory.get_cache_info().mmapped.len(), num_paths - i - 1);
+                assert_eq!(
+                    mmap_directory.get_cache_info().mmapped.len(),
+                    num_paths - i - 1
+                );
            }
        }
        assert_eq!(mmap_directory.get_cache_info().counters.hit, 10);
--- a/src/directory/mod.rs
+++ b/src/directory/mod.rs
@@ -3,21 +3,29 @@
 WORM directory abstraction.

 */
+
+#[cfg(feature="mmap")]
 mod mmap_directory;
+
 mod ram_directory;
 mod directory;
 mod read_only_source;
 mod shared_vec_slice;
 mod managed_directory;
+mod static_directory;

 /// Errors specific to the directory module.
 pub mod error;

 use std::io::{BufWriter, Seek, Write};

+pub use self::static_directory::StaticDirectory;
+pub use self::static_directory::write_static_from_directory;
 pub use self::read_only_source::ReadOnlySource;
 pub use self::directory::Directory;
 pub use self::ram_directory::RAMDirectory;
+
+#[cfg(feature="mmap")]
 pub use self::mmap_directory::MmapDirectory;

 pub(crate) use self::read_only_source::SourceRead;
@@ -51,6 +59,7 @@ mod tests {
    }

    #[test]
+    #[cfg(feature="mmap")]
    fn test_mmap_directory() {
        let mut mmap_directory = MmapDirectory::create_from_tempdir().unwrap();
        test_directory(&mut mmap_directory);
@@ -116,9 +125,6 @@ mod tests {
            assert!(directory.open_read(*TEST_PATH).is_err());
            let _w = directory.open_write(*TEST_PATH).unwrap();
            assert!(directory.exists(*TEST_PATH));
-            if let Err(e) = directory.open_read(*TEST_PATH) {
-                println!("{:?}", e);
-            }
            assert!(directory.open_read(*TEST_PATH).is_ok());
            assert!(directory.delete(*TEST_PATH).is_ok());
        }
--- a/src/directory/read_only_source.rs
+++ b/src/directory/read_only_source.rs
@@ -1,10 +1,13 @@
+#[cfg(feature="mmap")]
 use fst::raw::MmapReadOnly;
 use std::ops::Deref;
 use super::shared_vec_slice::SharedVecSlice;
 use common::HasLen;
 use std::slice;
 use std::io::{self, Read};
-use stable_deref_trait::StableDeref;
+use stable_deref_trait::{CloneStableDeref, StableDeref};
+
+const EMPTY_SLICE: [u8; 0] = [];

 /// Read object that represents files in tantivy.
 ///
@@ -14,12 +17,16 @@ use stable_deref_trait::StableDeref;
 /// hold by this object should never be altered or destroyed.
 pub enum ReadOnlySource {
    /// Mmap source of data
+    #[cfg(feature="mmap")]
    Mmap(MmapReadOnly),
    /// Wrapping a `Vec<u8>`
    Anonymous(SharedVecSlice),
+    /// Wrapping a static slice
+    Static(&'static [u8])
 }

 unsafe impl StableDeref for ReadOnlySource {}
+unsafe impl CloneStableDeref for ReadOnlySource {}

 impl Deref for ReadOnlySource {
    type Target = [u8];
@@ -32,14 +39,16 @@ impl Deref for ReadOnlySource {
 impl ReadOnlySource {
    /// Creates an empty ReadOnlySource
    pub fn empty() -> ReadOnlySource {
-        ReadOnlySource::Anonymous(SharedVecSlice::empty())
+        ReadOnlySource::Static(&EMPTY_SLICE)
    }

    /// Returns the data underlying the ReadOnlySource object.
    pub fn as_slice(&self) -> &[u8] {
        match *self {
+            #[cfg(feature="mmap")]
            ReadOnlySource::Mmap(ref mmap_read_only) => unsafe { mmap_read_only.as_slice() },
            ReadOnlySource::Anonymous(ref shared_vec) => shared_vec.as_slice(),
+            ReadOnlySource::Static(data) => data,
        }
    }

@@ -62,7 +71,9 @@ impl ReadOnlySource {
    /// 1KB slice is remaining, the whole `500MBs`
    /// are retained in memory.
    pub fn slice(&self, from_offset: usize, to_offset: usize) -> ReadOnlySource {
+        assert!(from_offset <= to_offset, "Requested negative slice [{}..{}]", from_offset, to_offset);
        match *self {
+            #[cfg(feature="mmap")]
            ReadOnlySource::Mmap(ref mmap_read_only) => {
                let sliced_mmap = mmap_read_only.range(from_offset, to_offset - from_offset);
                ReadOnlySource::Mmap(sliced_mmap)
@@ -70,6 +81,9 @@ impl ReadOnlySource {
            ReadOnlySource::Anonymous(ref shared_vec) => {
                ReadOnlySource::Anonymous(shared_vec.slice(from_offset, to_offset))
            }
+            ReadOnlySource::Static(data) => {
+                ReadOnlySource::Static(&data[from_offset..to_offset])
+            }
        }
    }

@@ -110,6 +124,12 @@ impl From<Vec<u8>> for ReadOnlySource {
    }
 }

+impl From<&'static [u8]> for ReadOnlySource {
+    fn from(data: &'static [u8]) -> ReadOnlySource {
+        ReadOnlySource::Static(data)
+    }
+}
+
 /// Acts as a owning cursor over the data backed up by a `ReadOnlySource`
 pub(crate) struct SourceRead {
    _data_owner: ReadOnlySource,
@@ -121,6 +141,16 @@ impl SourceRead {
    pub fn advance(&mut self, len: usize) {
        self.cursor = &self.cursor[len..];
    }
+
+    pub fn slice_from(&self, start: usize) -> &[u8] {
+        &self.cursor[start..]
+
+    }
+
+    pub fn get(&self, idx: usize) -> u8 {
+        self.cursor[idx]
+    }
+
 }

 impl AsRef<[u8]> for SourceRead {
--- a/src/directory/static_directory.rs
+++ b/src/directory/static_directory.rs
@@ -0,0 +1,123 @@
+use std::collections::HashMap;
+use Directory;
+use std::path::PathBuf;
+use directory::ReadOnlySource;
+use std::io::BufWriter;
+use directory::error::{DeleteError, OpenReadError, OpenWriteError};
+use std::path::Path;
+use std::fmt::{Formatter, Debug, self};
+use Result as TantivyResult;
+use directory::SeekableWrite;
+use std::io;
+use std::fs;
+use common::Endianness;
+use common::BinarySerializable;
+use common::VInt;
+use byteorder::ByteOrder;
+use std::str;
+use std::fs::File;
+use std::io::{Read, Write};
+use std::ffi::OsString;
+
+#[derive(Clone)]
+pub struct StaticDirectory {
+    files: HashMap<PathBuf, &'static [u8]>,
+}
+
+impl Debug for StaticDirectory {
+    fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
+        write!(f, "StaticDirectory[{} files]", self.files.len())?;
+        Ok(())
+    }
+}
+
+impl StaticDirectory {
+    pub fn open(data: &'static [u8]) -> TantivyResult<StaticDirectory> { // parses a buffer laid out as body | footer | body length
+        assert!(data.len() > 8);
+        let footer_len_offset = data.len() - 8; // the trailing 8 bytes hold the body length
+        let body_len = Endianness::read_u64(&data[footer_len_offset..]) as usize;
+        let mut body = &data[..body_len]; // concatenated file contents, consumed front to back
+        let mut footer = &data[body_len..footer_len_offset]; // file count, then one entry per file
+        let num_files = VInt::deserialize(&mut footer)?.0 as usize;
+        let mut files = HashMap::new();
+        for _ in 0..num_files {
+            let filename_len = VInt::deserialize(&mut footer)?.0 as usize;
+            let filename = &footer[..filename_len];
+            footer = &footer[filename_len..];
+            let data_len = VInt::deserialize(&mut footer)?.0 as usize;
+            let file_data = &body[..data_len];
+            body = &body[data_len..];
+            let filename_str = str::from_utf8(filename).expect("Invalid UTF8");
+            let filename = PathBuf::from(filename_str);
+            debug!("{:?} {:?}", filename, data_len); // logged rather than printed to stdout
+            files.insert(filename, file_data);
+        }
+        Ok(StaticDirectory {
+            files
+        })
+    }
+}
+
+impl Directory for StaticDirectory {
+    fn open_read(&self, path: &Path) -> Result<ReadOnlySource, OpenReadError> {
+        if let Some(static_data) = self.files.get(path) {
+            Ok(ReadOnlySource::from(*static_data))
+        } else {
+            Err(OpenReadError::FileDoesNotExist(path.to_owned()))
+        }
+    }
+
+    fn delete(&self, path: &Path) -> Result<(), DeleteError> {
+        unimplemented!("Static directory is read-only !")
+    }
+
+    fn exists(&self, path: &Path) -> bool {
+        self.files.contains_key(path)
+    }
+
+    fn open_write(&mut self, path: &Path) -> Result<BufWriter<Box<SeekableWrite>>, OpenWriteError> {
+        unimplemented!("Static directory is read-only !")
+    }
+
+    fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError> {
+        if let Some(static_data) = self.files.get(path) {
+            Ok(static_data.to_vec())
+        } else {
+            Err(OpenReadError::FileDoesNotExist(path.to_owned()))
+        }
+    }
+
+    fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> {
+        unimplemented!("Static directory is read-only !")
+    }
+
+    fn box_clone(&self) -> Box<Directory> {
+        box self.clone()
+    }
+}
+
+pub fn write_static_from_directory(directory_path: &Path) -> TantivyResult<Vec<u8>> {
+    assert!(directory_path.is_dir());
+    let mut file_data: Vec<(OsString, usize)> = Vec::new(); // (file name, byte length) in body order
+    let mut write: Vec<u8> = Vec::new();
+    for entry in fs::read_dir(directory_path)? {
+        let entry = entry?;
+        let path = entry.path();
+        if path.is_file() {
+            info!("Appending {}", path.to_string_lossy());
+            let mut open_file = File::open(&path)?;
+            let file_len = open_file.read_to_end(&mut write)?; // appends the file content to the body
+            file_data.push((entry.file_name(), file_len));
+        }
+    }
+    // Footer: file count, then (name length, name bytes, data length) per file, then the body length.
+    let body_len = write.len();
+    VInt(file_data.len() as u64).serialize(&mut write)?;
+    for (filename, filelen) in file_data {
+        VInt(filename.to_string_lossy().len() as u64).serialize(&mut write)?; // must match the bytes written on the next line
+        write.write_all(filename.to_string_lossy().as_bytes())?;
+        VInt(filelen as u64).serialize(&mut write)?;
+    }
+    (body_len as u64).serialize(&mut write)?;
+    Ok(write)
+}
--- a/src/postings/docset.rs
+++ b/src/postings/docset.rs
@@ -2,6 +2,7 @@ use DocId;
 use std::borrow::Borrow;
 use std::borrow::BorrowMut;
 use std::cmp::Ordering;
+use common::BitSet;

 /// Expresses the outcome of a call to `DocSet`'s `.skip_next(...)`.
 #[derive(PartialEq, Eq, Debug)]
@@ -33,6 +34,9 @@ pub trait DocSet {
    /// More specifically, if the docset is already positionned on the target
    /// skipping will advance to the next position and return SkipResult::Overstep.
    ///
+    /// If `.skip_next()` oversteps, then the docset must be positioned correctly
+    /// on an existing document. In other words, `.doc()` should return the first document
+    /// greater than `target`.
    fn skip_next(&mut self, target: DocId) -> SkipResult {
        if !self.advance() {
            return SkipResult::End;
@@ -79,20 +83,27 @@ pub trait DocSet {
    /// Returns the current document
    fn doc(&self) -> DocId;

-    /// Advances the cursor to the next document
-    /// None is returned if the iterator has `DocSet`
-    /// has already been entirely consumed.
-    fn next(&mut self) -> Option<DocId> {
-        if self.advance() {
-            Some(self.doc())
-        } else {
-            None
+    /// Returns a best-effort hint of the
+    /// length of the docset.
+    fn size_hint(&self) -> u32;
+
+    /// Appends all docs to a `bitset`.
+    fn append_to_bitset(&mut self, bitset: &mut BitSet) {
+        while self.advance() {
+            bitset.insert(self.doc());
        }
    }

-    /// Returns a best-effort hint of the
-    /// length of the docset.
-    fn size_hint(&self) -> usize;
+    /// Returns the number of matching documents.
+    ///
+    /// Calling this method consumes the `DocSet`.
+    fn count(&mut self) -> u32 {
+        let mut count = 0u32;
+        while self.advance() {
+            count += 1u32;
+        }
+        count
+    }
 }

 impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
@@ -111,30 +122,18 @@ impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
        unboxed.doc()
    }

-    fn size_hint(&self) -> usize {
+    fn size_hint(&self) -> u32 {
        let unboxed: &TDocSet = self.borrow();
        unboxed.size_hint()
    }
-}

-impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet {
-    fn advance(&mut self) -> bool {
-        let unref: &mut TDocSet = *self;
-        unref.advance()
+    fn count(&mut self) -> u32 {
+        let unboxed: &mut TDocSet = self.borrow_mut();
+        unboxed.count()
    }

-    fn skip_next(&mut self, target: DocId) -> SkipResult {
-        let unref: &mut TDocSet = *self;
-        unref.skip_next(target)
-    }
-
-    fn doc(&self) -> DocId {
-        let unref: &TDocSet = *self;
-        unref.doc()
-    }
-
-    fn size_hint(&self) -> usize {
-        let unref: &TDocSet = *self;
-        unref.size_hint()
+    fn append_to_bitset(&mut self, bitset: &mut BitSet) {
+        let unboxed: &mut TDocSet = self.borrow_mut();
+        unboxed.append_to_bitset(bitset);
    }
 }
--- a/src/fastfield/delete.rs
+++ b/src/fastfield/delete.rs
@@ -51,21 +51,7 @@ impl DeleteBitSet {
        }
    }

-    /// Returns an empty delete bit set.
-    pub fn empty() -> DeleteBitSet {
-        DeleteBitSet {
-            data: ReadOnlySource::empty(),
-            len: 0,
-        }
-    }
-
-    /// Returns true iff the segment has some deleted documents.
-    pub fn has_deletes(&self) -> bool {
-        self.len() > 0
-    }
-
-    /// Returns true iff the document is deleted.
-    #[inline]
+    /// Returns whether the document has been marked as deleted.
    pub fn is_deleted(&self, doc: DocId) -> bool {
        if self.len == 0 {
            false
@@ -76,8 +62,10 @@ impl DeleteBitSet {
            b & (1u8 << shift) != 0
        }
    }
+
 }

+
 impl HasLen for DeleteBitSet {
    fn len(&self) -> usize {
        self.len
--- a/src/fastfield/facet_reader.rs
+++ b/src/fastfield/facet_reader.rs
@@ -0,0 +1,68 @@
+use super::MultiValueIntFastFieldReader;
+use DocId;
+use termdict::TermOrdinal;
+use schema::Facet;
+use termdict::{TermDictionary, TermDictionaryImpl};
+
+/// The facet reader makes it possible to access the list of
+/// facets associated to a given document in a specific
+/// segment.
+///
+/// Rather than manipulating `Facet` objects directly, the API
+/// exposes them in the form of a list of `Facet` ordinals.
+///
+/// A facet ordinal can then be translated into a facet via
+/// `.facet_from_ord(...)`.
+///
+/// Facet ordinals are defined as their position in the sorted
+/// list of facets. This ordinal is segment local and
+/// only makes sense for a given segment.
+pub struct FacetReader {
+    term_ords: MultiValueIntFastFieldReader<u64>,
+    term_dict: TermDictionaryImpl,
+}
+
+impl FacetReader {
+    /// Creates a new `FacetReader`.
+    ///
+    /// A facet reader just wraps :
+    /// - a `MultiValueIntFastFieldReader` that makes it possible to
+    /// access the list of facet ords for a given document.
+    /// - a `TermDictionaryImpl` that helps associating a facet to
+    /// an ordinal and vice versa.
+    pub fn new(
+        term_ords: MultiValueIntFastFieldReader<u64>,
+        term_dict: TermDictionaryImpl,
+    ) -> FacetReader {
+        FacetReader {
+            term_ords,
+            term_dict,
+        }
+    }
+
+    /// Returns the size of the sets of facets in the segment.
+    /// This does not take in account the documents that may be marked
+    /// as deleted.
+    ///
+    /// `Facet` ordinals range from `0` to `num_facets() - 1`.
+    pub fn num_facets(&self) -> usize {
+        self.term_dict.num_terms()
+    }
+
+    /// Accessor for the facet term dictionary.
+    pub fn facet_dict(&self) -> &TermDictionaryImpl {
+        &self.term_dict
+    }
+
+    /// Given a term ordinal returns the term associated to it.
+    pub fn facet_from_ord(&self, facet_ord: TermOrdinal, output: &mut Facet) {
+        let found_term = self.term_dict
+            .ord_to_term(facet_ord as u64, output.inner_buffer_mut());
+        assert!(found_term, "Term ordinal {} no found.", facet_ord);
+    }
+
+    /// Return the list of facet ordinals associated to a document.
+    pub fn facet_ords(&mut self, doc: DocId, output: &mut Vec<u64>) {
+        self.term_ords.get_vals(doc, output);
+    }
+}
--- a/src/fastfield/mod.rs
+++ b/src/fastfield/mod.rs
@@ -23,36 +23,119 @@ values stored.
 Read access performance is comparable to that of an array lookup.
 */

+use common;
+use schema::Cardinality;
+use schema::FieldType;
+use schema::Value;
+pub use self::delete::DeleteBitSet;
+pub use self::delete::write_delete_bitset;
+pub use self::error::{FastFieldNotAvailableError, Result};
+pub use self::facet_reader::FacetReader;
+pub use self::multivalued::MultiValueIntFastFieldReader;
+pub use self::reader::FastFieldReader;
+pub use self::serializer::FastFieldSerializer;
+pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
+
 mod reader;
 mod writer;
 mod serializer;
 mod error;
 mod delete;
+mod facet_reader;
+mod multivalued;

-pub use self::delete::write_delete_bitset;
-pub use self::delete::DeleteBitSet;
-pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
-pub use self::reader::{I64FastFieldReader, U64FastFieldReader};
-pub use self::reader::FastFieldReader;
-pub use self::serializer::FastFieldSerializer;
-pub use self::error::{FastFieldNotAvailableError, Result};
+/// Trait for types that are allowed for fast fields: (u64 or i64).
+pub trait FastValue: Default + Clone + Copy {
+    /// Converts a value from u64
+    ///
+    /// Internally all fast field values are encoded as u64.
+    fn from_u64(val: u64) -> Self;
+
+    /// Converts a value to u64.
+    ///
+    /// Internally all fast field values are encoded as u64.
+    fn to_u64(&self) -> u64;
+
+    /// Returns the fast field cardinality that can be extracted from the given
+    /// `FieldType`.
+    ///
+    /// If the type is not a fast field, `None` is returned.
+    fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality>;
+
+    /// Cast value to `u64`.
+    /// The value is just reinterpreted in memory.
+    fn as_u64(&self) -> u64;
+}
+
+impl FastValue for u64 {
+    fn from_u64(val: u64) -> Self {
+        val
+    }
+
+    fn to_u64(&self) -> u64 {
+        *self
+    }
+
+    fn as_u64(&self) -> u64 {
+        *self
+    }
+
+    fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> {
+        match *field_type {
+            FieldType::U64(ref integer_options) => integer_options.get_fastfield_cardinality(),
+            FieldType::HierarchicalFacet => Some(Cardinality::MultiValues),
+            _ => None,
+        }
+    }
+}
+
+impl FastValue for i64 {
+    fn from_u64(val: u64) -> Self {
+        common::u64_to_i64(val)
+    }
+
+    fn to_u64(&self) -> u64 {
+        common::i64_to_u64(*self)
+    }
+
+    fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> {
+        match *field_type {
+            FieldType::I64(ref integer_options) => integer_options.get_fastfield_cardinality(),
+            _ => None,
+        }
+    }
+
+    fn as_u64(&self) -> u64 {
+        *self as u64
+    }
+}
+
+fn value_to_u64(value: &Value) -> u64 {
+    match *value {
+        Value::U64(ref val) => *val,
+        Value::I64(ref val) => common::i64_to_u64(*val),
+        _ => panic!("Expected a u64/i64 field, got {:?} ", value),
+    }
+}

 #[cfg(test)]
 mod tests {
-    use super::*;
-    use schema::Field;
-    use std::path::Path;
+
+    use common::CompositeFile;
    use directory::{Directory, RAMDirectory, WritePtr};
-    use schema::Document;
-    use schema::{Schema, SchemaBuilder};
-    use schema::FAST;
-    use test::Bencher;
-    use test;
    use fastfield::FastFieldReader;
    use rand::Rng;
    use rand::SeedableRng;
-    use common::CompositeFile;
    use rand::XorShiftRng;
+    use schema::{Schema, SchemaBuilder};
+    use schema::Document;
+    use schema::FAST;
+    use schema::Field;
+    use std::collections::HashMap;
+    use std::path::Path;
+    use super::*;
+    use test;
+    use test::Bencher;

    lazy_static! {
        static ref SCHEMA: Schema = {
@@ -65,15 +148,9 @@ mod tests {
        };
    }

-    fn add_single_field_doc(fast_field_writers: &mut FastFieldsWriter, field: Field, value: u64) {
-        let mut doc = Document::default();
-        doc.add_u64(field, value);
-        fast_field_writers.add_document(&doc);
-    }
-
    #[test]
    pub fn test_fastfield() {
-        let test_fastfield = U64FastFieldReader::from(vec![100, 200, 300]);
+        let test_fastfield = FastFieldReader::<u64>::from(vec![100, 200, 300]);
        assert_eq!(test_fastfield.get(0), 100);
        assert_eq!(test_fastfield.get(1), 200);
        assert_eq!(test_fastfield.get(2), 300);
@@ -87,20 +164,22 @@ mod tests {
            let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
            let mut serializer = FastFieldSerializer::from_write(write).unwrap();
            let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
-            add_single_field_doc(&mut fast_field_writers, *FIELD, 13u64);
-            add_single_field_doc(&mut fast_field_writers, *FIELD, 14u64);
-            add_single_field_doc(&mut fast_field_writers, *FIELD, 2u64);
-            fast_field_writers.serialize(&mut serializer).unwrap();
+            fast_field_writers.add_document(&doc!(*FIELD=>13u64));
+            fast_field_writers.add_document(&doc!(*FIELD=>14u64));
+            fast_field_writers.add_document(&doc!(*FIELD=>2u64));
+            fast_field_writers
+                .serialize(&mut serializer, &HashMap::new())
+                .unwrap();
            serializer.close().unwrap();
        }
        let source = directory.open_read(&path).unwrap();
        {
-            assert_eq!(source.len(), 35 as usize);
+            assert_eq!(source.len(), 36 as usize);
        }
        {
            let composite_file = CompositeFile::open(&source).unwrap();
            let field_source = composite_file.open_read(*FIELD).unwrap();
-            let fast_field_reader: U64FastFieldReader = U64FastFieldReader::open(field_source);
+            let fast_field_reader = FastFieldReader::<u64>::open(field_source);
            assert_eq!(fast_field_reader.get(0), 13u64);
            assert_eq!(fast_field_reader.get(1), 14u64);
            assert_eq!(fast_field_reader.get(2), 2u64);
@@ -115,26 +194,28 @@ mod tests {
            let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
            let mut serializer = FastFieldSerializer::from_write(write).unwrap();
            let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
-            add_single_field_doc(&mut fast_field_writers, *FIELD, 4u64);
-            add_single_field_doc(&mut fast_field_writers, *FIELD, 14_082_001u64);
-            add_single_field_doc(&mut fast_field_writers, *FIELD, 3_052u64);
-            add_single_field_doc(&mut fast_field_writers, *FIELD, 9002u64);
-            add_single_field_doc(&mut fast_field_writers, *FIELD, 15_001u64);
-            add_single_field_doc(&mut fast_field_writers, *FIELD, 777u64);
-            add_single_field_doc(&mut fast_field_writers, *FIELD, 1_002u64);
-            add_single_field_doc(&mut fast_field_writers, *FIELD, 1_501u64);
-            add_single_field_doc(&mut fast_field_writers, *FIELD, 215u64);
-            fast_field_writers.serialize(&mut serializer).unwrap();
+            fast_field_writers.add_document(&doc!(*FIELD=>4u64));
+            fast_field_writers.add_document(&doc!(*FIELD=>14_082_001u64));
+            fast_field_writers.add_document(&doc!(*FIELD=>3_052u64));
+            fast_field_writers.add_document(&doc!(*FIELD=>9_002u64));
+            fast_field_writers.add_document(&doc!(*FIELD=>15_001u64));
+            fast_field_writers.add_document(&doc!(*FIELD=>777u64));
+            fast_field_writers.add_document(&doc!(*FIELD=>1_002u64));
+            fast_field_writers.add_document(&doc!(*FIELD=>1_501u64));
+            fast_field_writers.add_document(&doc!(*FIELD=>215u64));
+            fast_field_writers
+                .serialize(&mut serializer, &HashMap::new())
+                .unwrap();
            serializer.close().unwrap();
        }
        let source = directory.open_read(&path).unwrap();
        {
-            assert_eq!(source.len(), 60 as usize);
+            assert_eq!(source.len(), 61 as usize);
        }
        {
            let fast_fields_composite = CompositeFile::open(&source).unwrap();
-            let fast_field_reader: U64FastFieldReader =
-                U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap());
+            let data = fast_fields_composite.open_read(*FIELD).unwrap();
+            let fast_field_reader = FastFieldReader::<u64>::open(data);
            assert_eq!(fast_field_reader.get(0), 4u64);
            assert_eq!(fast_field_reader.get(1), 14_082_001u64);
            assert_eq!(fast_field_reader.get(2), 3_052u64);
@@ -157,19 +238,21 @@ mod tests {
            let mut serializer = FastFieldSerializer::from_write(write).unwrap();
            let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
            for _ in 0..10_000 {
-                add_single_field_doc(&mut fast_field_writers, *FIELD, 100_000u64);
+                fast_field_writers.add_document(&doc!(*FIELD=>100_000u64));
            }
-            fast_field_writers.serialize(&mut serializer).unwrap();
+            fast_field_writers
+                .serialize(&mut serializer, &HashMap::new())
+                .unwrap();
            serializer.close().unwrap();
        }
        let source = directory.open_read(&path).unwrap();
        {
-            assert_eq!(source.len(), 33 as usize);
+            assert_eq!(source.len(), 34 as usize);
        }
        {
            let fast_fields_composite = CompositeFile::open(&source).unwrap();
-            let fast_field_reader: U64FastFieldReader =
-                U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap());
+            let data = fast_fields_composite.open_read(*FIELD).unwrap();
+            let fast_field_reader = FastFieldReader::<u64>::open(data);
            for doc in 0..10_000 {
                assert_eq!(fast_field_reader.get(doc), 100_000u64);
            }
@@ -186,26 +269,23 @@ mod tests {
            let mut serializer = FastFieldSerializer::from_write(write).unwrap();
            let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
            // forcing the amplitude to be high
-            add_single_field_doc(&mut fast_field_writers, *FIELD, 0u64);
+            fast_field_writers.add_document(&doc!(*FIELD=>0u64));
            for i in 0u64..10_000u64 {
-                add_single_field_doc(
-                    &mut fast_field_writers,
-                    *FIELD,
-                    5_000_000_000_000_000_000u64 + i,
-                );
+                fast_field_writers.add_document(&doc!(*FIELD=>5_000_000_000_000_000_000u64 + i));
            }
-            fast_field_writers.serialize(&mut serializer).unwrap();
+            fast_field_writers
+                .serialize(&mut serializer, &HashMap::new())
+                .unwrap();
            serializer.close().unwrap();
        }
        let source = directory.open_read(&path).unwrap();
        {
-            assert_eq!(source.len(), 80041 as usize);
+            assert_eq!(source.len(), 80042 as usize);
        }
        {
            let fast_fields_composite = CompositeFile::open(&source).unwrap();
-            let fast_field_reader: U64FastFieldReader =
-                U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap());
-
+            let data = fast_fields_composite.open_read(*FIELD).unwrap();
+            let fast_field_reader = FastFieldReader::<u64>::open(data);
            assert_eq!(fast_field_reader.get(0), 0u64);
            for doc in 1..10_001 {
                assert_eq!(
@@ -233,17 +313,19 @@ mod tests {
                doc.add_i64(i64_field, i);
                fast_field_writers.add_document(&doc);
            }
-            fast_field_writers.serialize(&mut serializer).unwrap();
+            fast_field_writers
+                .serialize(&mut serializer, &HashMap::new())
+                .unwrap();
            serializer.close().unwrap();
        }
        let source = directory.open_read(&path).unwrap();
        {
-            assert_eq!(source.len(), 17708 as usize);
+            assert_eq!(source.len(), 17709 as usize);
        }
        {
            let fast_fields_composite = CompositeFile::open(&source).unwrap();
-            let fast_field_reader: I64FastFieldReader =
-                I64FastFieldReader::open(fast_fields_composite.open_read(i64_field).unwrap());
+            let data = fast_fields_composite.open_read(i64_field).unwrap();
+            let fast_field_reader = FastFieldReader::<i64>::open(data);

            assert_eq!(fast_field_reader.min_value(), -100i64);
            assert_eq!(fast_field_reader.max_value(), 9_999i64);
@@ -272,15 +354,17 @@ mod tests {
            let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
            let doc = Document::default();
            fast_field_writers.add_document(&doc);
-            fast_field_writers.serialize(&mut serializer).unwrap();
+            fast_field_writers
+                .serialize(&mut serializer, &HashMap::new())
+                .unwrap();
            serializer.close().unwrap();
        }

        let source = directory.open_read(&path).unwrap();
        {
            let fast_fields_composite = CompositeFile::open(&source).unwrap();
-            let fast_field_reader: I64FastFieldReader =
-                I64FastFieldReader::open(fast_fields_composite.open_read(i64_field).unwrap());
+            let data = fast_fields_composite.open_read(i64_field).unwrap();
+            let fast_field_reader = FastFieldReader::<i64>::open(data);
            assert_eq!(fast_field_reader.get(0u32), 0i64);
        }
    }
@@ -303,17 +387,19 @@ mod tests {
            let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
            let mut serializer = FastFieldSerializer::from_write(write).unwrap();
            let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
-            for x in &permutation {
-                add_single_field_doc(&mut fast_field_writers, *FIELD, *x);
+            for &x in &permutation {
+                fast_field_writers.add_document(&doc!(*FIELD=>x));
            }
-            fast_field_writers.serialize(&mut serializer).unwrap();
+            fast_field_writers
+                .serialize(&mut serializer, &HashMap::new())
+                .unwrap();
            serializer.close().unwrap();
        }
        let source = directory.open_read(&path).unwrap();
        {
            let fast_fields_composite = CompositeFile::open(&source).unwrap();
-            let fast_field_reader: U64FastFieldReader =
-                U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap());
+            let data = fast_fields_composite.open_read(*FIELD).unwrap();
+            let fast_field_reader = FastFieldReader::<u64>::open(data);

            let mut a = 0u64;
            for _ in 0..n {
@@ -329,7 +415,7 @@ mod tests {
        b.iter(|| {
            let n = test::black_box(7000u32);
            let mut a = 0u64;
-            for i in Iterator::step_by((0u32..n), 7) {
+            for i in Iterator::step_by(0u32..n, 7) {
                a ^= permutation[i as usize];
            }
            a
@@ -358,22 +444,24 @@ mod tests {
            let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
            let mut serializer = FastFieldSerializer::from_write(write).unwrap();
            let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
-            for x in &permutation {
-                add_single_field_doc(&mut fast_field_writers, *FIELD, *x);
+            for &x in &permutation {
+                fast_field_writers.add_document(&doc!(*FIELD=>x));
            }
-            fast_field_writers.serialize(&mut serializer).unwrap();
+            fast_field_writers
+                .serialize(&mut serializer, &HashMap::new())
+                .unwrap();
            serializer.close().unwrap();
        }
        let source = directory.open_read(&path).unwrap();
        {
            let fast_fields_composite = CompositeFile::open(&source).unwrap();
-            let fast_field_reader: U64FastFieldReader =
-                U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap());
+            let data = fast_fields_composite.open_read(*FIELD).unwrap();
+            let fast_field_reader = FastFieldReader::<u64>::open(data);

            b.iter(|| {
                let n = test::black_box(7000u32);
                let mut a = 0u64;
-                for i in Iterator::step_by((0u32..n), 7) {
+                for i in Iterator::step_by(0u32..n, 7) {
                    a ^= fast_field_reader.get(i);
                }
                a
@@ -390,17 +478,19 @@ mod tests {
            let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
            let mut serializer = FastFieldSerializer::from_write(write).unwrap();
            let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
-            for x in &permutation {
-                add_single_field_doc(&mut fast_field_writers, *FIELD, *x);
+            for &x in &permutation {
+                fast_field_writers.add_document(&doc!(*FIELD=>x));
            }
-            fast_field_writers.serialize(&mut serializer).unwrap();
+            fast_field_writers
+                .serialize(&mut serializer, &HashMap::new())
+                .unwrap();
            serializer.close().unwrap();
        }
        let source = directory.open_read(&path).unwrap();
        {
            let fast_fields_composite = CompositeFile::open(&source).unwrap();
-            let fast_field_reader: U64FastFieldReader =
-                U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap());
+            let data = fast_fields_composite.open_read(*FIELD).unwrap();
+            let fast_field_reader = FastFieldReader::<u64>::open(data);

            b.iter(|| {
                let n = test::black_box(1000u32);
--- a/src/fastfield/multivalued/mod.rs
+++ b/src/fastfield/multivalued/mod.rs
@@ -0,0 +1,88 @@
+mod writer;
+mod reader;
+
+pub use self::writer::MultiValueIntFastFieldWriter;
+pub use self::reader::MultiValueIntFastFieldReader;
+
+#[cfg(test)]
+mod tests {
+
+    use schema::SchemaBuilder;
+    use schema::Cardinality;
+    use schema::IntOptions;
+    use Index;
+
+    #[test]
+    fn test_multivalued_u64() { // NOTE(review): doc 3 is not read back, unlike in the i64 test
+        let mut schema_builder = SchemaBuilder::default();
+        let field = schema_builder.add_u64_field(
+            "multifield",
+            IntOptions::default().set_fast(Cardinality::MultiValues),
+        );
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+        let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
+        index_writer.add_document(doc!(field=>1u64, field=>3u64)); // doc 0: two values
+        index_writer.add_document(doc!()); // doc 1: no value
+        index_writer.add_document(doc!(field=>4u64)); // doc 2: one value
+        index_writer.add_document(doc!(field=>5u64, field=>20u64,field=>1u64)); // doc 3: three values
+        assert!(index_writer.commit().is_ok());
+
+        index.load_searchers().unwrap();
+        let searcher = index.searcher();
+        let reader = searcher.segment_reader(0);
+        let mut vals = Vec::new(); // one buffer reused by every `get_vals` call below
+        let multi_value_reader = reader.multi_fast_field_reader::<u64>(field).unwrap();
+        {
+            multi_value_reader.get_vals(2, &mut vals); // docs are read out of insertion order
+            assert_eq!(&vals, &[4u64]);
+        }
+        {
+            multi_value_reader.get_vals(0, &mut vals);
+            assert_eq!(&vals, &[1u64, 3u64]); // values come back in the order they were added
+        }
+        {
+            multi_value_reader.get_vals(1, &mut vals);
+            assert!(vals.is_empty()); // doc 1 was added without any value
+        }
+    }
+
+    #[test]
+    fn test_multivalued_i64() { // same scenario as the u64 test, with negative values
+        let mut schema_builder = SchemaBuilder::default();
+        let field = schema_builder.add_i64_field(
+            "multifield",
+            IntOptions::default().set_fast(Cardinality::MultiValues),
+        );
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+        let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
+        index_writer.add_document(doc!(field=> 1i64, field => 3i64)); // doc 0: two values
+        index_writer.add_document(doc!()); // doc 1: no value
+        index_writer.add_document(doc!(field=> -4i64)); // doc 2: one value
+        index_writer.add_document(doc!(field=> -5i64, field => -20i64, field=>1i64)); // doc 3
+        assert!(index_writer.commit().is_ok());
+
+        index.load_searchers().unwrap();
+        let searcher = index.searcher();
+        let reader = searcher.segment_reader(0);
+        let mut vals = Vec::new(); // one buffer reused by every `get_vals` call below
+        let multi_value_reader = reader.multi_fast_field_reader::<i64>(field).unwrap();
+        {
+            multi_value_reader.get_vals(2, &mut vals);
+            assert_eq!(&vals, &[-4i64]);
+        }
+        {
+            multi_value_reader.get_vals(0, &mut vals);
+            assert_eq!(&vals, &[1i64, 3i64]);
+        }
+        {
+            multi_value_reader.get_vals(1, &mut vals);
+            assert!(vals.is_empty()); // doc 1 was added without any value
+        }
+        {
+            multi_value_reader.get_vals(3, &mut vals);
+            assert_eq!(&vals, &[-5i64, -20i64, 1i64]); // insertion order, not sorted
+        }
+    }
+}
--- a/src/fastfield/multivalued/reader.rs
+++ b/src/fastfield/multivalued/reader.rs
@@ -0,0 +1,109 @@
+use DocId;
+use fastfield::{FastFieldReader, FastValue};
+
+/// Reader for a multivalued fast field of `Item` values.
+///
+/// The reader is implemented on top of two single-valued fast fields.
+///
+/// The `vals_reader` gives access to the concatenated list of the
+/// values of all documents.
+/// The `idx_reader` associates, with each document, the index of its first value.
+///
+#[derive(Clone)]
+pub struct MultiValueIntFastFieldReader<Item: FastValue> {
+    idx_reader: FastFieldReader<u64>,
+    vals_reader: FastFieldReader<Item>,
+}
+
+impl<Item: FastValue> MultiValueIntFastFieldReader<Item> {
+    pub(crate) fn open( // bundles the offset reader and the value reader, no I/O done here
+        idx_reader: FastFieldReader<u64>,
+        vals_reader: FastFieldReader<Item>,
+    ) -> MultiValueIntFastFieldReader<Item> {
+        MultiValueIntFastFieldReader {
+            idx_reader,
+            vals_reader,
+        }
+    }
+
+    /// Overwrites `vals` with the values associated to the given `doc`, resizing it as needed.
+    pub fn get_vals(&self, doc: DocId, vals: &mut Vec<Item>) {
+        let start = self.idx_reader.get(doc) as u32; // NOTE(review): u64 offset truncated to u32
+        let stop = self.idx_reader.get(doc + 1) as u32; // the next entry ends this doc's range
+        let len = (stop - start) as usize;
+        vals.resize(len, Item::default()); // placeholders, overwritten by `get_range` below
+        self.vals_reader.get_range(start, &mut vals[..]);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    use core::Index;
+    use schema::{Document, Facet, SchemaBuilder};
+
+    #[test]
+    fn test_multifastfield_reader() { // exercised through a facet field and its facet reader
+        let mut schema_builder = SchemaBuilder::new();
+        let facet_field = schema_builder.add_facet_field("facets");
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+        let mut index_writer = index
+            .writer_with_num_threads(1, 30_000_000)
+            .expect("Failed to create index writer.");
+        {
+            let mut doc = Document::new(); // doc 0: two facets, cat2 added before cat1
+            doc.add_facet(facet_field, "/category/cat2");
+            doc.add_facet(facet_field, "/category/cat1");
+            index_writer.add_document(doc);
+        }
+        {
+            let mut doc = Document::new(); // doc 1: one facet
+            doc.add_facet(facet_field, "/category/cat2");
+            index_writer.add_document(doc);
+        }
+        {
+            let mut doc = Document::new(); // doc 2: one facet
+            doc.add_facet(facet_field, "/category/cat3");
+            index_writer.add_document(doc);
+        }
+        index_writer.commit().expect("Commit failed");
+        index.load_searchers().expect("Reloading searchers");
+        let searcher = index.searcher();
+        let segment_reader = searcher.segment_reader(0);
+        let mut facet_reader = segment_reader.facet_reader(facet_field).unwrap();
+
+        let mut facet = Facet::root(); // reused output for `facet_from_ord`
+        {
+            facet_reader.facet_from_ord(1, &mut facet); // ord 1 is the shared parent
+            assert_eq!(facet, Facet::from("/category"));
+        }
+        {
+            facet_reader.facet_from_ord(2, &mut facet);
+            assert_eq!(facet, Facet::from("/category/cat1"));
+        }
+        {
+            facet_reader.facet_from_ord(3, &mut facet);
+            assert_eq!(format!("{}", facet), "/category/cat2"); // also checks the `Display` output
+            assert_eq!(facet, Facet::from("/category/cat2"));
+        }
+        {
+            facet_reader.facet_from_ord(4, &mut facet);
+            assert_eq!(facet, Facet::from("/category/cat3"));
+        }
+
+        let mut vals = Vec::new(); // reused output for `facet_ords`
+        {
+            facet_reader.facet_ords(0, &mut vals);
+            assert_eq!(&vals[..], &[3, 2]); // cat2 then cat1: the order they were added to doc 0
+        }
+        {
+            facet_reader.facet_ords(1, &mut vals);
+            assert_eq!(&vals[..], &[3]); // cat2
+        }
+        {
+            facet_reader.facet_ords(2, &mut vals);
+            assert_eq!(&vals[..], &[4]); // cat3
+        }
+    }
+}
--- a/src/fastfield/multivalued/writer.rs
+++ b/src/fastfield/multivalued/writer.rs
@@ -0,0 +1,111 @@
+use fastfield::FastFieldSerializer;
+use fastfield::serializer::FastSingleFieldSerializer;
+use fastfield::value_to_u64;
+use std::collections::HashMap;
+use postings::UnorderedTermId;
+use schema::{Document, Field};
+use std::io;
+use itertools::Itertools;
+
+pub struct MultiValueIntFastFieldWriter {
+    field: Field, // field whose values this writer records
+    vals: Vec<u64>, // values of all documents, concatenated in recording order
+    doc_index: Vec<u64>, // per document: length of `vals` when `next_doc` was called
+    is_facet: bool, // when true, `add_document` records nothing
+}
+
+impl MultiValueIntFastFieldWriter {
+    /// Creates a new, empty `MultiValueIntFastFieldWriter` for `field`.
+    pub fn new(field: Field, is_facet: bool) -> Self {
+        MultiValueIntFastFieldWriter {
+            field,
+            vals: Vec::new(),
+            doc_index: Vec::new(),
+            is_facet,
+        }
+    }
+
+    pub fn field(&self) -> Field {
+        self.field // the field this writer was created for
+    }
+
+    pub fn next_doc(&mut self) {
+        self.doc_index.push(self.vals.len() as u64); // start offset of the new document
+    }
+
+    /// Records a new value.
+    ///
+    /// The value is appended to the flat list of values; it falls in the
+    /// range of the document opened by the latest `next_doc` call,
+    /// whose start offset was pushed to `doc_index`.
+    pub fn add_val(&mut self, val: UnorderedTermId) {
+        self.vals.push(val);
+    }
+
+    pub fn add_document(&mut self, doc: &Document) {
+        if !self.is_facet { // facets: nothing recorded here (presumably fed via `add_val`)
+            for field_value in doc.field_values() {
+                if field_value.field() == self.field {
+                    self.add_val(value_to_u64(field_value.value()));
+                }
+            }
+        }
+    }
+
+    /// Serializes fast field values by pushing them to the `FastFieldSerializer`.
+    ///
+    /// `mapping_opt` makes it possible to remap the values before serializing.
+    /// Specifically, string terms are first stored in the writer as their
+    /// position in the `IndexWriter`'s `HashMap`. This value is called
+    /// an `UnorderedTermId`.
+    ///
+    /// During the serialization of the segment, terms get sorted and
+    /// `tantivy` builds a mapping to convert this `UnorderedTermId` into
+    /// term ordinals.
+    /// With `None`, the values are written exactly as they were recorded.
+    pub fn serialize(
+        &self,
+        serializer: &mut FastFieldSerializer,
+        mapping_opt: Option<&HashMap<UnorderedTermId, usize>>,
+    ) -> io::Result<()> {
+        {
+            // writing the offset index (last argument `0`), bounded by 0 and `vals.len()`
+            let mut doc_index_serializer =
+                serializer.new_u64_fast_field_with_idx(self.field, 0, self.vals.len() as u64, 0)?;
+            for &offset in &self.doc_index {
+                doc_index_serializer.add_val(offset)?;
+            }
+            doc_index_serializer.add_val(self.vals.len() as u64)?; // closing offset of the last doc
+            doc_index_serializer.close_field()?;
+        }
+        {
+            // writing the values themselves (last argument `1`).
+            let mut value_serializer: FastSingleFieldSerializer<_>;
+            match mapping_opt {
+                Some(mapping) => {
+                    value_serializer = serializer.new_u64_fast_field_with_idx(
+                        self.field,
+                        0u64,
+                        mapping.len() as u64, // passed as the max value for remapped ordinals
+                        1,
+                    )?;
+                    for val in &self.vals {
+                        let remapped_val = *mapping.get(val).expect("Missing term ordinal") as u64;
+                        value_serializer.add_val(remapped_val)?;
+                    }
+                }
+                None => { // no remapping: bounds come from the values, `(0, 0)` if there are none
+                    let val_min_max = self.vals.iter().cloned().minmax();
+                    let (val_min, val_max) = val_min_max.into_option().unwrap_or((0u64, 0));
+                    value_serializer =
+                        serializer.new_u64_fast_field_with_idx(self.field, val_min, val_max, 1)?;
+                    for &val in &self.vals {
+                        value_serializer.add_val(val)?;
+                    }
+                }
+            }
+            value_serializer.close_field()?;
+        }
+        Ok(())
+    }
+}
--- a/src/fastfield/reader.rs
+++ b/src/fastfield/reader.rs
@@ -1,107 +1,35 @@
-use directory::ReadOnlySource;
-use common::{self, BinarySerializable};
-use common::bitpacker::{compute_num_bits, BitUnpacker};
-use DocId;
-use schema::SchemaBuilder;
-use std::path::Path;
-use schema::FAST;
-use directory::{Directory, RAMDirectory, WritePtr};
-use fastfield::{FastFieldSerializer, FastFieldsWriter};
-use schema::FieldType;
-use std::mem;
+use common::BinarySerializable;
+use common::bitpacker::BitUnpacker;
 use common::CompositeFile;
+use common::compute_num_bits;
+use directory::{Directory, RAMDirectory, WritePtr};
+use directory::ReadOnlySource;
+use DocId;
+use fastfield::{FastFieldSerializer, FastFieldsWriter};
 use owning_ref::OwningRef;
+use schema::FAST;
+use schema::SchemaBuilder;
+use std::collections::HashMap;
+use std::marker::PhantomData;
+use std::mem;
+use std::path::Path;
+use super::FastValue;

 /// Trait for accessing a fastfield.
 ///
 /// Depending on the field type, a different
 /// fast field is required.
-pub trait FastFieldReader: Sized {
-    /// Type of the value stored in the fastfield.
-    type ValueType;
-
-    /// Return the value associated to the given document.
-    ///
-    /// This accessor should return as fast as possible.
-    ///
-    /// # Panics
-    ///
-    /// May panic if `doc` is greater than the segment
-    // `maxdoc`.
-    fn get(&self, doc: DocId) -> Self::ValueType;
-
-    /// Fills an output buffer with the fast field values
-    /// associated with the `DocId` going from
-    /// `start` to `start + output.len()`.
-    ///
-    /// # Panics
-    ///
-    /// May panic if `start + output.len()` is greater than
-    /// the segment's `maxdoc`.
-    fn get_range(&self, start: u32, output: &mut [Self::ValueType]);
-
-    /// Opens a fast field given a source.
-    fn open(source: ReadOnlySource) -> Self;
-
-    /// Returns true iff the given field_type makes
-    /// it possible to access the field values via a
-    /// fastfield.
-    fn is_enabled(field_type: &FieldType) -> bool;
-}
-
-/// `FastFieldReader` for unsigned 64-bits integers.
-pub struct U64FastFieldReader {
+#[derive(Clone)]
+pub struct FastFieldReader<Item: FastValue> {
    bit_unpacker: BitUnpacker<OwningRef<ReadOnlySource, [u8]>>,
-    min_value: u64,
-    max_value: u64,
+    min_value_u64: u64,
+    max_value_u64: u64,
+    _phantom: PhantomData<Item>,
 }

-impl U64FastFieldReader {
-    /// Returns the minimum value for this fast field.
-    ///
-    /// The min value does not take in account of possible
-    /// deleted document, and should be considered as a lower bound
-    /// of the actual minimum value.
-    pub fn min_value(&self) -> u64 {
-        self.min_value
-    }
-
-    /// Returns the maximum value for this fast field.
-    ///
-    /// The max value does not take in account of possible
-    /// deleted document, and should be considered as an upper bound
-    /// of the actual maximum value.
-    pub fn max_value(&self) -> u64 {
-        self.max_value
-    }
-}
-
-impl FastFieldReader for U64FastFieldReader {
-    type ValueType = u64;
-
-    fn get(&self, doc: DocId) -> u64 {
-        self.min_value + self.bit_unpacker.get(doc as usize)
-    }
-
-    fn is_enabled(field_type: &FieldType) -> bool {
-        match *field_type {
-            FieldType::U64(ref integer_options) => integer_options.is_fast(),
-            _ => false,
-        }
-    }
-
-    fn get_range(&self, start: u32, output: &mut [Self::ValueType]) {
-        self.bit_unpacker.get_range(start, output);
-        for out in output.iter_mut() {
-            *out += self.min_value;
-        }
-    }
-
-    /// Opens a new fast field reader given a read only source.
-    ///
-    /// # Panics
-    /// Panics if the data is corrupted.
-    fn open(data: ReadOnlySource) -> U64FastFieldReader {
+impl<Item: FastValue> FastFieldReader<Item> {
+    /// Opens a fast field given a source.
+    pub fn open(data: ReadOnlySource) -> Self {
        let min_value: u64;
        let amplitude: u64;
        {
@@ -114,17 +42,64 @@ impl FastFieldReader for U64FastFieldReader {
        let max_value = min_value + amplitude;
        let num_bits = compute_num_bits(amplitude);
        let owning_ref = OwningRef::new(data).map(|data| &data[16..]);
-        let bit_unpacker = BitUnpacker::new(owning_ref, num_bits as usize);
-        U64FastFieldReader {
-            min_value: min_value,
-            max_value: max_value,
-            bit_unpacker: bit_unpacker,
+        let bit_unpacker = BitUnpacker::new(owning_ref, num_bits);
+        FastFieldReader {
+            min_value_u64: min_value,
+            max_value_u64: max_value,
+            bit_unpacker,
+            _phantom: PhantomData,
        }
    }
+
+    /// Returns the value associated to the given document.
+    ///
+    /// This accessor should return as fast as possible: the bit-unpacked
+    /// offset is added to the minimum and passed to `Item::from_u64`.
+    ///
+    /// # Panics
+    ///
+    /// May panic if `doc` is greater than the segment's `maxdoc`.
+    pub fn get(&self, doc: DocId) -> Item {
+        Item::from_u64(self.min_value_u64 + self.bit_unpacker.get(doc as usize))
+    }
+
+    /// Fills `output` with the fast field values of the `DocId`s
+    /// going from `start` to `start + output.len()`.
+    ///
+    /// NOTE(review): `output` is transmuted to `&mut [u64]`, which assumes
+    /// `Item` has the size and alignment of `u64` -- confirm for every `FastValue`.
+    ///
+    /// # Panics
+    /// May panic if `start + output.len()` is greater than the segment's `maxdoc`.
+    pub fn get_range(&self, start: u32, output: &mut [Item]) {
+        let output_u64: &mut [u64] = unsafe { mem::transmute(output) };
+        self.bit_unpacker.get_range(start, output_u64);
+        for out in output_u64.iter_mut() {
+            *out = Item::from_u64(*out + self.min_value_u64).as_u64();
+        }
+    }
+
+    /// Returns the minimum value for this fast field.
+    ///
+    /// The min value does not take into account possible
+    /// deleted documents, and should be considered as a lower bound
+    /// of the actual minimum value.
+    pub fn min_value(&self) -> Item {
+        Item::from_u64(self.min_value_u64)
+    }
+
+    /// Returns the maximum value for this fast field.
+    ///
+    /// The max value does not take into account possible
+    /// deleted documents, and should be considered as an upper bound
+    /// of the actual maximum value.
+    pub fn max_value(&self) -> Item {
+        Item::from_u64(self.max_value_u64)
+    }
 }

-impl From<Vec<u64>> for U64FastFieldReader {
-    fn from(vals: Vec<u64>) -> U64FastFieldReader {
+impl<Item: FastValue> From<Vec<Item>> for FastFieldReader<Item> {
+    fn from(vals: Vec<Item>) -> FastFieldReader<Item> {
        let mut schema_builder = SchemaBuilder::default();
        let field = schema_builder.add_u64_field("field", FAST);
        let schema = schema_builder.build();
@@ -142,89 +117,21 @@ impl From<Vec<u64>> for U64FastFieldReader {
                    .get_field_writer(field)
                    .expect("With a RAMDirectory, this should never fail.");
                for val in vals {
-                    fast_field_writer.add_val(val);
+                    fast_field_writer.add_val(val.to_u64());
                }
            }
-            fast_field_writers.serialize(&mut serializer).unwrap();
+            fast_field_writers
+                .serialize(&mut serializer, &HashMap::new())
+                .unwrap();
            serializer.close().unwrap();
        }

        let source = directory.open_read(path).expect("Failed to open the file");
        let composite_file =
            CompositeFile::open(&source).expect("Failed to read the composite file");
-
        let field_source = composite_file
            .open_read(field)
            .expect("File component not found");
-        U64FastFieldReader::open(field_source)
-    }
-}
-
-/// `FastFieldReader` for signed 64-bits integers.
-pub struct I64FastFieldReader {
-    underlying: U64FastFieldReader,
-}
-
-impl I64FastFieldReader {
-    /// Returns the minimum value for this fast field.
-    ///
-    /// The min value does not take in account of possible
-    /// deleted document, and should be considered as a lower bound
-    /// of the actual minimum value.
-    pub fn min_value(&self) -> i64 {
-        common::u64_to_i64(self.underlying.min_value())
-    }
-
-    /// Returns the maximum value for this fast field.
-    ///
-    /// The max value does not take in account of possible
-    /// deleted document, and should be considered as an upper bound
-    /// of the actual maximum value.
-    pub fn max_value(&self) -> i64 {
-        common::u64_to_i64(self.underlying.max_value())
-    }
-}
-
-impl FastFieldReader for I64FastFieldReader {
-    type ValueType = i64;
-
-    ///
-    ///
-    /// # Panics
-    ///
-    /// May panic or return wrong random result if `doc`
-    /// is greater or equal to the segment's `maxdoc`.
-    fn get(&self, doc: DocId) -> i64 {
-        common::u64_to_i64(self.underlying.get(doc))
-    }
-
-    ///
-    /// # Panics
-    ///
-    /// May panic or return wrong random result if `doc`
-    /// is greater or equal to the segment's `maxdoc`.
-    fn get_range(&self, start: u32, output: &mut [Self::ValueType]) {
-        let output_u64: &mut [u64] = unsafe { mem::transmute(output) };
-        self.underlying.get_range(start, output_u64);
-        for mut_val in output_u64.iter_mut() {
-            *mut_val = common::u64_to_i64(*mut_val as u64) as u64;
-        }
-    }
-
-    /// Opens a new fast field reader given a read only source.
-    ///
-    /// # Panics
-    /// Panics if the data is corrupted.
-    fn open(data: ReadOnlySource) -> I64FastFieldReader {
-        I64FastFieldReader {
-            underlying: U64FastFieldReader::open(data),
-        }
-    }
-
-    fn is_enabled(field_type: &FieldType) -> bool {
-        match *field_type {
-            FieldType::I64(ref integer_options) => integer_options.is_fast(),
-            _ => false,
-        }
+        FastFieldReader::open(field_source)
    }
 }
--- a/src/fastfield/serializer.rs
+++ b/src/fastfield/serializer.rs
@@ -1,7 +1,8 @@
 use common::BinarySerializable;
 use directory::WritePtr;
 use schema::Field;
-use common::bitpacker::{compute_num_bits, BitPacker};
+use common::bitpacker::BitPacker;
+use common::compute_num_bits;
 use common::CountingWriter;
 use common::CompositeWrite;
 use std::io::{self, Write};
@@ -45,7 +46,18 @@ impl FastFieldSerializer {
        min_value: u64,
        max_value: u64,
    ) -> io::Result<FastSingleFieldSerializer<CountingWriter<WritePtr>>> {
-        let field_write = self.composite_write.for_field(field);
+        self.new_u64_fast_field_with_idx(field, min_value, max_value, 0)
+    }
+
+    /// Start serializing a new u64 fast field
+    pub fn new_u64_fast_field_with_idx(
+        &mut self,
+        field: Field,
+        min_value: u64,
+        max_value: u64,
+        idx: usize,
+    ) -> io::Result<FastSingleFieldSerializer<CountingWriter<WritePtr>>> {
+        let field_write = self.composite_write.for_field_with_idx(field, idx);
        FastSingleFieldSerializer::open(field_write, min_value, max_value)
    }

@@ -61,6 +73,7 @@ pub struct FastSingleFieldSerializer<'a, W: Write + 'a> {
    bit_packer: BitPacker,
    write: &'a mut W,
    min_value: u64,
+    num_bits: u8,
 }

 impl<'a, W: Write> FastSingleFieldSerializer<'a, W> {
@@ -73,18 +86,20 @@ impl<'a, W: Write> FastSingleFieldSerializer<'a, W> {
        let amplitude = max_value - min_value;
        amplitude.serialize(write)?;
        let num_bits = compute_num_bits(amplitude);
-        let bit_packer = BitPacker::new(num_bits as usize);
+        let bit_packer = BitPacker::new();
        Ok(FastSingleFieldSerializer {
            write,
            bit_packer,
            min_value,
+            num_bits,
        })
    }

    /// Pushes a new value to the currently open u64 fast field.
    pub fn add_val(&mut self, val: u64) -> io::Result<()> {
        let val_to_write: u64 = val - self.min_value;
-        self.bit_packer.write(val_to_write, &mut self.write)?;
+        self.bit_packer
+            .write(val_to_write, self.num_bits, &mut self.write)?;
        Ok(())
    }

--- a/src/fastfield/writer.rs
+++ b/src/fastfield/writer.rs
@@ -1,92 +1,110 @@
-use schema::{Document, Field, Schema};
+use schema::{Cardinality, Document, Field, Schema};
 use fastfield::FastFieldSerializer;
 use std::io;
-use schema::Value;
-use DocId;
 use schema::FieldType;
 use common;
 use common::VInt;
+use std::collections::HashMap;
+use postings::UnorderedTermId;
+use super::multivalued::MultiValueIntFastFieldWriter;
 use common::BinarySerializable;

 /// The fastfieldswriter regroup all of the fast field writers.
 pub struct FastFieldsWriter {
-    field_writers: Vec<IntFastFieldWriter>,
+    single_value_writers: Vec<IntFastFieldWriter>,
+    multi_values_writers: Vec<MultiValueIntFastFieldWriter>,
 }

 impl FastFieldsWriter {
    /// Create all `FastFieldWriter` required by the schema.
    pub fn from_schema(schema: &Schema) -> FastFieldsWriter {
-        let field_writers: Vec<IntFastFieldWriter> = schema
-            .fields()
-            .iter()
-            .enumerate()
-            .flat_map(|(field_id, field_entry)| {
-                let field = Field(field_id as u32);
-                match *field_entry.field_type() {
-                    FieldType::I64(ref int_options) => {
-                        if int_options.is_fast() {
-                            let mut fast_field_writer = IntFastFieldWriter::new(field);
-                            fast_field_writer.set_val_if_missing(common::i64_to_u64(0i64));
-                            Some(fast_field_writer)
-                        } else {
-                            None
-                        }
-                    }
-                    FieldType::U64(ref int_options) => {
-                        if int_options.is_fast() {
-                            Some(IntFastFieldWriter::new(field))
-                        } else {
-                            None
-                        }
-                    }
-                    _ => None,
-                }
-            })
-            .collect();
-        FastFieldsWriter { field_writers }
-    }
+        let mut single_value_writers = Vec::new();
+        let mut multi_values_writers = Vec::new();

-    /// Returns a `FastFieldsWriter`
-    /// with a `IntFastFieldWriter` for each
-    /// of the field given in argument.
-    pub fn new(fields: Vec<Field>) -> FastFieldsWriter {
+        for (field_id, field_entry) in schema.fields().iter().enumerate() {
+            let field = Field(field_id as u32);
+            let default_value = if let FieldType::I64(_) = *field_entry.field_type() {
+                common::i64_to_u64(0i64)
+            } else {
+                0u64
+            };
+            match *field_entry.field_type() {
+                FieldType::I64(ref int_options) | FieldType::U64(ref int_options) => {
+                    match int_options.get_fastfield_cardinality() {
+                        Some(Cardinality::SingleValue) => {
+                            let mut fast_field_writer = IntFastFieldWriter::new(field);
+                            fast_field_writer.set_val_if_missing(default_value);
+                            single_value_writers.push(fast_field_writer);
+                        }
+                        Some(Cardinality::MultiValues) => {
+                            let fast_field_writer = MultiValueIntFastFieldWriter::new(field, false);
+                            multi_values_writers.push(fast_field_writer);
+                        }
+                        None => {}
+                    }
+                }
+                FieldType::HierarchicalFacet => {
+                    let fast_field_writer = MultiValueIntFastFieldWriter::new(field, true);
+                    multi_values_writers.push(fast_field_writer);
+                }
+                _ => {}
+            }
+        }
        FastFieldsWriter {
-            field_writers: fields.into_iter().map(IntFastFieldWriter::new).collect(),
+            single_value_writers,
+            multi_values_writers,
        }
    }

    /// Get the `FastFieldWriter` associated to a field.
    pub fn get_field_writer(&mut self, field: Field) -> Option<&mut IntFastFieldWriter> {
        // TODO optimize
-        self.field_writers
+        self.single_value_writers
            .iter_mut()
-            .find(|field_writer| field_writer.field == field)
+            .find(|field_writer| field_writer.field() == field)
+    }
+
+    /// Returns the fast field multi-value writer for the given field.
+    ///
+    /// Returns None if the field does not exist, or is not
+    /// configured as a multivalued fastfield in the schema.
+    pub(crate) fn get_multivalue_writer(
+        &mut self,
+        field: Field,
+    ) -> Option<&mut MultiValueIntFastFieldWriter> {
+        // TODO optimize
+        // TODO expose for users
+        self.multi_values_writers
+            .iter_mut()
+            .find(|multivalue_writer| multivalue_writer.field() == field)
    }

    /// Indexes all of the fastfields of a new document.
    pub fn add_document(&mut self, doc: &Document) {
-        for field_writer in &mut self.field_writers {
+        for field_writer in &mut self.single_value_writers {
+            field_writer.add_document(doc);
+        }
+        for field_writer in &mut self.multi_values_writers {
+            field_writer.next_doc();
            field_writer.add_document(doc);
        }
    }

    /// Serializes all of the `FastFieldWriter`s by pushing them in
    /// order to the fast field serializer.
-    pub fn serialize(&self, serializer: &mut FastFieldSerializer) -> io::Result<()> {
-        for field_writer in &self.field_writers {
+    pub fn serialize(
+        &self,
+        serializer: &mut FastFieldSerializer,
+        mapping: &HashMap<Field, HashMap<UnorderedTermId, usize>>,
+    ) -> io::Result<()> {
+        for field_writer in &self.single_value_writers {
            field_writer.serialize(serializer)?;
        }
-        Ok(())
-    }
-
-    /// Ensures all of the fast field writers have
-    /// reached `doc`. (included)
-    ///
-    /// The missing values will be filled with 0.
-    pub fn fill_val_up_to(&mut self, doc: DocId) {
-        for field_writer in &mut self.field_writers {
-            field_writer.fill_val_up_to(doc);
+        for field_writer in &self.multi_values_writers {
+            let field = field_writer.field();
+            field_writer.serialize(serializer, mapping.get(&field))?;
        }
+        Ok(())
    }
 }

@@ -127,6 +145,11 @@ impl IntFastFieldWriter {
        }
    }

+    /// Returns the field that this writer is targeting.
+    pub fn field(&self) -> Field {
+        self.field
+    }
+
    /// Sets the default value.
    ///
    /// This default value is recorded for documents if
@@ -135,19 +158,6 @@ impl IntFastFieldWriter {
        self.val_if_missing = val_if_missing;
    }

-    /// Ensures all of the fast field writer have
-    /// reached `doc`. (included)
-    ///
-    /// The missing values will be filled with 0.
-    fn fill_val_up_to(&mut self, doc: DocId) {
-        let target = doc as usize + 1;
-        debug_assert!(self.val_count <= target);
-        let val_if_missing = self.val_if_missing;
-        while self.val_count < target {
-            self.add_val(val_if_missing);
-        }
-    }
-
    /// Records a new value.
    ///
    /// The n-th value being recorded is implicitely
@@ -180,11 +190,7 @@ impl IntFastFieldWriter {
    /// only the first one is taken in account.
    fn extract_val(&self, doc: &Document) -> u64 {
        match doc.get_first(self.field) {
-            Some(v) => match *v {
-                Value::U64(ref val) => *val,
-                Value::I64(ref val) => common::i64_to_u64(*val),
-                _ => panic!("Expected a u64field, got {:?} ", v),
-            },
+            Some(v) => super::value_to_u64(v),
            None => self.val_if_missing,
        }
    }
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Paul Masurel	232ca5c06c	Added convert to static [u8]	2018-04-10 21:18:32 +09:00
Paul Masurel	743ae102f1	Using bitpacker@3	2018-04-10 10:05:42 +09:00
Paul Masurel	e78af20375	remove comment	2018-04-09 21:51:17 +09:00
Paul Masurel	30637f7a7f	Ok on wasm	2018-03-31 17:42:26 +09:00
Paul Masurel	0107fe886b	Removed timer	2018-03-31 15:40:16 +09:00
Paul Masurel	1d9566e73c	Making mmap a feature	2018-03-31 13:23:43 +09:00
Paul Masurel	8006f1df11	Added comments	2018-03-28 08:28:49 +09:00
Paul Masurel	ffa03bad71	TermScorer does not handle deletes	2018-03-27 17:35:20 +09:00
Paul Masurel	98cf4ba63a	Small refactor of postings's skip method	2018-03-27 16:14:28 +09:00
Paul Masurel	4d65771e04	field norm reader is not an option anymore.	2018-03-26 13:25:29 +09:00
Paul Masurel	9712a75399	Added unit test for intersection score	2018-03-25 12:58:24 +09:00
Paul Masurel	3ae03b91ae	PhraseScorer's score aligned with that of Lucene.)	2018-03-25 12:44:16 +09:00
Paul Masurel	238b02ce7d	Bugfixed	2018-03-23 18:50:57 +09:00
Paul Masurel	3091459777	Fixed main bug. Unit test still not passing because of altered scoring	2018-03-23 13:52:10 +09:00
Paul Masurel	b7f8884246	Closes #245 = BM25. (#260 ) * Closes #245 = BM25. Scores are the same as Lucene. * Fixing travis conf	2018-03-22 15:06:56 +09:00
Paul Masurel	e22f767fda	Backmerge	2018-03-21 21:18:46 +09:00
Paul Masurel	3ecfc36e53	Total field norm fixed.	2018-03-21 20:43:02 +09:00
Paul Masurel	1c9450174e	Fieldnorm reader working except merge	2018-03-21 17:36:16 +09:00
Paul Masurel	cde4c391cd	Added fieldnorm module	2018-03-21 15:41:46 +09:00
Paul Masurel	6d47634616	Added unit tests	2018-03-20 12:11:28 +09:00
Paul Masurel	39b182c24b	Simplified phrase queries. Reading several time is ok.	2018-03-20 11:47:48 +09:00
Paul Masurel	baaae3f4ec	Making it possible to read positions twice	2018-03-20 11:36:22 +09:00
Paul Masurel	63064601a7	Readded test for reading positions twice	2018-03-20 10:04:36 +09:00
Paul Masurel	07a8023a3a	Added	2018-03-19 14:36:43 +09:00
Paul Masurel	59639cd311	In sync with master. Fixed merging	2018-03-19 12:58:42 +09:00
Paul Masurel	b0e5e1f61d	Back merged master	2018-03-19 12:19:08 +09:00
Paul Masurel	234a902470	Removed cc from Cargo.toml	2018-03-19 12:09:25 +09:00
Paul Masurel	75d130f1ce	Edited CHANGELOG	2018-03-19 12:01:48 +09:00
Paul Masurel	410187dd24	Removed .vimrc	2018-03-19 11:54:10 +09:00
Paul Masurel	88303d4833	Removed script directory	2018-03-19 11:53:15 +09:00
Paul Masurel	a26b0ff4a2	Removed exclude cpp from travis configuration	2018-03-19 11:51:41 +09:00
Paul Masurel	d4ed86f13a	Issue/255 (#256 ) * Remove cpp compression. * Pointing to publish bitpacking * Edited README	2018-03-19 11:48:40 +09:00
Paul Masurel	fc8902353c	fieldnrom encoding. test broken	2018-03-10 18:35:16 +09:00
Paul Masurel	a2ee988304	Small change in pop_lowest.	2018-03-10 15:32:30 +09:00
Paul Masurel	97b7984200	Updated CHANGELOG	2018-03-10 14:08:11 +09:00
Paul Masurel	8683718159	Version bump	2018-03-10 14:01:30 +09:00
Paul Masurel	0cf274135b	Clippy	2018-03-10 13:07:18 +09:00
Paul Masurel	a3b44773bb	Bugfix and rustfmt	2018-03-10 12:21:50 +09:00
Paul Masurel	ec7c582109	NOBUG no-simd compression fix	2018-03-09 14:19:58 +09:00
Ewan Higgs	ee7ab72fb1	Support trailing commas using ',+ ,' trick from Blandy 2017. (#250 )	2018-02-27 10:33:39 +09:00
Paul Masurel	2c20759829	removed unsafecell for position computer	2018-02-24 12:07:55 +09:00
Paul Masurel	23387b0ed0	Positions writes to an external Vec	2018-02-24 11:14:45 +09:00
Dylan DPC	e82859f2e6	Update Cargo.toml (#249 )	2018-02-24 09:17:33 +09:00
Paul Masurel	be830b03c5	Bugfix in intersection.advance and impl skip_next	2018-02-23 11:55:23 +09:00
Paul Masurel	1b94a3e382	Phrase query optimisation	2018-02-23 00:00:22 +09:00
Paul Masurel	c3fbc4c8fa	Simplified a notch TinySet::pop_lowest()	2018-02-22 10:43:06 +09:00
Paul Masurel	4ee2db25a0	Generic on Postings rather than deletes in TermScorer	2018-02-22 08:26:45 +09:00
Paul Masurel	e423784fd0	Added specialized SegmentPostings when there are no DeleteSet	2018-02-21 23:49:20 +09:00
Paul Masurel	fdb9c3c516	Tantivy version 0.5.0	2018-02-21 11:38:26 +09:00
Paul Masurel	6fb114224a	Added unit test	2018-02-21 00:13:04 +09:00
Paul Masurel	2c3e33895a	Added unit tests	2018-02-21 00:03:41 +09:00
Paul Masurel	d512b53688	Added handling of parenthesis in query parser	2018-02-20 23:18:02 +09:00
Paul Masurel	c8afd2b55d	Added unit tests	2018-02-20 17:05:33 +09:00
Paul Masurel	3fd6d7125b	Added unit test	2018-02-20 13:12:05 +09:00
Paul Masurel	de6a3987a9	Ignoring functional test	2018-02-20 12:58:06 +09:00
Paul Masurel	3dedc465fa	Merge branch 'feature/multivalued-i64-u64'	2018-02-20 12:54:18 +09:00
Paul Masurel	f16cc6367e	Refactoring of fastfields	2018-02-20 12:52:30 +09:00
Paul Masurel	4026fc5fb1	Removed redundant compressed_block_size function	2018-02-20 08:28:28 +09:00
Paul Masurel	43742a93ef	Multivalue u64 field / i64 field.	2018-02-20 00:16:20 +09:00
Paul Masurel	2a843d86cb	Code cleaning	2018-02-19 21:51:39 +09:00
Paul Masurel	9a706c296a	Larger union horizon	2018-02-19 21:50:33 +09:00
Paul Masurel	5ff8123b7a	Code cleaning	2018-02-19 15:41:19 +09:00
Paul Masurel	6061158506	Added long running test to travis conf	2018-02-19 13:23:04 +09:00
Paul Masurel	4e8b0e89d9	Added unit test	2018-02-19 13:19:18 +09:00
Paul Masurel	0540ebb49e	Cargo clippy	2018-02-19 12:36:24 +09:00
Paul Masurel	ef94582203	Rustfmt	2018-02-19 12:12:10 +09:00
Paul Masurel	2f242d5f52	Moving docset around	2018-02-19 12:07:05 +09:00
Paul Masurel	da3d372e6e	Faster union counts	2018-02-19 10:17:16 +09:00
Paul Masurel	42fd3fe5c7	Bugfix on TermWeight::count()	2018-02-18 10:59:18 +09:00
Paul Masurel	5dae6e6bbc	Downcast `TermScorer` for intersection when all legs are TermScorers	2018-02-18 10:28:43 +09:00
Paul Masurel	e608e0a1df	Removed half baked usage of Any	2018-02-18 10:01:14 +09:00
Paul Masurel	6c8c90d348	Removed lifetime from scorer	2018-02-18 09:12:40 +09:00
Paul Masurel	eb50e92ec4	Removed specialized postings on SegmentPostings	2018-02-18 00:09:15 +09:00
Paul Masurel	20bede9462	Bugfix when requesting no termfreq.	2018-02-17 22:41:12 +09:00
Paul Masurel	4640ab4e65	Merge branch 'master' into issue/query-perf	2018-02-17 17:31:51 +09:00
Paul Masurel	cd51ed0f9f	Added comments	2018-02-17 16:59:28 +09:00
Paul Masurel	6676fe5717	Added a count method	2018-02-17 15:02:51 +09:00
Paul Masurel	292bb17346	Disable scoring - Disabling scoring is an argument of the `.weight()` method - Collectors declare whether they need scoring	2018-02-17 12:43:16 +09:00
Paul Masurel	0300e7272b	Scoring for union.	2018-02-17 11:56:21 +09:00
Paul Masurel	8760899fa2	Stupid implementaiton of Box<Scorer>::collect	2018-02-16 19:30:50 +09:00
Paul Masurel	c89d570a79	rustfmt	2018-02-16 17:50:05 +09:00
Paul Masurel	1da06d867b	Using the same logic when score is enabled.	2018-02-16 17:36:33 +09:00
Paul Masurel	76e8db6ed3	blop	2018-02-16 14:57:08 +09:00
Paul Masurel	31e5580bfa	Renaming intersection / exclude	2018-02-16 11:55:56 +09:00
Paul Masurel	930d3db2f7	Integrated reqopt_scorer	2018-02-16 11:43:27 +09:00
Paul Masurel	1593e1dc6f	Added reqopt	2018-02-16 11:22:39 +09:00
Paul Masurel	e0189fc9e6	Added exclude query	2018-02-14 18:06:51 +09:00
Paul Masurel	ffdb4ef0a7	Added unit test	2018-02-14 11:58:40 +09:00
Paul Masurel	58845344c2	Unit test + bugfix in union	2018-02-13 14:54:20 +09:00
Paul Masurel	548ec9ecca	Added ok unit test	2018-02-12 17:48:41 +09:00
Paul Masurel	86b700fa93	Updated travis.yml	2018-02-12 12:13:36 +09:00
Paul Masurel	e95c49e749	Added unit test to show bug in intersection	2018-02-12 12:06:19 +09:00
Paul Masurel	f3033a8469	Added sudo required to travis conf because of https://github.com/travis-ci/travis-ci/issues/9061	2018-02-12 11:19:12 +09:00
Paul Masurel	c4125bda59	Backmerging master	2018-02-12 11:08:57 +09:00
Paul Masurel	a7ffc0e610	Rustfmt	2018-02-12 10:31:29 +09:00
Paul Masurel	9370427ae2	Terminfo blocks (#244 ) * Using u64 key in the store * Using Option<> for the next element, as opposed to u64 * Code simplification. * Added TermInfoStoreWriter. * Added a TermInfoStore * Added FixedSized for BinarySerialized.	2018-02-12 10:24:58 +09:00
Paul Masurel	1fc7afa90a	Issue/range query (#242 ) BitSet and RangeQuery	2018-02-05 09:33:25 +09:00
Paul Masurel	6a104e4f69	Cargo fmt	2018-02-03 11:59:34 +09:00
Paul Masurel	920f086e1d	Clippy	2018-02-03 11:46:01 +09:00
Paul Masurel	13aaca7e11	Merge branch 'master' into merge-facets	2018-02-03 11:13:02 +09:00
Paul Masurel	df53dc4ceb	Format	2018-02-03 00:21:05 +09:00
Paul Masurel	dd028841e8	Added documentation / test and change the contract of .add_facet()	2018-02-03 00:17:51 +09:00
Paul Masurel	eb84b8a60d	bugfix	2018-02-02 18:52:07 +09:00
Paul Masurel	c05f46ad0e	skip for intersection	2018-02-02 17:22:58 +09:00
Paul Masurel	435ff9d524	Make constructor of RangeQuery public	2018-02-02 16:50:22 +09:00
Paul Masurel	fdd5dd8496	Merge branch 'master' into issue/query-perf	2018-02-02 16:39:28 +09:00
Paul Masurel	fb5476d5de	Query optimization: phrase query + union	2018-02-02 16:39:17 +09:00
Paul Masurel	dd8332c327	Added disabling scoring	2018-02-02 12:11:56 +09:00
Paul Masurel	63d201150b	issue/range-query Added range query	2018-02-02 00:41:12 +09:00
Paul Masurel	b78efdc59f	NOBUG Use the skipping logic of segment postings in	2018-02-01 18:36:55 +09:00
Paul Masurel	5cb08f7996	Method to create bitset from DocSet directly.	2018-02-01 18:25:43 +09:00
Paul Masurel	1947a19700	Added bitse	2018-01-31 23:56:54 +09:00
Paul Masurel	271b019420	added cargo doc	2018-01-30 15:18:19 +09:00
Paul Masurel	340693184f	Added comment	2018-01-30 15:15:55 +09:00
Paul Masurel	97782a9511	updated travis-cargo	2018-01-30 13:18:51 +09:00
Paul Masurel	930010aa88	Unit test passing	2018-01-28 00:03:51 +09:00
Paul Masurel	7f5b07d4e7	Fixing unit tests	2018-01-25 14:55:29 +09:00
Paul Masurel	3edb3dce6a	Test not passing	2018-01-25 12:46:32 +09:00
Paul Masurel	1edaf7a312	Closes #236 . Removes dependency to version.	2018-01-20 12:12:43 +09:00
Paul Masurel	137906ff29	Fixing PhraseQuery, broken due to the reordering of the intersection clauses. Closes #234	2018-01-12 21:01:28 +09:00
Paul Masurel	143a143cde	issue/232 added unit test. (#233 )	2018-01-11 23:37:45 +09:00