Exposed API to create a new Segment.

Merge branch 'issue/136' into tantivy-imhotep
Removed the clunky linked list logic of the heap.
2026-01-04 16:22:55 +00:00 · 2017-05-13 15:15:35 +09:00 · 2017-05-12 17:04:20 +09:00 · 2017-05-12 14:01:52 +09:00 · 2017-05-12 13:51:09 +09:00 · 2017-05-12 13:39:04 +09:00
156 changed files with 105164 additions and 3230 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1 @@
+cpp/* linguist-vendored
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@ target/release
 Cargo.lock
 benchmark
 .DS_Store
+cpp/simdcomp/bitpackingbenchmark
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "cpp/simdcomp"]
-	path = cpp/simdcomp
-	url = git@github.com:lemire/simdcomp.git
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,11 +1,6 @@
 language: rust
 rust:
  - nightly
-git:
-  submodules: false
-before_install:
-  - sed -i 's/git@github.com:/https:\/\/github.com\//' .gitmodules
-  - git submodule update --init --recursive
 env:
  global:
    - CC=gcc-4.8
@@ -33,8 +28,9 @@ script:
    travis-cargo test &&
    travis-cargo bench &&
    travis-cargo doc
+  - cargo run --example simple_search
 after_success:
  - bash ./script/build-doc.sh
  - travis-cargo doc-upload
  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then travis-cargo coveralls --no-sudo --verify; fi
-  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ./kcov/build/src/kcov --verify --coveralls-id=$TRAVIS_JOB_ID --exclude-pattern=/.cargo target/kcov target/debug/tantivy-*; fi
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ./kcov/build/src/kcov --verify --coveralls-id=$TRAVIS_JOB_ID --include-path=`pwd`/src --exclude-path=`pwd`/cpp --exclude-pattern=/.cargo target/kcov target/debug/tantivy-*; fi
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -0,0 +1,64 @@
+Tantivy 0.4.0
+==========================
+- Raise the limit of number of fields (previously 256 fields)
+- Removed u32 fields. They are replaced by u64 and i64 fields (#65)
+- Replacing rustc_serialize by serde. Kudos to @KodrAus and @lnicola
+- QueryParser:
+  - Explicit error returned when searching for a term that is not indexed
+  - Searching for an int term via the query parser was broken (e.g. `age:1`)
+  - Searching for a non-indexed field returns an explicit Error
+  - Phrase queries on non-tokenized fields are not tokenized by the query parser.
+
+Tantivy 0.3.1
+==========================
+
+- Expose a method to trigger files garbage collection
+
+
+Tantivy 0.3
+==========================
+
+
+Special thanks to @Kodraus @lnicola @Ameobea @manuel-woelker @celaus
+for their contribution to this release.
+
+Thanks also to everyone in tantivy gitter chat 
+for their advice and company :)
+
+https://gitter.im/tantivy-search/tantivy
+
+
+Warning:
+
+Tantivy 0.3 is NOT backward compatible with tantivy 0.2 
+code and index format.
+You should not expect backward compatibility before 
+tantivy 1.0.
+
+
+New Features
+------------
+
+- Delete. You can now delete documents from an index.
+- Support for windows (Thanks to @lnicola)
+
+
+Various Bugfixes & small improvements
+----------------------------------------
+
+- Added CI for Windows (https://ci.appveyor.com/project/fulmicoton/tantivy)
+Thanks to @KodrAus ! (#108)
+- Various dependency version updates (Thanks to @Ameobea) #76
+- Fixed several race conditions in `Index.wait_merge_threads`
+- Fixed #72. Mmaps were never released.
+- Fixed #80. Fast field used to take an amplitude of 32 bits after a merge. (Ouch!)
+- Fixed #92. u32 are now encoded using big endian in the fst
+  in order to make their enumeration consistent with
+  the natural ordering.
+- Building binary targets for tantivy-cli (Thanks to @KodrAus)
+- Misc invisible bug fixes, and code cleanup.
+- Use 
+
+
+
+
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,43 +1,51 @@
 [package]
 name = "tantivy"
-version = "0.1.1"
+version = "0.4.0-alpha"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 build = "build.rs"
 license = "MIT"
-
+categories = ["database-implementations", "data-structures"]
 description = """Tantivy is a search engine library."""
-
-documentation = "http://fulmicoton.com/tantivy/tantivy/index.html"
+documentation = "https://tantivy-search.github.io/tantivy/tantivy/index.html"
 homepage = "https://github.com/tantivy-search/tantivy"
 repository = "https://github.com/tantivy-search/tantivy"
-
 readme = "README.md"
 keywords = ["search", "information", "retrieval"]

 [dependencies]
-byteorder = "0.4"
-memmap = "0.2"
-lazy_static = "0.1"
-regex = "0.1"
-fst = "0.1"
-atomicwrites = "0.0.14"
-tempfile = "2.0"
-rustc-serialize = "0.3"
-log = "0.3"
-combine = "2.0.*"
+byteorder = "1.0"
+memmap = "0.4"
+lazy_static = "0.2.1"
+regex = "0.2"
+fst = "0.1.37"
+atomicwrites = "0.1.3"
+tempfile = "2.1"
+log = "0.3.6"
+combine = "2.2"
 tempdir = "0.3"
-bincode = "0.4"
-libc = {version = "0.2.6", optional=true}
-num_cpus = "0.2"
-itertools = "0.4"
-lz4 = "1.13"
+serde = "1.0"
+serde_derive = "1.0"
+serde_json = "1.0"
+bincode = "0.7.0-alpha7"
+libc = {version = "0.2.20", optional=true}
+num_cpus = "1.2"
+itertools = "0.5.9"
+lz4 = "1.20"
+bit-set = "0.4.0"
 time = "0.1"
-uuid = "0.1"
+uuid = { version = "0.5", features = ["v4", "serde"] }
 chan = "0.1"
+version = "2"
 crossbeam = "0.2"
+futures = "0.1.9"
+futures-cpupool = "0.1.2"
+
+[target.'cfg(windows)'.dependencies]
+winapi = "0.2"

 [dev-dependencies]
 rand = "0.3"
+env_logger = "0.4"

 [build-dependencies]
 gcc = {version = "0.3", optional=true}
@@ -52,3 +60,7 @@ debug-assertions = false
 [features]
 default = ["simdcompression"]
 simdcompression = ["libc", "gcc"]
+
+
+[badges]
+travis-ci = { repository = "tantivy-search/tantivy" }
--- a/README.md
+++ b/README.md
@@ -1,45 +1,52 @@
 ![Tantivy](https://tantivy-search.github.io/logo/tantivy-logo.png)

 [![Build Status](https://travis-ci.org/tantivy-search/tantivy.svg?branch=master)](https://travis-ci.org/tantivy-search/tantivy)
-[![Coverage Status](https://coveralls.io/repos/github/tantivy-search/tantivy/badge.svg?branch=master)](https://coveralls.io/github/tantivy-search/tantivy?branch=master)
-[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+[![Coverage Status](https://coveralls.io/repos/github/tantivy-search/tantivy/badge.svg?branch=master&refresh1)](https://coveralls.io/github/tantivy-search/tantivy?branch=master)
 [![Join the chat at https://gitter.im/tantivy-search/tantivy](https://badges.gitter.im/tantivy-search/tantivy.svg)](https://gitter.im/tantivy-search/tantivy?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
-
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+[![Build status](https://ci.appveyor.com/api/projects/status/r7nb13kj23u8m9pj?svg=true)](https://ci.appveyor.com/project/fulmicoton/tantivy)
+![beacon for google analytics](https://ga-beacon.appspot.com/UA-88834340-1/tantivy/README)

 **Tantivy** is a **full text search engine library** written in rust.

 It is strongly inspired by Lucene's design.

+
 # Features

 - configurable indexing (optional term frequency and position indexing)
 - tf-idf scoring
 - Basic query language
+- Phrase queries
 - Incremental indexing
 - Multithreaded indexing (indexing English Wikipedia takes 4 minutes on my desktop)
 - mmap based
- SIMD integer compression
+- optional SIMD integer compression
 - u32 fast fields (equivalent of doc values in Lucene)
 - LZ4 compressed document store
 - Cheesy logo with a horse

+Tantivy supports Linux, MacOS and Windows.
+
+
 # Getting started

 - [tantivy's usage example](http://fulmicoton.com/tantivy-examples/simple_search.html)
- [tantivy-cli and its tutorial](https://github.com/fulmicoton/tantivy-cli).
+- [tantivy-cli and its tutorial](https://github.com/tantivy-search/tantivy-cli).
 It will walk you through getting a wikipedia search engine up and running in a few minutes.
- [reference doc](http://fulmicoton.com/tantivy/tantivy/index.html).
-
+- [reference doc]
+    - [For the last released version](https://docs.rs/tantivy/)
+    - [For the last master branch](https://tantivy-search.github.io/tantivy/tantivy/index.html)

 # Compiling 

+Tantivy requires Rust Nightly because it requires the features [`box_syntax`](https://doc.rust-lang.org/stable/book/box-syntax-and-patterns.html), [`optin_builtin_traits`](https://github.com/rust-lang/rfcs/blob/master/text/0019-opt-in-builtin-traits.md), and [`conservative_impl_trait`](https://github.com/rust-lang/rfcs/blob/master/text/1522-conservative-impl-trait.md).
 By default, `tantivy` uses a git submodule called `simdcomp`.
 After cloning the repository, you will need to initialize and update
 the submodules. The project can then be built using `cargo`.

-    git clone git@github.com:fulmicoton/tantivy.git
-    git submodule init
-    git submodule update
+    git clone git@github.com:tantivy-search/tantivy.git
+    cd tantivy
    cargo build


--- a/appveyor.yml
+++ b/appveyor.yml
@@ -0,0 +1,25 @@
+# Appveyor configuration template for Rust using rustup for Rust installation
+# https://github.com/starkat99/appveyor-rust
+
+os: Visual Studio 2015
+environment:
+  matrix:
+    - channel: nightly
+      target: x86_64-pc-windows-msvc
+    - channel: nightly
+      target: x86_64-pc-windows-gnu
+      msys_bits: 64
+
+install:
+  - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
+  - rustup-init -yv --default-toolchain %channel% --default-host %target%
+  - set PATH=%PATH%;%USERPROFILE%\.cargo\bin
+  - if defined msys_bits set PATH=%PATH%;C:\msys64\mingw%msys_bits%\bin
+  - rustc -vV
+  - cargo -vV
+
+build: false
+
+test_script:
+  - REM SET RUST_LOG=tantivy,test & cargo test --verbose
+  - REM SET RUST_LOG=tantivy,test & cargo run --example simple_search
--- a/build.rs
+++ b/build.rs
@@ -1,40 +1,59 @@
-#[cfg(feature= "simdcompression")]
+#[cfg(feature = "simdcompression")]
 mod build {
    extern crate gcc;

-    use std::process::Command;
-
    pub fn build() {
-        Command::new("make")
-            .current_dir("cpp/simdcomp")
-            .output()
-            .unwrap_or_else(|e| { panic!("Failed to make simdcomp: {}", e) });
-        gcc::Config::new()
-                    .cpp(true)
-                    .flag("-std=c++11")
-                    .flag("-O3")
-                    .flag("-mssse3")
-                    .include("./cpp/simdcomp/include")
-                    .object("cpp/simdcomp/avxbitpacking.o")
-                    .object("cpp/simdcomp/simdintegratedbitpacking.o")
-                    .object("cpp/simdcomp/simdbitpacking.o")
-                    .object("cpp/simdcomp/simdpackedsearch.o")
-                    .object("cpp/simdcomp/simdcomputil.o")
-                    .object("cpp/simdcomp/simdpackedselect.o")
-                    .object("cpp/simdcomp/simdfor.o")
-                    .file("cpp/simdcomp_wrapper.cpp")
-                    .compile("libsimdcomp.a");
-        println!("cargo:rustc-flags=-l dylib=stdc++");
+        let mut config = gcc::Config::new();
+        config
+            .include("./cpp/simdcomp/include")
+            .file("cpp/simdcomp/src/avxbitpacking.c")
+            .file("cpp/simdcomp/src/simdintegratedbitpacking.c")
+            .file("cpp/simdcomp/src/simdbitpacking.c")
+            .file("cpp/simdcomp/src/simdpackedsearch.c")
+            .file("cpp/simdcomp/src/simdcomputil.c")
+            .file("cpp/simdcomp/src/simdpackedselect.c")
+            .file("cpp/simdcomp/src/simdfor.c")
+            .file("cpp/simdcomp_wrapper.c");
+
+        if !cfg!(debug_assertions) {
+            config.opt_level(3);
+
+            if cfg!(target_env = "msvc") {
+                config
+                    .define("NDEBUG", None)
+                    .flag("/Gm-")
+                    .flag("/GS-")
+                    .flag("/Gy")
+                    .flag("/Oi")
+                    .flag("/GL");
+            }
+        }
+
+        if !cfg!(target_env = "msvc") {
+            config
+                .include("./cpp/streamvbyte/include")
+                .file("cpp/streamvbyte/src/streamvbyte.c")
+                .file("cpp/streamvbyte/src/streamvbytedelta.c")
+                .flag("-msse4.1")
+                .flag("-march=native")
+                .flag("-std=c99");
+        }
+
+        config.compile("libsimdcomp.a");
+
+        // Workaround for linking static libraries built with /GL
+        // https://github.com/rust-lang/rust/issues/26003
+        if !cfg!(debug_assertions) && cfg!(target_env = "msvc") {
+            println!("cargo:rustc-link-lib=dylib=simdcomp");
+        }
    }
 }

-#[cfg(not(feature= "simdcompression"))]
+#[cfg(not(feature = "simdcompression"))]
 mod build {
-    pub fn build() {
-    }
+    pub fn build() {}
 }

-
 fn main() {
    build::build();
 }
--- a/cpp/simdcomp
+++ b/cpp/simdcomp
--- a/cpp/simdcomp/.gitignore
+++ b/cpp/simdcomp/.gitignore
@@ -0,0 +1,9 @@
+Makefile.in
+lib*
+unit*
+*.o
+src/*.lo
+src/*.o
+src/.deps
+src/.dirstamp
+src/.libs
--- a/cpp/simdcomp/.travis.yml
+++ b/cpp/simdcomp/.travis.yml
@@ -0,0 +1,11 @@
+language: c
+sudo: false
+compiler:
+  - gcc
+  - clang
+
+branches:
+  only:
+    - master
+
+script: make && ./unit
--- a/cpp/simdcomp/CHANGELOG
+++ b/cpp/simdcomp/CHANGELOG
@@ -0,0 +1,9 @@
+Upcoming
+  - added missing include
+  - improved portability (MSVC)
+  - implemented C89 compatibility
+Version 0.0.3 (19 May 2014)
+  - improved documentation
+Version 0.0.2 (6 February 2014)
+  - added go demo
+Version 0.0.1  (5 February 2014)
--- a/cpp/simdcomp/LICENSE
+++ b/cpp/simdcomp/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2014--, The authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following disclaimer in the documentation and/or
+  other materials provided with the distribution.
+
+* Neither the name of the {organization} nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/cpp/simdcomp/README.md
+++ b/cpp/simdcomp/README.md
@@ -0,0 +1,137 @@
+The SIMDComp library
+====================
+[![Build Status](https://travis-ci.org/lemire/simdcomp.png)](https://travis-ci.org/lemire/simdcomp)
+
+A simple C library for compressing lists of integers using binary packing and SIMD instructions.
+The assumption is either that you have a list of 32-bit integers where most of them are small, or a list of 32-bit integers where differences between successive integers are small. No software is able to reliably compress an array of 32-bit random numbers.
+
+This library can decode at least 4 billion compressed integers per second on most
+desktop or laptop processors. That is, it can decompress data at a rate of 15 GB/s.
+This is significantly faster than generic codecs like gzip, LZO, Snappy or LZ4.
+
+On a Skylake Intel processor, it can decode integers at a rate of 0.3 cycles per integer,
+which can easily translate into more than 8 billion decoded integers per second.
+
+Contributors: Daniel Lemire, Nathan Kurz, Christoph Rupp, Anatol Belski, Nick White and others
+
+What is it for?
+-------------
+
+This is a low-level library for fast integer compression. By design it does not define a compressed
+format. It is up to the (sophisticated) user to create a compressed format.
+
+Requirements
+-------------
+
+- Your processor should support SSE4.1 (It is supported by most Intel and AMD processors released since 2008.)
+- It is possible to build the core part of the code if your processor supports SSE2 (Pentium4 or better)
+- C99 compliant compiler (GCC is assumed)
+- A Linux-like distribution is assumed by the makefile
+
+For a plain C version that does not use SIMD instructions, see https://github.com/lemire/LittleIntPacker
+
+Usage
+-------
+
+Compression works over blocks of 128 integers.
+
+For a complete working example, see example.c (you can build it and
+run it with "make example; ./example").
+
+
+
+1) Lists of integers in random order.
+
+```C            
+const uint32_t b = maxbits(datain);// computes bit width
+simdpackwithoutmask(datain, buffer, b);//compressed to buffer, compressing 128 32-bit integers down to b*16 bytes (b 128-bit words)
+simdunpack(buffer, backbuffer, b);//uncompressed to backbuffer
+```
+
+While 128 32-bit integers are read, only b 128-bit words are written. Thus, the compression ratio is 32/b.
+
+2) Sorted lists of integers.
+
+We used differential coding: we store the difference between successive integers. For this purpose, we need an initial value (called offset).
+
+```C            
+uint32_t offset = 0;
+uint32_t b1 = simdmaxbitsd1(offset,datain); // bit width
+simdpackwithoutmaskd1(offset, datain, buffer, b1);//compressing 128 32-bit integers down to b1*16 bytes (b1 128-bit words)
+simdunpackd1(offset, buffer, backbuffer, b1);//uncompressed
+```
+
+General example for arrays of arbitrary length:
+```C
+int compress_decompress_demo() {
+  size_t k, N = 9999;
+  __m128i * endofbuf;
+  uint32_t * datain = malloc(N * sizeof(uint32_t));
+  uint8_t * buffer;
+  uint32_t * backbuffer = malloc(N * sizeof(uint32_t));
+  uint32_t b;
+
+  for (k = 0; k < N; ++k){        /* start with k=0, not k=1! */
+    datain[k] = k;
+  }
+
+  b = maxbits_length(datain, N);
+  buffer = malloc(simdpack_compressedbytes(N,b)); // allocate just enough memory
+  endofbuf = simdpack_length(datain, N, (__m128i *)buffer, b);
+  /* compressed data is stored between buffer and endofbuf using (endofbuf-buffer)*sizeof(__m128i) bytes */
+  /* would be safe to do : buffer = realloc(buffer,(endofbuf-(__m128i *)buffer)*sizeof(__m128i)); */
+  simdunpack_length((const __m128i *)buffer, N, backbuffer, b);
+
+  for (k = 0; k < N; ++k){
+    if(datain[k] != backbuffer[k]) {
+      printf("bug\n");
+      return -1;
+    }
+  }
+  return 0;
+}
+```
+
+
+3) Frame-of-Reference 
+
+We also have frame-of-reference (FOR) functions (see simdfor.h header). They work like the bit packing
+routines, but do not use differential coding so they allow faster search in some cases, at the expense
+of compression.
+
+Setup
+---------
+
+
+make
+make test
+
+and if you are daring:
+
+make install
+
+Go
+--------
+
+If you are a go user, there is a "go" folder where you will find a simple demo.
+
+Other libraries
+----------------
+
+* Fast decoder for VByte-compressed integers https://github.com/lemire/MaskedVByte
+* Fast integer compression in C using StreamVByte https://github.com/lemire/streamvbyte
+* FastPFOR is a C++ research library well suited to compress unsorted arrays: https://github.com/lemire/FastPFor
+* SIMDCompressionAndIntersection is a C++ research library well suited for sorted arrays (differential coding)
+and computing intersections: https://github.com/lemire/SIMDCompressionAndIntersection
+* TurboPFor is a C library that offers lots of interesting optimizations. Well worth checking! (GPL license) https://github.com/powturbo/TurboPFor
+* Oroch is a C++ library that offers a usable API (MIT license) https://github.com/ademakov/Oroch
+
+
+References
+------------
+
+* Daniel Lemire, Leonid Boytsov, Nathan Kurz, SIMD Compression and the Intersection of Sorted Integers, Software Practice & Experience 46 (6) 2016. http://arxiv.org/abs/1401.6399
+* Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second through vectorization, Software Practice & Experience 45 (1), 2015.  http://arxiv.org/abs/1209.2137 http://onlinelibrary.wiley.com/doi/10.1002/spe.2203/abstract
+* Jeff Plaisance, Nathan Kurz, Daniel Lemire, Vectorized VByte Decoding, International Symposium on Web Algorithms 2015, 2015. http://arxiv.org/abs/1503.07387
+* Wayne Xin Zhao, Xudong Zhang, Daniel Lemire, Dongdong Shan, Jian-Yun Nie, Hongfei Yan, Ji-Rong Wen, A General SIMD-based Approach to Accelerating Compression Algorithms, ACM Transactions on Information Systems 33 (3), 2015. http://arxiv.org/abs/1502.01916
+* T. D. Wu, Bitpacking techniques for indexing genomes: I. Hash tables, Algorithms for Molecular Biology 11 (5), 2016. http://almob.biomedcentral.com/articles/10.1186/s13015-016-0069-5
--- a/cpp/simdcomp/benchmarks/benchmark.c
+++ b/cpp/simdcomp/benchmarks/benchmark.c
@@ -0,0 +1,235 @@
+/**
+ * This code is released under a BSD License.
+ */
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "simdcomp.h"
+
+#ifdef _MSC_VER
+# include <windows.h>
+
+__int64 freq;
+
+typedef __int64 time_snap_t;
+
+static time_snap_t time_snap(void) /* MSVC-only (_MSC_VER) timestamp helper used around the timed benchmark loops */
+{
+	__int64 now;
+	/* read the raw performance counter value into `now` */
+	QueryPerformanceCounter((LARGE_INTEGER *)&now);
+	/* scale by 1000000/freq (global set in main() via QueryPerformanceFrequency); presumably yields microseconds */
+	return (__int64)((now*1000000)/freq);
+}
+# define TIME_SNAP_FMT "%I64d"
+#else
+# define time_snap clock
+# define TIME_SNAP_FMT "%lu"
+typedef clock_t time_snap_t;
+#endif
+
+
+/* Times simdselectd1 (fetch one element of a delta-packed block) against a full
+ * simdunpackd1 for every bit width 0..32 and prints both elapsed times per width. */
+void benchmarkSelect() {
+    uint32_t buffer[128];
+    uint32_t backbuffer[128];
+    uint32_t initial = 33;
+    uint32_t b;
+    time_snap_t S1, S2, S3;
+    int i;
+    printf("benchmarking select \n");
+    /* for each bit width b: build a non-decreasing block of 128 values whose
+     * deltas fit in b bits, pack it, then select every position repeatedly */
+    for (b = 0; b <= 32; b++) {
+        uint32_t prev = initial;
+        uint32_t out[128];
+        /* initialize the buffer with deltas reduced modulo 2^b */
+        for (i = 0; i < 128; i++) {
+            buffer[i] =  ((uint32_t)(1655765 * i )) ;
+            if(b < 32) buffer[i] %= (1U<<b); /* 1U: 1<<31 on a signed int is undefined */
+        }
+        /* running sum starting at `initial` turns the deltas into absolute values */
+        for (i = 0; i < 128; i++) {
+            buffer[i] = buffer[i] + prev;
+            prev = buffer[i];
+        }
+        /* enforce non-decreasing order (only an unsigned wrap of the sum could violate it) */
+        for (i = 1; i < 128; i++) {
+            if(buffer[i] < buffer[i-1] )
+                buffer[i] = buffer[i-1];
+        }
+        assert(simdmaxbitsd1(initial, buffer)<=b);
+        for (i = 0; i < 128; i++) {
+            out[i] = 0; /* memset would do too */
+        }
+        /* delta-encode to 'b' bits */
+        simdpackwithoutmaskd1(initial, buffer, (__m128i *)out, b);
+        /* S1..S2: 1280 single-element selects; S2..S3: 1280 full unpacks reading one element each */
+        S1 = time_snap();
+        for (i = 0; i < 128 * 10; i++) {
+            uint32_t valretrieved = simdselectd1(initial, (__m128i *)out, b, (uint32_t)i % 128);
+            assert(valretrieved == buffer[i%128]);
+        }
+        S2 = time_snap();
+        for (i = 0; i < 128 * 10; i++) {
+            simdunpackd1(initial,  (__m128i *)out, backbuffer, b);
+            assert(backbuffer[i % 128] == buffer[i % 128]);
+        }
+        S3 = time_snap();
+        printf("bit width = %d, fast select function time = " TIME_SNAP_FMT ", naive time = " TIME_SNAP_FMT "  \n", b, (S2-S1), (S3-S2));
+    }
+}
+
+int uint32_cmp(const void *a, const void *b) /* ascending comparator handed to qsort() in benchmarkSearch */
+{
+    const uint32_t *ia = (const uint32_t *)a; /* both arguments are read as pointers to uint32_t */
+    const uint32_t *ib = (const uint32_t *)b;
+    if(*ia < *ib)
+        return -1; /* *a orders before *b */
+    else if (*ia > *ib)
+        return 1;  /* *a orders after *b */
+    return 0;      /* equal values */
+}
+
+/* adapted from wikipedia: bisects A (assumed sorted ascending) between imin and imax-1; returns a probed index equal to key, else the final imax */
+int binary_search(uint32_t * A, uint32_t key, int imin, int imax)
+{
+    int imid;
+    imax --; /* from here on imax is used as an index, one below the value passed in */
+    while(imin + 1 < imax) { /* loop until imin and imax are adjacent */
+        imid = imin + ((imax - imin) / 2); /* always strictly between imin and imax */
+
+        if (A[imid] > key) {
+            imax = imid;
+        } else if (A[imid] < key) {
+            imin = imid;
+        } else {
+            return imid; /* exact match at a probed midpoint */
+        }
+    }
+    return imax; /* NOTE(review): A[imin] and A[imax] are never compared to key, so this is not a found/not-found signal -- confirm intent */
+}
+
+
+/* adapted from wikipedia: bisection for the first index in [imin, imax-1] with A[index] >= key (assumes A is sorted ascending) */
+int lower_bound(uint32_t * A, uint32_t key, int imin, int imax)
+{
+    int imid;
+    imax --; /* benchmarkSearch passes (0, 128) for 128 elements; switch to the last valid index */
+    while(imin + 1 < imax) {
+        imid = imin + ((imax - imin) / 2);
+
+        if (A[imid] >= key) {
+            imax = imid; /* A[imid] is a candidate answer: keep it as the upper end */
+        } else if (A[imid] < key) { /* always true when this branch is reached */
+            imin = imid;
+        }
+    }
+    if(A[imin] >= key) return imin; /* the lower end itself already satisfies the bound */
+    return imax; /* also returned when no element is >= key (then it is the last index) */
+}
+
+/* Times lower-bound search in a delta-packed block of 128 sorted values for every bit width
+ * 0..32: simdsearchd1 vs. unpack + scalar lower_bound vs. simdsearchwithlengthd1. */
+void benchmarkSearch() {
+    uint32_t buffer[128];
+    uint32_t backbuffer[128];
+    uint32_t out[128];
+    uint32_t result, initial = 0;
+    uint32_t b, i;
+    time_snap_t S1, S2, S3, S4;
+    printf("benchmarking search \n");
+    /* this test creates delta encoded buffers with different bits, then
+     * performs lower bound searches for each key */
+    for (b = 0; b <= 32; b++) {
+        uint32_t prev = initial;
+        /* initialize the buffer with random deltas reduced modulo 2^b */
+        for (i = 0; i < 128; i++) {
+            buffer[i] =  ((uint32_t)rand()) ;
+            if(b < 32) buffer[i] %= (1U<<b); /* 1U: 1<<31 on a signed int is undefined */
+        }
+
+        qsort(buffer,128, sizeof(uint32_t), uint32_cmp);
+
+        for (i = 0; i < 128; i++) {
+            buffer[i] = buffer[i] + prev;
+            prev = buffer[i];
+        }
+        for (i = 1; i < 128; i++) {
+            if(buffer[i] < buffer[i-1] )
+                buffer[i] = buffer[i-1];
+        }
+        assert(simdmaxbitsd1(initial, buffer)<=b);
+        for (i = 0; i < 128; i++) {
+            out[i] = 0; /* memset would do too */
+        }
+
+        /* delta-encode to 'b' bits */
+        simdpackwithoutmaskd1(initial, buffer, (__m128i *)out, b);
+        simdunpackd1(initial,  (__m128i *)out, backbuffer, b);
+
+        for (i = 0; i < 128; i++) {
+            assert(buffer[i] == backbuffer[i]);
+        }
+        S1 = time_snap();
+        for (i = 0; i < 128 * 10; i++) {
+            /* S1..S2: search directly in the packed block with simdsearchd1 */
+            int pos;
+            uint32_t pseudorandomkey  =  buffer[i%128];
+            __m128i vecinitial = _mm_set1_epi32(initial);
+            pos = simdsearchd1(&vecinitial, (__m128i *)out, b,
+                               pseudorandomkey, &result);
+            if((result < pseudorandomkey) || (buffer[pos] != result)) {
+                printf("bug A.\n");
+            } else if (pos > 0) {
+                if(buffer[pos-1] >= pseudorandomkey)
+                    printf("bug B.\n");
+            }
+        }
+        S2 = time_snap();
+        for (i = 0; i < 128 * 10; i++) { /* S2..S3: baseline, full unpack + scalar lower_bound */
+            int pos;
+            uint32_t pseudorandomkey  =  buffer[i%128];
+            simdunpackd1(initial,  (__m128i *)out, backbuffer, b);
+            pos =  lower_bound(backbuffer, pseudorandomkey, 0, 128);
+            result = backbuffer[pos];
+
+            if((result < pseudorandomkey) || (buffer[pos] != result)) {
+                printf("bug C.\n");
+            } else if (pos > 0) {
+                if(buffer[pos-1] >= pseudorandomkey)
+                    printf("bug D.\n");
+            }
+        }
+        S3 = time_snap();
+        for (i = 0; i < 128 * 10; i++) {
+            /* S3..S4: packed search again, via the explicit-length variant */
+            int pos;
+            uint32_t pseudorandomkey  =  buffer[i%128];
+            pos = simdsearchwithlengthd1(initial, (__m128i *)out, b, 128,
+                               pseudorandomkey, &result);
+            if((result < pseudorandomkey) || (buffer[pos] != result)) {
+                printf("bug A.\n");
+            } else if (pos > 0) {
+                if(buffer[pos-1] >= pseudorandomkey)
+                    printf("bug B.\n");
+            }
+        }
+        S4 = time_snap();
+
+        printf("bit width = %d, fast search function time = " TIME_SNAP_FMT ", naive time = " TIME_SNAP_FMT " , fast with length time = " TIME_SNAP_FMT "  \n", b, (S2-S1), (S3-S2), (S4-S3) );
+    }
+}
+
+
+int main() { /* runs the search benchmark, then the select benchmark; always returns 0 */
+#ifdef _MSC_VER
+    QueryPerformanceFrequency((LARGE_INTEGER *)&freq); /* fills the global freq that time_snap() divides by */
+#endif
+    benchmarkSearch();
+    benchmarkSelect();
+    return 0;
+}
--- a/cpp/simdcomp/benchmarks/bitpackingbenchmark.c
+++ b/cpp/simdcomp/benchmarks/bitpackingbenchmark.c
@@ -0,0 +1,205 @@
+#include <stdio.h>
+
+#include "simdcomp.h"
+
+
+/*
+ * RDTSC_START(cycles): read the time-stamp counter at the beginning of a timed region.
+ * Executes CPUID followed by RDTSC, copies EDX and EAX into two locals and stores the
+ * combined 64-bit value (EDX in the upper half, EAX in the lower half) into "cycles".
+ * GCC-style inline assembly; rax, rbx, rcx and rdx are declared as clobbered.
+ * NOTE(review): CPUID is presumably used as a serializing instruction so that earlier
+ * work cannot drift into the timed region -- confirm against Intel's benchmarking guide.
+ */
+#define RDTSC_START(cycles)                                                   \
+    do {                                                                      \
+        register unsigned cyc_high, cyc_low;                                  \
+        __asm volatile(                                                       \
+            "cpuid\n\t"                                                       \
+            "rdtsc\n\t"                                                       \
+            "mov %%edx, %0\n\t"                                               \
+            "mov %%eax, %1\n\t"                                               \
+            : "=r"(cyc_high), "=r"(cyc_low)::"%rax", "%rbx", "%rcx", "%rdx"); \
+        (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;                      \
+    } while (0)
+
+/*
+ * RDTSC_FINAL(cycles): read the time-stamp counter at the end of a timed region.
+ * Executes RDTSCP first, copies EDX and EAX into two locals, and only then executes
+ * CPUID; the combined 64-bit value (EDX high, EAX low) is stored into "cycles".
+ * GCC-style inline assembly; rax, rbx, rcx and rdx are declared as clobbered.
+ * NOTE(review): the trailing CPUID presumably keeps later instructions from being
+ * executed before the counter is read -- confirm against Intel's benchmarking guide.
+ */
+#define RDTSC_FINAL(cycles)                                                   \
+    do {                                                                      \
+        register unsigned cyc_high, cyc_low;                                  \
+        __asm volatile(                                                       \
+            "rdtscp\n\t"                                                      \
+            "mov %%edx, %0\n\t"                                               \
+            "mov %%eax, %1\n\t"                                               \
+            "cpuid\n\t"                                                       \
+            : "=r"(cyc_high), "=r"(cyc_low)::"%rax", "%rbx", "%rcx", "%rdx"); \
+        (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;                      \
+    } while (0)
+
+
+
+
+/*
+ * Allocates an array of "length" integers and fills it, in order, with rand()
+ * values reduced to their "bit" least significant bits.
+ * The caller owns the returned array and must free() it.
+ * The result of malloc is not checked.
+ */
+uint32_t * get_random_array_from_bit_width(uint32_t length, uint32_t bit) {
+    /* 64-bit shift so that bit == 32 yields an all-ones mask */
+    const uint32_t low_bits = (uint32_t) ((UINT64_C(1) << bit) - 1);
+    uint32_t * const values = malloc(length * sizeof(uint32_t));
+    uint32_t * cursor = values;
+    uint32_t remaining = length;
+    while (remaining-- > 0) {
+        *cursor++ = rand() & low_bits;
+    }
+    return values;
+}
+
+/*
+ * Allocates an array of "length" integers where each element is the previous
+ * element plus a rand() value reduced to its "bit" least significant bits
+ * (the first element starts from zero). Sums wrap around modulo 2^32.
+ * The caller owns the returned array and must free() it.
+ * Returns NULL if the allocation fails; a zero "length" writes nothing.
+ */
+uint32_t * get_random_array_from_bit_width_d1(uint32_t length, uint32_t bit) {
+    uint32_t * answer = malloc(sizeof(uint32_t) * length);
+    /* 64-bit shift so that bit == 32 yields an all-ones mask */
+    uint32_t mask = (uint32_t) ((UINT64_C(1) << bit) - 1);
+    uint32_t running = 0;
+    uint32_t i;
+    if (answer == NULL) {
+        return NULL;
+    }
+    /* a single loop starting at 0 never touches answer[0] when length == 0 */
+    for(i = 0; i < length; ++i) {
+        running += rand() & mask;
+        answer[i] = running;
+    }
+    return answer;
+}
+
+
+/*
+ * Benchmarks simdpackwithoutmask / simdunpack on blocks of 128 integers.
+ * For every bit width from 1 to 32 it generates 128 random values of that width,
+ * times 500 pack calls and 500 unpack calls, and prints one line per bit width:
+ * the bit width, then the best (minimum) pack and unpack cost in cycles per integer.
+ */
+void demo128() {
+    const uint32_t length = 128;
+    uint32_t bit;
+    printf("# --- %s\n", __func__);
+    printf("# compressing %d integers\n",length);
+    printf("# format: bit width, pack in cycles per int, unpack in cycles per int\n");
+    for(bit = 1; bit <= 32; ++bit) {
+        uint32_t i;
+
+        uint32_t * data = get_random_array_from_bit_width(length, bit);
+        /* 128 * 4 bytes = 32 vectors of 128 bits, enough for the largest bit width (32) */
+        __m128i * buffer = malloc(length * sizeof(uint32_t));
+        uint32_t * backdata = malloc(length * sizeof(uint32_t));
+        uint32_t repeat = 500;
+        uint64_t min_diff;
+        printf("%d\t",bit);
+        /* start from the largest uint64_t value so the first measurement always wins */
+        min_diff = (uint64_t)-1;
+        for (i = 0; i < repeat; i++) {
+            uint64_t cycles_start, cycles_final, cycles_diff;
+            /* empty asm with a "memory" clobber: compiler-level barrier before timing */
+            __asm volatile("" ::: /* pretend to clobber */ "memory");
+            RDTSC_START(cycles_start);
+            simdpackwithoutmask(data,buffer, bit);
+            RDTSC_FINAL(cycles_final);
+            cycles_diff = (cycles_final - cycles_start);
+            if (cycles_diff < min_diff) min_diff = cycles_diff;
+        }
+        printf("%.2f\t",min_diff*1.0/length);
+        /* same measurement for decoding, using the block packed above */
+        min_diff = (uint64_t)-1;
+        for (i = 0; i < repeat; i++) {
+            uint64_t cycles_start, cycles_final, cycles_diff;
+            __asm volatile("" ::: /* pretend to clobber */ "memory");
+            RDTSC_START(cycles_start);
+            simdunpack(buffer, backdata,bit);
+            RDTSC_FINAL(cycles_final);
+            cycles_diff = (cycles_final - cycles_start);
+            if (cycles_diff < min_diff) min_diff = cycles_diff;
+        }
+        printf("%.2f\t",min_diff*1.0/length);
+
+        free(data);
+        free(buffer);
+        free(backdata);
+        printf("\n");
+    }
+    printf("\n\n"); /* two blank lines are required by gnuplot */
+}
+
+/*
+ * Benchmarks the differential-coding variants simdpackwithoutmaskd1 / simdunpackd1
+ * on blocks of 128 integers, always with an initial value of 0.
+ * For every bit width from 1 to 32 it generates 128 values whose successive
+ * differences are random numbers of that width, times 500 pack calls and 500 unpack
+ * calls, and prints the bit width followed by the best (minimum) pack and unpack
+ * cost in cycles per integer.
+ */
+void demo128_d1() {
+    const uint32_t length = 128;
+    uint32_t bit;
+    printf("# --- %s\n", __func__);
+    printf("# compressing %d integers\n",length);
+    printf("# format: bit width, pack in cycles per int, unpack in cycles per int\n");
+    for(bit = 1; bit <= 32; ++bit) {
+        uint32_t i;
+
+        uint32_t * data = get_random_array_from_bit_width_d1(length, bit);
+        /* 128 * 4 bytes = 32 vectors of 128 bits, enough for the largest bit width (32) */
+        __m128i * buffer = malloc(length * sizeof(uint32_t));
+        uint32_t * backdata = malloc(length * sizeof(uint32_t));
+        uint32_t repeat = 500;
+        uint64_t min_diff;
+        printf("%d\t",bit);
+        /* start from the largest uint64_t value so the first measurement always wins */
+        min_diff = (uint64_t)-1;
+        for (i = 0; i < repeat; i++) {
+            uint64_t cycles_start, cycles_final, cycles_diff;
+            /* empty asm with a "memory" clobber: compiler-level barrier before timing */
+            __asm volatile("" ::: /* pretend to clobber */ "memory");
+            RDTSC_START(cycles_start);
+            simdpackwithoutmaskd1(0,data,buffer, bit);
+            RDTSC_FINAL(cycles_final);
+            cycles_diff = (cycles_final - cycles_start);
+            if (cycles_diff < min_diff) min_diff = cycles_diff;
+        }
+        printf("%.2f\t",min_diff*1.0/length);
+        /* same measurement for decoding, using the block packed above */
+        min_diff = (uint64_t)-1;
+        for (i = 0; i < repeat; i++) {
+            uint64_t cycles_start, cycles_final, cycles_diff;
+            __asm volatile("" ::: /* pretend to clobber */ "memory");
+            RDTSC_START(cycles_start);
+            simdunpackd1(0,buffer, backdata,bit);
+            RDTSC_FINAL(cycles_final);
+            cycles_diff = (cycles_final - cycles_start);
+            if (cycles_diff < min_diff) min_diff = cycles_diff;
+        }
+        printf("%.2f\t",min_diff*1.0/length);
+
+        free(data);
+        free(buffer);
+        free(backdata);
+        printf("\n");
+    }
+    printf("\n\n"); /* two blank lines are required by gnuplot */
+}
+
+#ifdef __AVX2__
+/*
+ * Benchmarks avxpackwithoutmask / avxunpack on blocks of 256 integers
+ * (only compiled when __AVX2__ is defined).
+ * For every bit width from 1 to 32 it generates 256 random values of that width,
+ * times 500 pack calls and 500 unpack calls, and prints the bit width followed by
+ * the best (minimum) pack and unpack cost in cycles per integer.
+ */
+void demo256() {
+    const uint32_t length = 256;
+    uint32_t bit;
+    printf("# --- %s\n", __func__);
+    printf("# compressing %d integers\n",length);
+    printf("# format: bit width, pack in cycles per int, unpack in cycles per int\n");
+    for(bit = 1; bit <= 32; ++bit) {
+        uint32_t i;
+
+        uint32_t * data = get_random_array_from_bit_width(length, bit);
+        /* 256 * 4 bytes = 32 vectors of 256 bits, enough for the largest bit width (32).
+           NOTE(review): malloc does not promise 32-byte alignment; this presumably relies
+           on avxpackwithoutmask/avxunpack using unaligned accesses -- confirm. */
+        __m256i * buffer = malloc(length * sizeof(uint32_t));
+        uint32_t * backdata = malloc(length * sizeof(uint32_t));
+        uint32_t repeat = 500;
+        uint64_t min_diff;
+        printf("%d\t",bit);
+        /* start from the largest uint64_t value so the first measurement always wins */
+        min_diff = (uint64_t)-1;
+        for (i = 0; i < repeat; i++) {
+            uint64_t cycles_start, cycles_final, cycles_diff;
+            /* empty asm with a "memory" clobber: compiler-level barrier before timing */
+            __asm volatile("" ::: /* pretend to clobber */ "memory");
+            RDTSC_START(cycles_start);
+            avxpackwithoutmask(data,buffer, bit);
+            RDTSC_FINAL(cycles_final);
+            cycles_diff = (cycles_final - cycles_start);
+            if (cycles_diff < min_diff) min_diff = cycles_diff;
+        }
+        printf("%.2f\t",min_diff*1.0/length);
+        /* same measurement for decoding, using the block packed above */
+        min_diff = (uint64_t)-1;
+        for (i = 0; i < repeat; i++) {
+            uint64_t cycles_start, cycles_final, cycles_diff;
+            __asm volatile("" ::: /* pretend to clobber */ "memory");
+            RDTSC_START(cycles_start);
+            avxunpack(buffer, backdata,bit);
+            RDTSC_FINAL(cycles_final);
+            cycles_diff = (cycles_final - cycles_start);
+            if (cycles_diff < min_diff) min_diff = cycles_diff;
+        }
+        printf("%.2f\t",min_diff*1.0/length);
+
+        free(data);
+        free(buffer);
+        free(backdata);
+        printf("\n");
+    }
+    printf("\n\n"); /* two blank lines are required by gnuplot */
+}
+#endif /* avx 2 */
+
+
+/* Runs the 128-integer benchmarks, then the 256-integer one when AVX2 is available. */
+int main() {
+    demo128();
+    demo128_d1();
+#if defined(__AVX2__)
+    demo256();
+#endif
+    return 0;
+}
--- a/cpp/simdcomp/example.c
+++ b/cpp/simdcomp/example.c
@@ -0,0 +1,195 @@
+/* Type "make example" to build this example program. */
+#include <stdio.h>
+#include <time.h>
+#include <stdlib.h>
+#include "simdcomp.h"
+
+/**
+We provide several different code examples.
+**/
+
+
+/* very simple test to illustrate a simple application */
+/*
+ * Packs the integers 0..9998 with a single bit width, unpacks them again and
+ * verifies the round trip.
+ * Returns 0 on success and -1 if an allocation fails or a decoded value differs
+ * from the input. All buffers are released on every path.
+ */
+int compress_decompress_demo() {
+    size_t k, N = 9999;
+    __m128i * endofbuf;
+    int howmanybytes;
+    int status = 0;
+    float compratio;
+    uint32_t * datain = malloc(N * sizeof(uint32_t));
+    uint8_t * buffer = NULL;
+    uint8_t * shrunk;
+    uint32_t * backbuffer = malloc(N * sizeof(uint32_t));
+    uint32_t b;
+    printf("== simple test\n");
+    if (datain == NULL || backbuffer == NULL) {
+        status = -1;
+        goto cleanup;
+    }
+
+    for (k = 0; k < N; ++k) {       /* start with k=0, not k=1! */
+        datain[k] = (uint32_t) k;
+    }
+
+    b = maxbits_length(datain, N);
+    buffer = malloc(simdpack_compressedbytes(N,b));
+    if (buffer == NULL) {
+        status = -1;
+        goto cleanup;
+    }
+    endofbuf = simdpack_length(datain, N, (__m128i *)buffer, b);
+    /* endofbuf points to the end of the compressed data */
+    howmanybytes = (endofbuf-(__m128i *)buffer)*sizeof(__m128i); /* number of compressed bytes */
+    compratio = N*sizeof(uint32_t) * 1.0 / howmanybytes;
+    /* optional shrink; on failure realloc leaves the original block valid, so keep it */
+    shrunk = realloc(buffer, (size_t) howmanybytes);
+    if (shrunk != NULL) {
+        buffer = shrunk;
+    }
+    printf("Compressed %d integers down to %d bytes (comp. ratio = %f).\n",(int)N,howmanybytes,compratio);
+    /* in actual applications b must be stored and retrieved: caller is responsible for that. */
+    simdunpack_length((const __m128i *)buffer, N, backbuffer, b); /* will return a pointer to endofbuf */
+
+    for (k = 0; k < N; ++k) {
+        if(datain[k] != backbuffer[k]) {
+            printf("bug at %lu \n",(unsigned long)k);
+            status = -1;
+            break;
+        }
+    }
+    if (status == 0) {
+        printf("Code works!\n");
+    }
+cleanup:
+    /* free(NULL) is a no-op, so this is safe from every exit path */
+    free(datain);
+    free(buffer);
+    free(backbuffer);
+    return status;
+}
+
+
+
+/* compresses data from datain to buffer, returns how many bytes written
+used below in simple_demo */
+/*
+ * Compresses "length" integers from datain into buffer using differential coding,
+ * one block of SIMDBlockSize integers at a time. Each block is written as one byte
+ * holding its bit width followed by that many 128-bit vectors.
+ * A warning is printed when length is not a multiple of SIMDBlockSize; the trailing
+ * partial block is then not compressed.
+ * Returns the number of bytes written to buffer.
+ */
+size_t compress(uint32_t * datain, size_t length, uint8_t * buffer) {
+    uint8_t * const start = buffer;
+    uint8_t * out = buffer;
+    const uint32_t * block = datain;
+    const uint32_t * const stop = datain + (length / SIMDBlockSize) * SIMDBlockSize;
+    uint32_t previous = 0; /* value preceding the current block */
+    if(length % SIMDBlockSize != 0) {
+        printf("Data length should be a multiple of %i \n",SIMDBlockSize);
+    }
+    while(block != stop) {
+        const uint32_t width = simdmaxbitsd1(previous, block);
+        *out = (uint8_t) width;
+        out += 1;
+        simdpackwithoutmaskd1(previous, block, (__m128i *) out, width);
+        /* the last value of this block seeds the differences of the next one */
+        previous = block[SIMDBlockSize - 1];
+        out += width * sizeof(__m128i);
+        block += SIMDBlockSize;
+    }
+    return out - start;
+}
+
+/* Another illustration ... */
+/*
+ * Another illustration: for gaps 1, 3, 9, ..., 243 it builds 1000 blocks of sorted
+ * integers with random increments of at most "gap", compresses them with compress(),
+ * prints the compression ratio, then times REPEAT decoding passes and, as a baseline,
+ * REPEAT passes of memcpy over the compressed buffer.
+ * Returns silently if a buffer cannot be allocated.
+ */
+void simple_demo() {
+    size_t REPEAT = 10, gap;
+    size_t N = 1000 * SIMDBlockSize;/* SIMDBlockSize is 128 */
+    uint32_t * datain = malloc(N * sizeof(uint32_t));
+    size_t compsize;
+    clock_t start, end;
+    /* room for 32 bits per integer plus one bit-width byte per block */
+    uint8_t * buffer = malloc(N * sizeof(uint32_t) + N / SIMDBlockSize); /* output buffer */
+    uint32_t * backbuffer = malloc(SIMDBlockSize * sizeof(uint32_t));
+    printf("== simple demo\n");
+    if (datain == NULL || buffer == NULL || backbuffer == NULL) {
+        free(buffer);
+        free(datain);
+        free(backbuffer);
+        return;
+    }
+    for (gap = 1; gap <= 243; gap *= 3) {
+        size_t k, repeat;
+        uint32_t offset;
+        uint32_t bogus = 0;
+        double numberofseconds;
+
+        printf("\n");
+        printf(" gap = %lu \n", (unsigned long) gap);
+        datain[0] = 0;
+        for (k = 1; k < N; ++k)
+            datain[k] = datain[k-1] + ( rand() % (gap + 1) );
+        compsize = compress(datain,N,buffer);
+        printf("compression ratio = %f \n",  (N * sizeof(uint32_t))/ (compsize * 1.0 ));
+        start = clock();
+        for(repeat = 0; repeat < REPEAT; ++repeat) {
+            uint8_t * decbuffer = buffer;
+            /* every pass restarts from the initial value compress() used (0);
+               otherwise passes after the first would decode shifted values */
+            offset = 0;
+            for (k = 0; k * SIMDBlockSize < N; ++k) {
+                uint8_t b = *decbuffer++;
+                simdunpackd1(offset, (__m128i *) decbuffer, backbuffer, b);
+                /* do something here with backbuffer */
+                bogus += backbuffer[3];
+                decbuffer += b * sizeof(__m128i);
+                offset = backbuffer[SIMDBlockSize - 1];
+            }
+        }
+        end = clock();
+        numberofseconds = (end-start)/(double)CLOCKS_PER_SEC;
+        printf("decoding speed in million of integers per second %f \n",N*REPEAT/(numberofseconds*1000.0*1000.0));
+        start = clock();
+        for(repeat = 0; repeat < REPEAT; ++repeat) {
+            uint8_t * decbuffer = buffer;
+            for (k = 0; k * SIMDBlockSize < N; ++k) {
+                /* baseline: copy one block's worth of bytes; decbuffer is a byte pointer,
+                   so successive copies start SIMDBlockSize bytes apart */
+                memcpy(backbuffer,decbuffer+k*SIMDBlockSize,SIMDBlockSize*sizeof(uint32_t));
+                bogus += backbuffer[3] - backbuffer[100];
+            }
+        }
+        end = clock();
+        numberofseconds = (end-start)/(double)CLOCKS_PER_SEC;
+        printf("memcpy speed in million of integers per second %f \n",N*REPEAT/(numberofseconds*1000.0*1000.0));
+        /* printing bogus keeps the decoded values observably used */
+        printf("ignore me %i \n",bogus);
+        printf("All tests are in CPU cache. Avoid out-of-cache decoding in applications.\n");
+    }
+    free(buffer);
+    free(datain);
+    free(backbuffer);
+}
+
+/* Used below in more_sophisticated_demo ... */
+/*
+ * Used below in varying_bit_width_demo.
+ * Compresses "length" integers from datain into buffer, one block of SIMDBlockSize
+ * integers at a time and without differential coding. Each block is written as one
+ * byte holding its own bit width followed by that many 128-bit vectors.
+ * A warning is printed when length is not a multiple of SIMDBlockSize; the trailing
+ * partial block is then not compressed.
+ * Returns the number of bytes written to buffer.
+ */
+size_t varying_bit_width_compress(uint32_t * datain, size_t length, uint8_t * buffer) {
+    uint8_t * const first = buffer;
+    size_t blocks_left = length / SIMDBlockSize;
+    if(length % SIMDBlockSize != 0) {
+        printf("Data length should be a multiple of %i \n",SIMDBlockSize);
+    }
+    for(; blocks_left != 0; --blocks_left, datain += SIMDBlockSize) {
+        const uint32_t width = maxbits(datain);
+        buffer[0] = (uint8_t) width;
+        simdpackwithoutmask(datain, (__m128i *)(buffer + 1), width);
+        /* skip the width byte and the "width" packed vectors */
+        buffer += 1 + width * sizeof(__m128i);
+    }
+    return buffer - first;
+}
+
+/* Here we compress the data in blocks of 128 integers with varying bit width */
+/*
+ * Here we compress the data in blocks of 128 integers with varying bit width:
+ * two blocks of random values are compressed with varying_bit_width_compress(),
+ * decoded block by block and compared with the input.
+ * Returns 0 on success and -1 if an allocation fails or a decoded value differs.
+ * All buffers are released on every path.
+ */
+int varying_bit_width_demo() {
+    size_t nn = 128 * 2;
+    uint32_t * datainn = malloc(nn * sizeof(uint32_t));
+    uint8_t * buffern = malloc(nn * sizeof(uint32_t) + nn / SIMDBlockSize);
+    /* buffern is advanced while decoding; keep the start of the allocation for free() */
+    uint8_t * initbuffern = buffern;
+    uint32_t * backbuffern = malloc(nn * sizeof(uint32_t));
+    size_t k, compsize;
+    int status = 0;
+    printf("== varying bit-width demo\n");
+    if (datainn == NULL || initbuffern == NULL || backbuffern == NULL) {
+        status = -1;
+        goto cleanup;
+    }
+
+    for(k=0; k<nn; ++k) {
+        datainn[k] = rand() % (k + 1);
+    }
+
+    compsize = varying_bit_width_compress(datainn,nn,buffern);
+    printf("encoded size: %u (original size: %u)\n", (unsigned)compsize,
+           (unsigned)(nn * sizeof(uint32_t)));
+
+    for (k = 0; k * SIMDBlockSize < nn; ++k) {
+        /* each block starts with one byte holding its bit width */
+        uint32_t b = *buffern;
+        buffern++;
+        simdunpack((const __m128i *)buffern, backbuffern + k * SIMDBlockSize, b);
+        buffern += b * sizeof(__m128i);
+    }
+
+    for (k = 0; k < nn; ++k) {
+        if(backbuffern[k] != datainn[k]) {
+            printf("bug\n");
+            status = -1;
+            break;
+        }
+    }
+    if (status == 0) {
+        printf("Code works!\n");
+    }
+cleanup:
+    /* free(NULL) is a no-op, so this is safe from every exit path */
+    free(datainn);
+    free(initbuffern);
+    free(backbuffern);
+    return status;
+}
+
+/* Runs the two self-checking demos; the timing demo only runs if both succeed. */
+int main() {
+    /* || short-circuits: the second demo is skipped once the first one has failed */
+    if(compress_decompress_demo() != 0 || varying_bit_width_demo() != 0) {
+        return -1;
+    }
+    simple_demo();
+    return 0;
+}
--- a/cpp/simdcomp/go/README.md
+++ b/cpp/simdcomp/go/README.md
@@ -0,0 +1,13 @@
+Simple Go demo
+==============
+
+Setup
+======
+
+Start by installing the simdcomp library (make && make install).
+
+Then type:
+
+go run test.go
+
+
--- a/cpp/simdcomp/go/test.go
+++ b/cpp/simdcomp/go/test.go
@@ -0,0 +1,71 @@
+/////////
+// This particular file is in the public domain.
+// Author: Daniel Lemire
+////////
+
+package main 
+
+/*
+#cgo LDFLAGS: -lsimdcomp
+#include <simdcomp.h>
+*/
+import "C"
+import "fmt"
+
+//////////
+// For this demo, we pack and unpack blocks of 128 integers
+/////////
+func main() {
+        // Original data: the integers 0..127, kept in C types so they can be passed to cgo directly.
+        // An alternative might be to use unsafe.Pointer calls, see http://bit.ly/1ndw3W3
+        var data [128]C.uint32_t
+        for i := range data {
+            data[i] = C.uint32_t(i)
+        }
+        var recovereddata [128]C.uint32_t
+
+        ////////////
+        // First pass: plain bit packing, no differential coding
+        ///////////
+        // number of bits needed per integer
+        width := C.maxbits(&data[0])
+        fmt.Println("Bit width  ", width)
+        fmt.Println(fmt.Sprintf("Compression ratio %f ", 32.0/float64(width)))
+        // buffer receiving the packed data (each __m128i uses 128 bits)
+        packed := make([]C.__m128i, width)
+        C.simdpackwithoutmask(&data[0], &packed[0], width)
+        C.simdunpack(&packed[0], &recovereddata[0], width)
+        // arrays are comparable in Go: any differing element makes them unequal
+        if data != recovereddata {
+            fmt.Println("Bug ")
+            return
+        }
+
+        ///////////
+        // Second pass: differential coding
+        //////////
+        // if you pack data from K to K + 128, offset should be the value at K-1. When K = 0, choose a default
+        offset := C.uint32_t(0)
+        deltaWidth := C.simdmaxbitsd1(offset, &data[0])
+        fmt.Println("Bit width  ", deltaWidth)
+        fmt.Println(fmt.Sprintf("Compression ratio %f ", 32.0/float64(deltaWidth)))
+        // buffer receiving the packed data (each __m128i uses 128 bits)
+        packed = make([]C.__m128i, deltaWidth)
+        C.simdpackwithoutmaskd1(offset, &data[0], &packed[0], deltaWidth)
+        C.simdunpackd1(offset, &packed[0], &recovereddata[0], deltaWidth)
+        if data != recovereddata {
+            fmt.Println("Bug ")
+            return
+        }
+
+        fmt.Println("test succesful.")
+}
--- a/cpp/simdcomp/include/avxbitpacking.h
+++ b/cpp/simdcomp/include/avxbitpacking.h
@@ -0,0 +1,40 @@
+/**
+ * This code is released under a BSD License.
+ */
+
+#ifndef INCLUDE_AVXBITPACKING_H_
+#define INCLUDE_AVXBITPACKING_H_
+
+
+#ifdef __AVX2__
+
+#include "portability.h"
+
+
+/* AVX2 is required */
+#include <immintrin.h>
+/* for memset */
+#include <string.h>
+
+#include "simdcomputil.h"
+
+enum{ AVXBlockSize = 256};
+
+/* max integer logarithm over a range of AVXBlockSize integers (256 integers) */
+uint32_t avxmaxbits(const uint32_t * begin);
+
+/* reads 256 values from "in", writes  "bit" 256-bit vectors to "out" */
+void avxpack(const uint32_t *  in,__m256i *  out, const uint32_t bit);
+
+/* reads 256 values from "in", writes  "bit" 256-bit vectors to "out" */
+void avxpackwithoutmask(const uint32_t *  in,__m256i *  out, const uint32_t bit);
+
+/* reads  "bit" 256-bit vectors from "in", writes  256 values to "out" */
+void avxunpack(const __m256i *  in,uint32_t *  out, const uint32_t bit);
+
+
+
+
+#endif /* __AVX2__ */
+
+#endif /* INCLUDE_AVXBITPACKING_H_ */
--- a/cpp/simdcomp/include/portability.h
+++ b/cpp/simdcomp/include/portability.h
@@ -0,0 +1,81 @@
+/**
+ * This code is released under a BSD License.
+ */
+#ifndef SIMDBITCOMPAT_H_
+#define SIMDBITCOMPAT_H_
+
+#include <iso646.h> /* mostly for Microsoft compilers */
+#include <string.h>
+
+#if SIMDCOMP_DEBUG
+# define SIMDCOMP_ALWAYS_INLINE inline
+# define SIMDCOMP_NEVER_INLINE
+# define SIMDCOMP_PURE
+#else
+# if defined(__GNUC__)
+#  if __GNUC__ >= 3
+#   define SIMDCOMP_ALWAYS_INLINE inline __attribute__((always_inline))
+#   define SIMDCOMP_NEVER_INLINE __attribute__((noinline))
+#   define SIMDCOMP_PURE __attribute__((pure))
+#  else
+#   define SIMDCOMP_ALWAYS_INLINE inline
+#   define SIMDCOMP_NEVER_INLINE
+#   define SIMDCOMP_PURE
+#  endif
+# elif defined(_MSC_VER)
+#  define SIMDCOMP_ALWAYS_INLINE __forceinline
+#  define SIMDCOMP_NEVER_INLINE
+#  define SIMDCOMP_PURE
+# else
+#  if __has_attribute(always_inline)
+#   define SIMDCOMP_ALWAYS_INLINE inline __attribute__((always_inline))
+#  else
+#   define SIMDCOMP_ALWAYS_INLINE inline
+#  endif
+#  if __has_attribute(noinline)
+#   define SIMDCOMP_NEVER_INLINE __attribute__((noinline))
+#  else
+#   define SIMDCOMP_NEVER_INLINE
+#  endif
+#  if __has_attribute(pure)
+#   define SIMDCOMP_PURE __attribute__((pure))
+#  else
+#   define SIMDCOMP_PURE
+#  endif
+# endif
+#endif
+
+/* Fixed-width integer types: MSVC releases older than _MSC_VER 1600 get local
+ * typedefs for the three types declared here; everything else uses <stdint.h>. */
+#if defined(_MSC_VER) && _MSC_VER < 1600
+typedef unsigned int uint32_t;
+typedef unsigned char uint8_t;
+typedef signed char int8_t;
+#else
+#include <stdint.h> /* part of Visual Studio 2010 and better, others likely anyway */
+#endif
+
+/* SIMDCOMP_ALIGNED(x): alignment specifier requesting x-byte alignment.
+ * Only defined for MSVC and GCC-compatible compilers; on any other compiler the
+ * macro is left undefined. */
+#if defined(_MSC_VER)
+#define SIMDCOMP_ALIGNED(x) __declspec(align(x))
+#else
+#if defined(__GNUC__)
+#define SIMDCOMP_ALIGNED(x) __attribute__ ((aligned(x)))
+#endif
+#endif
+
+/*
+ * SIMDCOMP_CTZ(result, mask): stores in "result" the number of trailing zero bits
+ * of the 32-bit value "mask", or 32 when mask is zero.
+ * Both variants are statements (do { ... } while (0)) and evaluate "mask" once.
+ * The non-MSVC variant tests for zero explicitly because the result of
+ * __builtin_ctz(0) is undefined, which would otherwise differ from the MSVC branch.
+ */
+#if defined(_MSC_VER)
+# include <intrin.h>
+/* 64-bit needs extending */
+# define SIMDCOMP_CTZ(result, mask) do { \
+		unsigned long index; \
+		if (!_BitScanForward(&(index), (mask))) { \
+			(result) = 32U; \
+		} else { \
+			(result) = (uint32_t)(index); \
+		} \
+	} while (0)
+#else
+# define SIMDCOMP_CTZ(result, mask) do { \
+		const uint32_t simdcomp_ctz_mask_ = (uint32_t)(mask); \
+		if (simdcomp_ctz_mask_ == 0U) { \
+			(result) = 32U; \
+		} else { \
+			(result) = (uint32_t)__builtin_ctz(simdcomp_ctz_mask_); \
+		} \
+	} while (0)
+#endif
+
+#endif /* SIMDBITCOMPAT_H_ */
+
--- a/cpp/simdcomp/include/simdbitpacking.h
+++ b/cpp/simdcomp/include/simdbitpacking.h
@@ -0,0 +1,72 @@
+/**
+ * This code is released under a BSD License.
+ */
+#ifndef SIMDBITPACKING_H_
+#define SIMDBITPACKING_H_
+
+#include "portability.h"
+
+/* SSE2 is required */
+#include <emmintrin.h>
+/* for memset */
+#include <string.h>
+
+#include "simdcomputil.h"
+
+/***
+* Please see example.c for various examples on how to make good use
+* of these functions.
+*/
+
+
+
+/* reads 128 values from "in", writes  "bit" 128-bit vectors to "out".
+ * The input values are masked so that only the least significant "bit" bits are used. */
+void simdpack(const uint32_t *  in,__m128i *  out, const uint32_t bit);
+
+/* reads 128 values from "in", writes  "bit" 128-bit vectors to "out".
+ * The input values are assumed to be less than 1<<bit. */
+void simdpackwithoutmask(const uint32_t *  in,__m128i *  out, const uint32_t bit);
+
+/* reads  "bit" 128-bit vectors from "in", writes  128 values to "out" */
+void simdunpack(const __m128i *  in,uint32_t *  out, const uint32_t bit);
+
+
+
+/* how many bytes are needed to compress "length" integers using a bit width of "bit" with
+the simdpack_length function. */
+int simdpack_compressedbytes(int length, const uint32_t bit);
+
+/* like simdpack, but supports an undetermined number of inputs.
+ * This is useful if you need to pack an array of integers whose length is not a multiple of 128.
+ * Returns a pointer to the (advanced) compressed array. Compressed data is stored in the memory location between 
+ the provided (out) pointer and the returned pointer. */
+__m128i * simdpack_length(const uint32_t *   in, size_t length, __m128i *    out, const uint32_t bit);
+
+/* like simdunpack, but supports an undetermined number of inputs.
+ * This is useful if you need to unpack an array of integers whose length is not a multiple of 128.
+ * Returns a pointer to the (advanced) compressed array. The read compressed data is between the provided 
+ (in) pointer and the returned pointer. */
+const __m128i * simdunpack_length(const __m128i *   in, size_t length, uint32_t * out, const uint32_t bit);
+
+
+
+
+/* like simdpack, but supports an undetermined small number of inputs. This is useful if you need to pack less 
+than 128 integers.
+ * Note that this function is much slower.
+ * Returns a pointer to the (advanced) compressed array. Compressed data is stored in the memory location 
+ between the provided (out) pointer and the returned pointer. */
+__m128i * simdpack_shortlength(const uint32_t *   in, int length, __m128i *    out, const uint32_t bit);
+
+/* like simdunpack, but supports an undetermined small number of inputs. This is useful if you need to unpack less
+ than 128 integers.
+ * Note that this function is much slower.
+ * Returns a pointer to the (advanced) compressed array. The read compressed data is between the provided (in) 
+ pointer and the returned pointer. */
+const __m128i * simdunpack_shortlength(const __m128i *   in, int length, uint32_t * out, const uint32_t bit);
+
+/* given a block of 128 packed values, this function sets the value at index "index" to "value" */
+void simdfastset(__m128i * in128, uint32_t b, uint32_t value, size_t index);
+
+#endif /* SIMDBITPACKING_H_ */
--- a/cpp/simdcomp/include/simdcomp.h
+++ b/cpp/simdcomp/include/simdcomp.h
@@ -0,0 +1,22 @@
+/**
+ * This code is released under a BSD License.
+ */
+
+#ifndef SIMDCOMP_H_
+#define SIMDCOMP_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "simdbitpacking.h"
+#include "simdcomputil.h"
+#include "simdfor.h"
+#include "simdintegratedbitpacking.h"
+#include "avxbitpacking.h"
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif 
--- a/cpp/simdcomp/include/simdcomputil.h
+++ b/cpp/simdcomp/include/simdcomputil.h
@@ -0,0 +1,54 @@
+/**
+ * This code is released under a BSD License.
+ */
+
+#ifndef SIMDCOMPUTIL_H_
+#define SIMDCOMPUTIL_H_
+
+#include "portability.h"
+
+/* SSE2 is required */
+#include <emmintrin.h>
+
+
+
+
+/* returns the integer logarithm of v (bit width) */
+uint32_t bits(const uint32_t v);
+
+/* max integer logarithm over a range of SIMDBlockSize integers (128 integers) */
+uint32_t maxbits(const uint32_t * begin);
+
+/* same as maxbits, but we specify the number of integers */
+uint32_t maxbits_length(const uint32_t * in,uint32_t length);
+
+enum{ SIMDBlockSize = 128};
+
+
+/* computes (quickly) the minimal value of 128 values */
+uint32_t simdmin(const uint32_t * in);
+
+/* computes (quickly) the minimal value of the specified number of values */
+uint32_t simdmin_length(const uint32_t * in, uint32_t length);
+
+#ifdef __SSE4_1__
+/* computes (quickly) the minimal and maximal value of the specified number of values */
+void simdmaxmin_length(const uint32_t * in, uint32_t length, uint32_t * getmin, uint32_t * getmax);
+
+/* computes (quickly) the minimal and maximal value of the 128 values */
+void simdmaxmin(const uint32_t * in, uint32_t * getmin, uint32_t * getmax);
+
+#endif
+
+/* like maxbits over 128 integers (SIMDBlockSize) with provided initial value
+   and using differential coding */
+uint32_t simdmaxbitsd1(uint32_t initvalue, const uint32_t * in);
+
+/* like simdmaxbitsd1, but calculates maxbits over |length| integers 
+   with provided initial value. |length| can be any arbitrary value. */
+uint32_t simdmaxbitsd1_length(uint32_t initvalue, const uint32_t * in,
+                uint32_t length);
+
+
+
+#endif /* SIMDCOMPUTIL_H_ */
--- a/cpp/simdcomp/include/simdfor.h
+++ b/cpp/simdcomp/include/simdfor.h
@@ -0,0 +1,72 @@
+/**
+ * This code is released under a BSD License.
+ */
+#ifndef INCLUDE_SIMDFOR_H_
+#define INCLUDE_SIMDFOR_H_
+
+#include "portability.h"
+
+/* SSE2 is required */
+#include <emmintrin.h>
+
+#include "simdcomputil.h"
+#include "simdbitpacking.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* reads 128 values from "in", writes  "bit" 128-bit vectors to "out" */
+void simdpackFOR(uint32_t initvalue, const uint32_t *  in,__m128i *  out, const uint32_t bit);
+
+
+/* reads "bit" 128-bit vectors from "in", writes  128 values to "out" */
+void simdunpackFOR(uint32_t initvalue, const __m128i *  in,uint32_t *  out, const uint32_t bit);
+
+
+/* how many bytes are needed to compress "length" integers using a bit width of "bit" with
+the simdpackFOR_length function. */
+int simdpackFOR_compressedbytes(int length, const uint32_t bit);
+
+/* like simdpackFOR, but supports an undetermined number of inputs. 
+This is useful if you need to pack less than 128 integers. Note that this function is much slower. 
+ Compressed data is stored in the memory location between 
+ the provided (out) pointer and the returned pointer. */
+__m128i * simdpackFOR_length(uint32_t initvalue, const uint32_t *   in, int length, __m128i *    out, const uint32_t bit);
+
+/* like simdunpackFOR, but supports an undetermined number of inputs. 
+This is useful if you need to unpack less than 128 integers. Note that this function is much slower. 
+ The read compressed data is between the provided 
+ (in) pointer and the returned pointer.  */
+const __m128i * simdunpackFOR_length(uint32_t initvalue, const __m128i *   in, int length, uint32_t * out, const uint32_t bit);
+
+
+/* returns the value stored at the specified "slot".
+* */
+uint32_t simdselectFOR(uint32_t initvalue, const __m128i *in, uint32_t bit,
+                int slot);
+
+/* given a block of 128 packed values, this function sets the value at index "index" to "value" */
+void simdfastsetFOR(uint32_t initvalue, __m128i * in, uint32_t bit, uint32_t value, size_t index);
+
+
+/* searches "bit" 128-bit vectors from "in" (= length<=128 encoded integers) for the first encoded uint32 value
+ * which is >= |key|, and returns its position. It is assumed that the values
+ * stored are in sorted order.
+ * The encoded key is stored in "*presult".
+ * Only the first |length| decoded integers are considered; the others are ignored.
+ * Length should be no larger than 128.
+ *
+ * If no value is larger or equal to the key,
+* length is returned */
+int simdsearchwithlengthFOR(uint32_t initvalue, const __m128i *in, uint32_t bit,
+                int length, uint32_t key, uint32_t *presult);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+
+
+
+#endif /* INCLUDE_SIMDFOR_H_ */
--- a/cpp/simdcomp/include/simdintegratedbitpacking.h
+++ b/cpp/simdcomp/include/simdintegratedbitpacking.h
@@ -0,0 +1,98 @@
+/**
+ * This code is released under a BSD License.
+ */
+
+#ifndef SIMD_INTEGRATED_BITPACKING_H
+#define SIMD_INTEGRATED_BITPACKING_H
+
+#include "portability.h"
+
+/* SSE2 is required */
+#include <emmintrin.h>
+
+#include "simdcomputil.h"
+#include "simdbitpacking.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* reads 128 values from "in", writes  "bit" 128-bit vectors to "out"
+   integer values should be in sorted order (for best results).
+   The differences are masked so that only the least significant "bit" bits are used. */
+void simdpackd1(uint32_t initvalue, const uint32_t *  in,__m128i *  out, const uint32_t bit);
+
+
+/* reads 128 values from "in", writes  "bit" 128-bit vectors to "out"
+   integer values should be in sorted order (for best results).
+   The difference values are assumed to be less than 1<<bit. */
+void simdpackwithoutmaskd1(uint32_t initvalue, const uint32_t *  in,__m128i *  out, const uint32_t bit);
+
+
+/* reads "bit" 128-bit vectors from "in", writes  128 values to "out" */
+void simdunpackd1(uint32_t initvalue, const __m128i *  in,uint32_t *  out, const uint32_t bit);
+
+
+/* searches "bit" 128-bit vectors from "in" (= 128 encoded integers) for the first encoded uint32 value
+ * which is >= |key|, and returns its position. It is assumed that the values
+ * stored are in sorted order.
+ * The encoded key is stored in "*presult". If no value is larger or equal to the key,
+* 128 is returned. The pointer initOffset is a pointer to the last four values decoded
+* (when starting out, this can be a zero vector or initialized with _mm_set1_epi32(init)),
+* and the vector gets updated.
+**/
+int
+simdsearchd1(__m128i * initOffset, const __m128i *in, uint32_t bit,
+                uint32_t key, uint32_t *presult);
+
+
+/* searches "bit" 128-bit vectors from "in" (= length<=128 encoded integers) for the first encoded uint32 value
+ * which is >= |key|, and returns its position. It is assumed that the values
+ * stored are in sorted order.
+ * The encoded key is stored in "*presult".
+ * Only the first |length| decoded integers are considered; the others are ignored.
+ * Length should be no larger than 128.
+ *
+ * If no value is larger or equal to the key,
+* length is returned */
+int simdsearchwithlengthd1(uint32_t initvalue, const __m128i *in, uint32_t bit,
+                int length, uint32_t key, uint32_t *presult);
+
+
+
+/* returns the value stored at the specified "slot".
+* */
+uint32_t simdselectd1(uint32_t initvalue, const __m128i *in, uint32_t bit,
+                int slot);
+
+/* given a block of 128 packed values, this function sets the value at index "index" to "value",
+ * you must somehow know the previous value.
+ * Because of differential coding, all following values are incremented by the offset between this new
+ * value and the old value... 
+ * This function is useful if you want to modify the last value.
+ */
+void simdfastsetd1fromprevious( __m128i * in, uint32_t bit, uint32_t previousvalue, uint32_t value, size_t index);
+
+/* given a block of 128 packed values, this function sets the value at index "index" to "value",
+ * This function computes the previous value if needed.
+ * Because of differential coding, all following values are incremented by the offset between this new
+ * value and the old value...
+ * This function is useful if you want to modify the last value.
+ */
+void simdfastsetd1(uint32_t initvalue, __m128i * in, uint32_t bit, uint32_t value, size_t index);
+
+
+/*Simply scan the data
+* The pointer initOffset is a pointer to the last four values decoded
+* (when starting out, this can be a zero vector or initialized with _mm_set1_epi32(init);),
+* and the vector gets updated.
+* */
+
+void
+simdscand1(__m128i * initOffset, const __m128i *in, uint32_t bit);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif
--- a/cpp/simdcomp/makefile
+++ b/cpp/simdcomp/makefile
@@ -0,0 +1,79 @@
+# minimalist makefile
+.SUFFIXES:
+#
+.SUFFIXES: .cpp .o .c .h
+ifeq ($(DEBUG),1)
+CFLAGS = -fPIC  -std=c89 -ggdb -msse4.1 -march=native -Wall -Wextra -Wshadow -fsanitize=undefined  -fno-omit-frame-pointer -fsanitize=address
+else
+CFLAGS = -fPIC -std=c89 -O3 -msse4.1  -march=native -Wall -Wextra -Wshadow
+endif # debug
+LDFLAGS = -shared
+LIBNAME=libsimdcomp.so.0.0.3
+all:  unit unit_chars bitpackingbenchmark $(LIBNAME)
+test:
+	./unit
+	./unit_chars
+install: $(LIBNAME) # depend on the library that is copied; OBJECTS is defined further down, so it expanded to nothing here
+	cp $(LIBNAME) /usr/local/lib
+	ln -s /usr/local/lib/$(LIBNAME) /usr/local/lib/libsimdcomp.so
+	ldconfig
+	cp $(HEADERS) /usr/local/include
+
+
+
+HEADERS=./include/simdbitpacking.h ./include/simdcomputil.h ./include/simdintegratedbitpacking.h ./include/simdcomp.h ./include/simdfor.h ./include/avxbitpacking.h
+
+uninstall:
+	for h in $(HEADERS) ; do rm  /usr/local/$$h; done
+	rm  /usr/local/lib/$(LIBNAME)
+	rm /usr/local/lib/libsimdcomp.so
+	ldconfig
+
+
+OBJECTS= simdbitpacking.o simdintegratedbitpacking.o simdcomputil.o \
+		 simdpackedsearch.o simdpackedselect.o simdfor.o avxbitpacking.o
+
+$(LIBNAME): $(OBJECTS)
+	$(CC) $(CFLAGS) -o $(LIBNAME) $(OBJECTS)  $(LDFLAGS)
+
+
+avxbitpacking.o: ./src/avxbitpacking.c $(HEADERS)
+	$(CC) $(CFLAGS) -c ./src/avxbitpacking.c -Iinclude
+
+
+simdfor.o: ./src/simdfor.c $(HEADERS)
+	$(CC) $(CFLAGS) -c ./src/simdfor.c -Iinclude
+
+
+simdcomputil.o: ./src/simdcomputil.c $(HEADERS)
+	$(CC) $(CFLAGS) -c ./src/simdcomputil.c -Iinclude
+
+simdbitpacking.o: ./src/simdbitpacking.c $(HEADERS)
+	$(CC) $(CFLAGS) -c ./src/simdbitpacking.c -Iinclude
+
+simdintegratedbitpacking.o: ./src/simdintegratedbitpacking.c  $(HEADERS)
+	$(CC) $(CFLAGS) -c ./src/simdintegratedbitpacking.c -Iinclude
+
+simdpackedsearch.o: ./src/simdpackedsearch.c $(HEADERS)
+	$(CC) $(CFLAGS) -c ./src/simdpackedsearch.c -Iinclude
+
+simdpackedselect.o: ./src/simdpackedselect.c $(HEADERS)
+	$(CC) $(CFLAGS) -c ./src/simdpackedselect.c -Iinclude
+
+example: ./example.c    $(HEADERS) $(OBJECTS)
+	$(CC) $(CFLAGS) -o example ./example.c -Iinclude  $(OBJECTS)
+
+unit: ./tests/unit.c    $(HEADERS) $(OBJECTS)
+	$(CC) $(CFLAGS) -o unit ./tests/unit.c -Iinclude  $(OBJECTS)
+
+bitpackingbenchmark: ./benchmarks/bitpackingbenchmark.c    $(HEADERS) $(OBJECTS)
+	$(CC) $(CFLAGS) -o bitpackingbenchmark ./benchmarks/bitpackingbenchmark.c -Iinclude  $(OBJECTS)
+benchmark: ./benchmarks/benchmark.c    $(HEADERS) $(OBJECTS)
+	$(CC) $(CFLAGS) -o benchmark ./benchmarks/benchmark.c -Iinclude  $(OBJECTS)
+dynunit: ./tests/unit.c    $(HEADERS) $(LIBNAME)
+	$(CC) $(CFLAGS) -o dynunit ./tests/unit.c -Iinclude  -lsimdcomp
+
+unit_chars: ./tests/unit_chars.c    $(HEADERS) $(OBJECTS)
+	$(CC) $(CFLAGS) -o unit_chars ./tests/unit_chars.c -Iinclude  $(OBJECTS)
+clean:
+	rm -f unit *.o $(LIBNAME) example benchmark bitpackingbenchmark dynunit unit_chars
--- a/cpp/simdcomp/makefile.vc
+++ b/cpp/simdcomp/makefile.vc
@@ -0,0 +1,104 @@
+
+!IFNDEF MACHINE
+!IF "$(PROCESSOR_ARCHITECTURE)"=="AMD64"
+MACHINE=x64
+!ELSE
+MACHINE=x86
+!ENDIF
+!ENDIF
+
+!IFNDEF DEBUG
+DEBUG=no
+!ENDIF
+
+!IFNDEF CC
+CC=cl.exe
+!ENDIF
+
+!IFNDEF AR
+AR=lib.exe
+!ENDIF
+
+!IFNDEF LINK
+LINK=link.exe
+!ENDIF
+
+!IFNDEF PGO
+PGO=no
+!ENDIF
+
+!IFNDEF PGI
+PGI=no
+!ENDIF
+
+INC = /Iinclude
+
+!IF "$(DEBUG)"=="yes"
+CFLAGS = /nologo /MDd /LDd /Od /Zi /D_DEBUG /RTC1 /W3 /GS /Gm
+ARFLAGS = /nologo
+LDFLAGS = /nologo /debug /nodefaultlib:msvcrt
+!ELSE
+CFLAGS = /nologo /MD /O2 /Zi /DNDEBUG /W3 /Gm- /GS /Gy /Oi /GL /MP
+ARFLAGS = /nologo /LTCG
+LDFLAGS = /nologo /LTCG /DYNAMICBASE /incremental:no /debug /opt:ref,icf
+!ENDIF
+
+!IF "$(PGI)"=="yes"
+LDFLAGS = $(LDFLAGS) /ltcg:pgi
+!ENDIF
+
+!IF "$(PGO)"=="yes"
+LDFLAGS = $(LDFLAGS) /ltcg:pgo
+!ENDIF
+
+LIB_OBJS = simdbitpacking.obj simdintegratedbitpacking.obj simdcomputil.obj \
+	simdpackedsearch.obj simdpackedselect.obj simdfor.obj
+
+
+all: lib dll dynunit unit_chars example benchmark
+# need some good use case scenario to train the instrumented build
+	@if "$(PGI)"=="yes" echo Running PGO training
+	@if "$(PGI)"=="yes" benchmark.exe >nul 2>&1
+	@if "$(PGI)"=="yes" example.exe >nul 2>&1
+
+
+$(LIB_OBJS):
+	$(CC) $(INC) $(CFLAGS) /c src/simdbitpacking.c src/simdintegratedbitpacking.c src/simdcomputil.c \
+		src/simdpackedsearch.c src/simdpackedselect.c src/simdfor.c
+
+lib: $(LIB_OBJS)
+	$(AR) $(ARFLAGS) /OUT:simdcomp_a.lib $(LIB_OBJS)
+
+dll: $(LIB_OBJS)
+	$(LINK) /DLL $(LDFLAGS) /OUT:simdcomp.dll /IMPLIB:simdcomp.lib /DEF:simdcomp.def $(LIB_OBJS)
+
+unit: lib # unit test source lives in tests/, as in the Unix makefile
+	$(CC) $(INC) $(CFLAGS) /c tests/unit.c 
+	$(LINK) $(LDFLAGS) /OUT:unit.exe unit.obj simdcomp_a.lib
+
+dynunit: dll # same unit test as "unit", linked against the DLL import library
+	$(CC) $(INC) $(CFLAGS) /c tests/unit.c 
+	$(LINK) $(LDFLAGS) /OUT:unit.exe unit.obj simdcomp.lib
+
+unit_chars: lib # test source lives in tests/, as in the Unix makefile
+	$(CC) $(INC) $(CFLAGS) /c tests/unit_chars.c
+	$(LINK) $(LDFLAGS) /OUT:unit_chars.exe unit_chars.obj simdcomp.lib
+
+
+example: lib
+	$(CC) $(INC) $(CFLAGS) /c example.c
+	$(LINK) $(LDFLAGS) /OUT:example.exe example.obj simdcomp.lib
+
+benchmark: lib # benchmark source lives in benchmarks/, as in the Unix makefile
+	$(CC) $(INC) $(CFLAGS) /c benchmarks/benchmark.c
+	$(LINK) $(LDFLAGS) /OUT:benchmark.exe benchmark.obj simdcomp.lib
+
+clean:
+	del /Q *.obj
+	del /Q *.lib
+	del /Q *.exe
+	del /Q *.dll
+	del /Q *.pgc
+	del /Q *.pgd
+	del /Q *.pdb
+
--- a/cpp/simdcomp/package.json
+++ b/cpp/simdcomp/package.json
@@ -0,0 +1,16 @@
+{
+  "name": "simdcomp",
+  "version": "0.0.3",
+  "repo": "lemire/simdcomp",
+  "description": "A simple C library for compressing lists of integers",
+  "license": "BSD-3-Clause",
+  "src": [
+    "src/simdbitpacking.c",
+    "src/simdcomputil.c",
+    "src/simdintegratedbitpacking.c",
+    "include/simdbitpacking.h",
+    "include/simdcomp.h",
+    "include/simdcomputil.h",
+    "include/simdintegratedbitpacking.h"
+  ]
+}
--- a/cpp/simdcomp/scripts/avxpacking.py
+++ b/cpp/simdcomp/scripts/avxpacking.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python
+import sys
+def howmany(bit):
+    """ how many values are we going to pack? """
+    return 256
+
+def howmanywords(bit):
+    return (howmany(bit) * bit + 255)/256
+
+def howmanybytes(bit):
+    return howmanywords(bit) * 16
+
+print("""
+/** code generated by avxpacking.py starts here **/
+""")
+
+print("""typedef void (*avxpackblockfnc)(const uint32_t * pin, __m256i * compressed);""")
+print("""typedef void (*avxunpackblockfnc)(const __m256i * compressed, uint32_t * pout);""")
+
+
+
+
+
+
+def plurial(number):
+    if(number <> 1):
+        return "s"
+    else :
+        return ""
+
+print("")
+print("static void avxpackblock0(const uint32_t * pin, __m256i * compressed) {");
+print("  (void)compressed;");
+print("  (void) pin; /* we consumed {0} 32-bit integer{1} */ ".format(howmany(0),plurial(howmany(0))));
+print("}");
+print("")
+
+for bit in range(1,33):
+    print("")
+    print("/* we are going to pack {0} {1}-bit values, touching {2} 256-bit words, using {3} bytes */ ".format(howmany(bit),bit,howmanywords(bit),howmanybytes(bit)))
+    print("static void avxpackblock{0}(const uint32_t * pin, __m256i * compressed) {{".format(bit));
+    print("  const __m256i * in = (const __m256i *)  pin;");
+    print("  /* we are going to touch  {0} 256-bit word{1} */ ".format(howmanywords(bit),plurial(howmanywords(bit))));
+    if(howmanywords(bit) == 1):
+      print("  __m256i w0;")
+    else:
+      print("  __m256i w0, w1;")
+    if( (bit & (bit-1)) <> 0) : print("  __m256i tmp; /* used to store inputs at word boundary */")
+    oldword = 0
+    for j in range(howmany(bit)/8):
+      firstword = j * bit / 32
+      if(firstword > oldword):
+        print("  _mm256_storeu_si256(compressed + {0}, w{1});".format(oldword,oldword%2))
+        oldword = firstword
+      secondword = (j * bit + bit - 1)/32
+      firstshift = (j*bit) % 32
+      if( firstword == secondword):
+          if(firstshift == 0):
+            print("  w{0} = _mm256_lddqu_si256 (in + {1});".format(firstword%2,j))
+          else:
+            print("  w{0} = _mm256_or_si256(w{0},_mm256_slli_epi32(_mm256_lddqu_si256 (in + {1}) , {2}));".format(firstword%2,j,firstshift))
+      else:
+          print("  tmp = _mm256_lddqu_si256 (in + {0});".format(j))
+          print("  w{0} = _mm256_or_si256(w{0},_mm256_slli_epi32(tmp , {2}));".format(firstword%2,j,firstshift))
+          secondshift = 32-firstshift
+          print("  w{0} = _mm256_srli_epi32(tmp,{2});".format(secondword%2,j,secondshift))
+    print("  _mm256_storeu_si256(compressed + {0}, w{1});".format(secondword,secondword%2))
+    print("}");
+    print("")
+
+
+print("")
+print("static void avxpackblockmask0(const uint32_t * pin, __m256i * compressed) {");
+print("  (void)compressed;");
+print("  (void) pin; /* we consumed {0} 32-bit integer{1} */ ".format(howmany(0),plurial(howmany(0))));
+print("}");
+print("")
+
+for bit in range(1,33):
+    print("")
+    print("/* we are going to pack {0} {1}-bit values, touching {2} 256-bit words, using {3} bytes */ ".format(howmany(bit),bit,howmanywords(bit),howmanybytes(bit)))
+    print("static void avxpackblockmask{0}(const uint32_t * pin, __m256i * compressed) {{".format(bit));
+    print("  /* we are going to touch  {0} 256-bit word{1} */ ".format(howmanywords(bit),plurial(howmanywords(bit))));
+    if(howmanywords(bit) == 1):
+      print("  __m256i w0;")
+    else:
+      print("  __m256i w0, w1;")
+    print("  const __m256i * in = (const __m256i *) pin;");
+    if(bit < 32): print("  const __m256i mask = _mm256_set1_epi32({0});".format((1<<bit)-1));
+    def maskfnc(x):
+        if(bit == 32): return x
+        return " _mm256_and_si256 ( mask, {0}) ".format(x)
+    if( (bit & (bit-1)) <> 0) : print("  __m256i tmp; /* used to store inputs at word boundary */")
+    oldword = 0
+    for j in range(howmany(bit)/8):
+      firstword = j * bit / 32
+      if(firstword > oldword):
+        print("  _mm256_storeu_si256(compressed + {0}, w{1});".format(oldword,oldword%2))
+        oldword = firstword
+      secondword = (j * bit + bit - 1)/32
+      firstshift = (j*bit) % 32
+      loadstr = maskfnc(" _mm256_lddqu_si256 (in + {0}) ".format(j))
+      if( firstword == secondword):
+          if(firstshift == 0):
+            print("  w{0} = {1};".format(firstword%2,loadstr))
+          else:
+            print("  w{0} = _mm256_or_si256(w{0},_mm256_slli_epi32({1} , {2}));".format(firstword%2,loadstr,firstshift))
+      else:
+          print("  tmp = {0};".format(loadstr))
+          print("  w{0} = _mm256_or_si256(w{0},_mm256_slli_epi32(tmp , {2}));".format(firstword%2,j,firstshift))
+          secondshift = 32-firstshift
+          print("  w{0} = _mm256_srli_epi32(tmp,{2});".format(secondword%2,j,secondshift))
+    print("  _mm256_storeu_si256(compressed + {0}, w{1});".format(secondword,secondword%2))
+    print("}");
+    print("")
+
+
+print("static void avxunpackblock0(const __m256i * compressed, uint32_t * pout) {");
+print("  (void) compressed;");
+print("  memset(pout,0,{0});".format(howmany(0) * 4));  # memset takes a byte count: howmany(0) uint32_t outputs, 4 bytes each
+print("}");
+print("")
+
+for bit in range(1,33):
+    print("")
+    print("/* we packed {0} {1}-bit values, touching {2} 256-bit words, using {3} bytes */ ".format(howmany(bit),bit,howmanywords(bit),howmanybytes(bit)))
+    print("static void avxunpackblock{0}(const __m256i * compressed, uint32_t * pout) {{".format(bit));
+    print("  /* we are going to access  {0} 256-bit word{1} */ ".format(howmanywords(bit),plurial(howmanywords(bit))));
+    if(howmanywords(bit) == 1):
+      print("  __m256i w0;")
+    else:
+      print("  __m256i w0, w1;")
+    print("  __m256i * out = (__m256i *) pout;");
+    if(bit < 32): print("  const __m256i mask = _mm256_set1_epi32({0});".format((1<<bit)-1));
+    maskstr = " _mm256_and_si256 ( mask, {0}) "
+    if (bit == 32) : maskstr = " {0} " # no need
+    oldword = 0
+    print("  w0 = _mm256_lddqu_si256 (compressed);")
+    for j in range(howmany(bit)/8):
+      firstword = j * bit / 32
+      secondword = (j * bit + bit - 1)/32
+      if(secondword > oldword):
+        print("  w{0} = _mm256_lddqu_si256 (compressed + {1});".format(secondword%2,secondword))
+        oldword = secondword
+      firstshift = (j*bit) % 32
+      firstshiftstr = "_mm256_srli_epi32( w{0} , "+str(firstshift)+") "
+      if(firstshift == 0):
+          firstshiftstr =" w{0} " # no need
+      wfirst = firstshiftstr.format(firstword%2)
+      if( firstword == secondword):
+          if(firstshift + bit <> 32):
+            wfirst  = maskstr.format(wfirst)
+          print("  _mm256_storeu_si256(out + {0}, {1});".format(j,wfirst))
+      else:
+          secondshift = (32-firstshift)
+          wsecond = "_mm256_slli_epi32( w{0} , {1} ) ".format((firstword+1)%2,secondshift)
+          wfirstorsecond = " _mm256_or_si256 ({0},{1}) ".format(wfirst,wsecond)
+          wfirstorsecond = maskstr.format(wfirstorsecond)
+          print("  _mm256_storeu_si256(out + {0},\n    {1});".format(j,wfirstorsecond))
+    print("}");
+    print("")
+
+
+print("static avxpackblockfnc avxfuncPackArr[] = {")
+for bit in range(0,32):
+  print("&avxpackblock{0},".format(bit))
+print("&avxpackblock32")
+print("};")
+
+print("static avxpackblockfnc avxfuncPackMaskArr[] = {")
+for bit in range(0,32):
+  print("&avxpackblockmask{0},".format(bit))
+print("&avxpackblockmask32")
+print("};")
+
+
+print("static avxunpackblockfnc avxfuncUnpackArr[] = {")
+for bit in range(0,32):
+  print("&avxunpackblock{0},".format(bit))
+print("&avxunpackblock32")
+print("};")
+print("/** code generated by avxpacking.py ends here **/")
--- a/cpp/simdcomp/scripts/simdfor.py
+++ b/cpp/simdcomp/scripts/simdfor.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+
+
+from math import ceil
+
+print("""
+/**
+* Blablabla
+*
+*/
+
+""");
+
+def mask(bit):
+  return str((1 << bit) - 1)
+
+for length in [32]:
+  print("""
+static __m128i  iunpackFOR0(__m128i initOffset, const __m128i *   _in , uint32_t *    _out) {
+    __m128i       *out = (__m128i*)(_out);
+    int i;
+    (void) _in;
+    for (i = 0; i < 8; ++i) {
+        _mm_store_si128(out++, initOffset);
+    	_mm_store_si128(out++, initOffset);
+        _mm_store_si128(out++, initOffset);
+        _mm_store_si128(out++, initOffset);
+    }
+
+    return initOffset;
+}
+
+  """)
+  print("""
+
+static void ipackFOR0(__m128i initOffset , const uint32_t *   _in , __m128i *  out  ) {
+    (void) initOffset;
+    (void) _in;
+    (void) out;
+}
+""") 
+  for bit in range(1,33):
+    offsetVar = " initOffset";
+    print("""  
+static void ipackFOR"""+str(bit)+"""(__m128i """+offsetVar+""", const uint32_t *   _in, __m128i *   out) {
+    const __m128i       *in = (const __m128i*)(_in);
+    __m128i    OutReg;
+
+      """);
+    
+    if (bit != 32):
+      print("    __m128i CurrIn = _mm_load_si128(in);");
+      print("    __m128i InReg = _mm_sub_epi32(CurrIn, initOffset);");
+    else:
+      print("    __m128i InReg = _mm_load_si128(in);");
+      print("    (void) initOffset;");
+
+
+    inwordpointer = 0
+    valuecounter = 0
+    for k in range(ceil((length * bit) / 32)):
+      if(valuecounter == length): break
+      for x in range(inwordpointer,32,bit):
+        if(x!=0) :
+          print("    OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(InReg, " + str(x) + "));");
+        else:
+          print("    OutReg = InReg; ");
+        if((x+bit>=32) ):
+          while(inwordpointer<32):
+            inwordpointer += bit
+          print("    _mm_store_si128(out, OutReg);");
+          print("");
+
+          if(valuecounter + 1 < length):
+            print("    ++out;")
+          inwordpointer -= 32;
+          if(inwordpointer>0):
+            print("    OutReg = _mm_srli_epi32(InReg, " + str(bit) + " - " + str(inwordpointer) + ");");
+        if(valuecounter + 1 < length):
+          print("    ++in;") 
+
+          if (bit != 32):
+            print("    CurrIn = _mm_load_si128(in);");
+            print("    InReg = _mm_sub_epi32(CurrIn, initOffset);");
+          else:
+            print("    InReg = _mm_load_si128(in);");
+          print("");
+        valuecounter = valuecounter + 1
+        if(valuecounter == length): break
+    assert(valuecounter == length)
+    print("\n}\n\n""")
+
+  for bit in range(1,32):
+    offsetVar = " initOffset";
+    print("""\n
+static __m128i iunpackFOR"""+str(bit)+"""(__m128i """+offsetVar+""", const  __m128i*   in, uint32_t *   _out) {
+      """);
+    print("""    __m128i*   out = (__m128i*)(_out);
+    __m128i    InReg = _mm_load_si128(in);
+    __m128i    OutReg;    
+    __m128i     tmp;
+    const __m128i mask =  _mm_set1_epi32((1U<<"""+str(bit)+""")-1);
+
+    """);
+
+    MainText = "";
+
+    MainText += "\n";
+    inwordpointer = 0
+    valuecounter = 0
+    for k in range(ceil((length * bit) / 32)):
+      for x in range(inwordpointer,32,bit):
+        if(valuecounter == length): break
+        if (x > 0):
+          MainText += "    tmp = _mm_srli_epi32(InReg," + str(x) +");\n"; 
+        else:
+          MainText += "    tmp = InReg;\n"; 
+        if(x+bit<32):
+          MainText += "    OutReg = _mm_and_si128(tmp, mask);\n";
+        else:
+          MainText += "    OutReg = tmp;\n";        
+        if((x+bit>=32) ):      
+          while(inwordpointer<32):
+            inwordpointer += bit
+          if(valuecounter + 1 < length):
+             MainText += "    ++in;"
+             MainText += "    InReg = _mm_load_si128(in);\n";
+          inwordpointer -= 32;
+          if(inwordpointer>0):
+            MainText += "    OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, " + str(bit) + "-" + str(inwordpointer) + "), mask));\n\n";
+        if (bit != 32):
+          MainText += "    OutReg = _mm_add_epi32(OutReg, initOffset);\n"; 
+        MainText += "    _mm_store_si128(out++, OutReg);\n\n"; 
+        MainText += "";
+        valuecounter = valuecounter + 1
+        if(valuecounter == length): break
+    assert(valuecounter == length)
+    print(MainText)
+    print("    return initOffset;");
+    print("\n}\n\n")
+  print("""
+static __m128i iunpackFOR32(__m128i initvalue , const  __m128i*   in, uint32_t *    _out) {
+	__m128i * mout = (__m128i *)_out;
+	__m128i invec;
+	size_t k;
+	for(k = 0; k < 128/4; ++k) {
+		invec =  _mm_load_si128(in++);
+	    _mm_store_si128(mout++, invec);
+	}
+	return invec;
+}
+  """)
--- a/cpp/simdcomp/simdcomp.def
+++ b/cpp/simdcomp/simdcomp.def
@@ -0,0 +1,40 @@
+EXPORTS
+	simdpack
+	simdpackwithoutmask
+	simdunpack
+	bits
+	maxbits
+	maxbits_length
+	simdmin
+	simdmin_length
+	simdmaxmin
+	simdmaxmin_length
+	simdmaxbitsd1
+	simdmaxbitsd1_length
+	simdpackd1
+	simdpackwithoutmaskd1
+	simdunpackd1
+	simdsearchd1
+	simdsearchwithlengthd1
+	simdselectd1
+	simdpackFOR
+	simdselectFOR
+	simdsearchwithlengthFOR
+	simdunpackFOR
+	simdmin_length
+	simdmaxmin
+	simdmaxmin_length
+	simdpack_length
+	simdpackFOR_length
+	simdunpackFOR_length
+	simdpack_shortlength
+	simdfastsetFOR
+	simdfastset
+	simdfastsetd1
+	simdunpack_length
+	simdunpack_shortlength
+	simdsearchwithlengthFOR
+	simdscand1
+	simdfastsetd1fromprevious
+	simdfastsetd1
+
--- a/cpp/simdcomp/src/avxbitpacking.c
+++ b/cpp/simdcomp/src/avxbitpacking.c
--- a/cpp/simdcomp/src/simdbitpacking.c
+++ b/cpp/simdcomp/src/simdbitpacking.c
--- a/cpp/simdcomp/src/simdcomputil.c
+++ b/cpp/simdcomp/src/simdcomputil.c
@@ -0,0 +1,234 @@
+/**
+ * This code is released under a BSD License.
+ */
+
+#include "simdcomputil.h"
+#ifdef __SSE4_1__
+#include <smmintrin.h>
+#endif
+#include <assert.h>
+/* Delta(curr, prev): each 32-bit lane of curr minus the lane before it; the lowest lane uses the highest lane of prev */
+#define Delta(curr, prev) \
+    _mm_sub_epi32(curr, \
+            _mm_or_si128(_mm_slli_si128(curr, 4), _mm_srli_si128(prev, 12)))
+
+/* returns the bit width of v: 0 when v == 0, otherwise the 1-based position of its highest set bit */
+uint32_t bits(const uint32_t v) {
+#ifdef _MSC_VER
+    unsigned long answer;
+    if (v == 0) {
+        return 0;
+    }
+    _BitScanReverse(&answer, v);
+    return answer + 1;
+#else
+    return v == 0 ? 0 : 32 - __builtin_clz(v); /* not MSVC: assume a GCC-like compiler providing __builtin_clz */
+#endif
+}
+
+
+/* ORs the four 32-bit lanes of accumulator together and returns bits() of the result */
+static uint32_t maxbitas32int(const __m128i accumulator) {
+	const __m128i _tmp1 = _mm_or_si128(_mm_srli_si128(accumulator, 8), accumulator); /* (A,B,C,D) or (0,0,A,B) = (A,B,C or A,D or B)*/
+	const __m128i _tmp2 = _mm_or_si128(_mm_srli_si128(_tmp1, 4), _tmp1); /*  lowest lane becomes (D or B) or (C or A); the other lanes are not used */
+	uint32_t ans =  _mm_cvtsi128_si32(_tmp2); /* extract the lowest 32-bit lane */
+	return bits(ans);
+}
+/* returns bits() of the OR of the SIMDBlockSize integers starting at begin (read with unaligned loads) */
+SIMDCOMP_PURE uint32_t maxbits(const uint32_t * begin) {
+	    const __m128i* pin = (const __m128i*)(begin);
+	    __m128i accumulator = _mm_loadu_si128(pin);
+	    uint32_t k = 1;
+	    for(; 4*k < SIMDBlockSize; ++k) {
+	    	__m128i newvec = _mm_loadu_si128(pin+k);
+	        accumulator = _mm_or_si128(accumulator,newvec);
+	    }
+	    return maxbitas32int(accumulator);
+}
+static uint32_t orasint(const __m128i accumulator) {
+	const __m128i _tmp1 = _mm_or_si128(_mm_srli_si128(accumulator, 8), accumulator); /* (A,B,C,D) or (0,0,A,B) = (A,B,C or A,D or B)*/
+	const __m128i _tmp2 = _mm_or_si128(_mm_srli_si128(_tmp1, 4), _tmp1); /*  lowest lane becomes (D or B) or (C or A); the other lanes are not used */
+	return  _mm_cvtsi128_si32(_tmp2); /* the OR of all four lanes, taken from the lowest lane */
+}
+
+#ifdef __SSE4_1__
+/* returns the smallest (unsigned) of the four 32-bit lanes of accumulator */
+static uint32_t minasint(const __m128i accumulator) {
+	const __m128i _tmp1 = _mm_min_epu32(_mm_srli_si128(accumulator, 8), accumulator); /* low two lanes of (A,B,C,D) become min(C,A) and min(D,B); upper lanes are not used */
+	const __m128i _tmp2 = _mm_min_epu32(_mm_srli_si128(_tmp1, 4), _tmp1); /*  lowest lane becomes min(min(D,B), min(C,A)) */
+	return  _mm_cvtsi128_si32(_tmp2);
+}
+/* returns the largest (unsigned) of the four 32-bit lanes of accumulator */
+static uint32_t maxasint(const __m128i accumulator) {
+	const __m128i _tmp1 = _mm_max_epu32(_mm_srli_si128(accumulator, 8), accumulator); /* low two lanes of (A,B,C,D) become max(C,A) and max(D,B); upper lanes are not used */
+	const __m128i _tmp2 = _mm_max_epu32(_mm_srli_si128(_tmp1, 4), _tmp1); /*  lowest lane becomes max(max(D,B), max(C,A)) */
+	return  _mm_cvtsi128_si32(_tmp2);
+}
+/* returns the minimum (unsigned) of the SIMDBlockSize integers starting at in */
+uint32_t simdmin(const uint32_t * in) {
+    const __m128i* pin = (const __m128i*)(in);
+    __m128i accumulator =  _mm_loadu_si128(pin);
+     uint32_t k = 1;
+     for(; 4*k < SIMDBlockSize; ++k) {
+    	 __m128i newvec = _mm_loadu_si128(pin+k);
+         accumulator = _mm_min_epu32(accumulator,newvec);
+     }
+     return minasint(accumulator);
+}
+/* stores the minimum and maximum (unsigned) of the SIMDBlockSize integers starting at in into *getmin and *getmax */
+void simdmaxmin(const uint32_t * in, uint32_t * getmin, uint32_t * getmax) {
+    const __m128i* pin = (const __m128i*)(in);
+    __m128i minaccumulator =  _mm_loadu_si128(pin);
+    __m128i maxaccumulator =  minaccumulator;
+    uint32_t k = 1;
+     for(; 4*k < SIMDBlockSize; ++k) {
+    	 __m128i newvec = _mm_loadu_si128(pin+k);
+         minaccumulator = _mm_min_epu32(minaccumulator,newvec);
+         maxaccumulator = _mm_max_epu32(maxaccumulator,newvec);
+     }
+     *getmin = minasint(minaccumulator);
+     *getmax = maxasint(maxaccumulator);
+}
+
+/* returns the minimum (unsigned) of the first |length| integers of in; returns 0xFFFFFFFF when length is 0 */
+uint32_t simdmin_length(const uint32_t * in, uint32_t length) {
+	uint32_t currentmin = 0xFFFFFFFF;
+	uint32_t lengthdividedby4 = length / 4;
+	uint32_t offset = lengthdividedby4 * 4;
+	uint32_t k;
+	if (lengthdividedby4 > 0) {
+		const __m128i* pin = (const __m128i*)(in);
+		__m128i accumulator = _mm_loadu_si128(pin);
+		k = 1;
+		for(; 4*k < lengthdividedby4 * 4; ++k) {
+			__m128i newvec = _mm_loadu_si128(pin+k);
+			accumulator = _mm_min_epu32(accumulator,newvec);
+		}
+		currentmin = minasint(accumulator);
+	}
+	for (k = offset; k < length; ++k)
+		if (in[k] < currentmin)
+			currentmin = in[k];
+	return currentmin;
+}
+/* stores the min and max (unsigned) of the first |length| integers of in; length 0 leaves *getmin = 0xFFFFFFFF and *getmax = 0 */
+void simdmaxmin_length(const uint32_t * in, uint32_t length, uint32_t * getmin, uint32_t * getmax) {
+	uint32_t lengthdividedby4 = length / 4;
+	uint32_t offset = lengthdividedby4 * 4;
+	uint32_t k;
+	*getmin = 0xFFFFFFFF;
+	*getmax = 0;
+	if (lengthdividedby4 > 0) {
+		const __m128i* pin = (const __m128i*)(in);
+		__m128i minaccumulator = _mm_loadu_si128(pin);
+		__m128i maxaccumulator = minaccumulator;
+		k = 1;
+		for(; 4*k < lengthdividedby4 * 4; ++k) {
+			__m128i newvec = _mm_loadu_si128(pin+k);
+			minaccumulator = _mm_min_epu32(minaccumulator,newvec);
+			maxaccumulator = _mm_max_epu32(maxaccumulator,newvec);
+		}
+		*getmin = minasint(minaccumulator);
+		*getmax = maxasint(maxaccumulator);
+	}
+	for (k = offset; k < length; ++k) {
+		if (in[k] < *getmin)
+			*getmin = in[k];
+		if (in[k] > *getmax)
+			*getmax = in[k];
+	}
+}
+
+#endif
+/* returns bits() of the OR of the first |length| integers of in (0 when length is 0); "bigxor" actually holds an OR */
+SIMDCOMP_PURE uint32_t maxbits_length(const uint32_t * in,uint32_t length) {
+	  uint32_t k;
+	  uint32_t lengthdividedby4 = length / 4;
+	  uint32_t offset = lengthdividedby4 * 4;
+	  uint32_t bigxor = 0;
+	  if(lengthdividedby4 > 0) {
+		    const __m128i* pin = (const __m128i*)(in);
+		    __m128i accumulator = _mm_loadu_si128(pin);
+		    k = 1;
+		    for(; 4*k < 4*lengthdividedby4; ++k) {
+		    	__m128i newvec = _mm_loadu_si128(pin+k);
+		        accumulator = _mm_or_si128(accumulator,newvec);
+		    }
+		    bigxor = orasint(accumulator);
+	  }
+	  for(k = offset; k < length; ++k)
+		  bigxor |= in[k];
+	  return bits(bigxor);
+}
+
+
+/* bits() of the OR of all successive differences over 128 integers (SIMDBlockSize); the first difference is in[0] - initvalue */
+uint32_t simdmaxbitsd1(uint32_t initvalue, const uint32_t * in) {
+    __m128i  initoffset = _mm_set1_epi32 (initvalue);
+    const __m128i* pin = (const __m128i*)(in);
+    __m128i newvec = _mm_loadu_si128(pin);
+    __m128i accumulator = Delta(newvec , initoffset);
+    __m128i oldvec = newvec;
+    uint32_t k = 1;
+    for(; 4*k < SIMDBlockSize; ++k) {
+        newvec = _mm_loadu_si128(pin+k);
+        accumulator = _mm_or_si128(accumulator,Delta(newvec , oldvec));
+        oldvec = newvec;
+    }
+    initoffset = oldvec; /* NOTE(review): dead store, initoffset is not read after this point */
+    return maxbitas32int(accumulator);
+}
+
+
+/* bits() of the OR of all successive differences over |length| integers (length must be > 0); the first difference is in[0] - initvalue */
+uint32_t simdmaxbitsd1_length(uint32_t initvalue, const uint32_t * in,
+                uint32_t length) {
+    __m128i newvec;
+    __m128i oldvec;
+    __m128i initoffset;
+    __m128i accumulator;
+    const __m128i *pin;
+    uint32_t tmparray[4];
+    uint32_t k = 1;
+    uint32_t acc;
+
+    assert(length > 0);
+
+    pin = (const __m128i *)(in);
+    initoffset = _mm_set1_epi32(initvalue);
+    switch (length) {
+      case 1:
+        newvec = _mm_set1_epi32(in[0]);
+        break;
+      case 2:
+        newvec = _mm_setr_epi32(in[0], in[1], in[1], in[1]);
+        break;
+      case 3:
+        newvec = _mm_setr_epi32(in[0], in[1], in[2], in[2]);
+        break;
+      default:
+        newvec = _mm_loadu_si128(pin);
+        break;
+    }
+    accumulator = Delta(newvec, initoffset);
+    oldvec = newvec;
+
+    /* OR in the deltas of every further complete group of 4 integers */
+    while (k * 4 + 4 <= length) {
+        newvec = _mm_loadu_si128(pin + k);
+        accumulator = _mm_or_si128(accumulator, Delta(newvec, oldvec));
+        oldvec = newvec;
+        k++;
+    }
+
+    /* fold the four lanes of the accumulator into a single integer */
+    _mm_storeu_si128((__m128i *)(tmparray), accumulator);
+    acc = tmparray[0] | tmparray[1] | tmparray[2] | tmparray[3];
+
+    /* process the remaining integers one by one; k is at least 4 here, so the k == 0 branch is never taken */
+    for (k *= 4; k < length; k++)
+        acc |= in[k] - (k == 0 ? initvalue : in[k - 1]);
+
+    /* return the number of bits */
+    return bits(acc);
+}
--- a/cpp/simdcomp/src/simdfor.c
+++ b/cpp/simdcomp/src/simdfor.c
--- a/cpp/simdcomp/src/simdintegratedbitpacking.c
+++ b/cpp/simdcomp/src/simdintegratedbitpacking.c
--- a/cpp/simdcomp/src/simdpackedsearch.c
+++ b/cpp/simdcomp/src/simdpackedsearch.c
--- a/cpp/simdcomp/src/simdpackedselect.c
+++ b/cpp/simdcomp/src/simdpackedselect.c
--- a/cpp/simdcomp/tests/unit.c
+++ b/cpp/simdcomp/tests/unit.c
@@ -0,0 +1,900 @@
+/**
+ * This code is released under a BSD License.
+ */
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "simdcomp.h"
+
+
+/* round-trips simdpack_shortlength/simdunpack_shortlength for bit widths 0..31 and lengths 0..128; returns 0 on success, -1 on failure (early returns skip the free calls) */
+int testshortpack() {
+	int bit;
+	size_t i;
+	size_t length;
+	__m128i * bb;
+	srand(0);
+	printf("testshortpack\n");
+	for (bit = 0; bit < 32; ++bit) {
+		const size_t N = 128;
+		uint32_t * data = malloc(N * sizeof(uint32_t));
+		uint32_t * backdata = malloc(N * sizeof(uint32_t));
+		uint32_t * buffer = malloc((2 * N + 1024) * sizeof(uint32_t));
+
+		for (i = 0; i < N; ++i) {
+			data[i] = rand() & ((1 << bit) - 1);
+		}
+		for (length = 0; length <= N; ++length) {
+			for (i = 0; i < N; ++i) {
+				backdata[i] = 0;
+			}
+			bb = simdpack_shortlength(data, length, (__m128i *) buffer,
+					bit);
+			if((bb - (__m128i *) buffer) * sizeof(__m128i) != (unsigned) simdpack_compressedbytes(length,bit)) {
+			 printf("bug\n");
+			 return -1;
+			}
+			simdunpack_shortlength((__m128i *) buffer, length,
+					backdata, bit);
+			for (i = 0; i < length; ++i) {
+
+				if (data[i] != backdata[i]) {
+				    printf("bug\n");
+					return -1;
+				}
+			}
+		}
+		free(data);
+		free(backdata);
+		free(buffer);
+	}
+	return 0;
+}
+/* round-trips simdpack_length/simdunpack_length for bit widths 0..31 and lengths 0..2048; returns 0 on success, -1 on failure (early returns skip the free calls) */
+int testlongpack() {
+	int bit;
+	size_t i;
+	size_t length;
+	__m128i * bb;
+	srand(0);
+	printf("testlongpack\n");
+	for (bit = 0; bit < 32; ++bit) {
+		const size_t N = 2048;
+		uint32_t * data = malloc(N * sizeof(uint32_t));
+		uint32_t * backdata = malloc(N * sizeof(uint32_t));
+		uint32_t * buffer = malloc((2 * N + 1024) * sizeof(uint32_t));
+
+		for (i = 0; i < N; ++i) {
+			data[i] = rand() & ((1 << bit) - 1);
+		}
+		for (length = 0; length <= N; ++length) {
+			for (i = 0; i < N; ++i) {
+				backdata[i] = 0;
+			}
+			bb = simdpack_length(data, length, (__m128i *) buffer,
+					bit);
+			if((bb - (__m128i *) buffer) * sizeof(__m128i) != (unsigned) simdpack_compressedbytes(length,bit)) {
+			 printf("bug\n");
+			 return -1;
+			}
+			simdunpack_length((__m128i *) buffer, length,
+					backdata, bit);
+			for (i = 0; i < length; ++i) {
+
+				if (data[i] != backdata[i]) {
+				    printf("bug\n");
+					return -1;
+				}
+			}
+		}
+		free(data);
+		free(backdata);
+		free(buffer);
+	}
+	return 0;
+}
+
+
+/* checks simdpack/simdunpack round-trips, then simdfastset: first writing the data in reverse slot order, then in place; returns 0 on success, -1 on failure */
+int testset() {
+	int bit;
+	size_t i;
+	const size_t N = 128;
+	uint32_t * data = malloc(N * sizeof(uint32_t));
+	uint32_t * backdata = malloc(N * sizeof(uint32_t));
+	uint32_t * buffer = malloc((2 * N + 1024) * sizeof(uint32_t));
+
+	srand(0);
+
+	for (bit = 0; bit < 32; ++bit) {
+		printf("simple set %d \n",bit);
+
+		for (i = 0; i < N; ++i) {
+			data[i] = rand() & ((1 << bit) - 1);
+		}
+		for (i = 0; i < N; ++i) {
+			backdata[i] = 0;
+		}
+		simdpack(data, (__m128i *) buffer, bit);
+		simdunpack((__m128i *) buffer, backdata, bit);
+		for (i = 0; i < N; ++i) {
+			if (data[i] != backdata[i]) {
+			    printf("bug\n");
+				return -1;
+			}
+		}
+
+		for(i = N  ; i > 0; i--) {
+			simdfastset((__m128i *) buffer, bit, data[N - i], i - 1);
+		}
+		simdunpack((__m128i *) buffer, backdata, bit);
+		for (i = 0; i < N; ++i) {
+			if (data[i] != backdata[N - i - 1]) {
+			    printf("bug\n");
+				return -1;
+			}
+		}
+		simdpack(data, (__m128i *) buffer, bit);
+		for(i = 1  ; i <= N; i++) {
+			simdfastset((__m128i *) buffer, bit, data[i - 1], i - 1);
+		}
+		simdunpack((__m128i *) buffer, backdata, bit);
+		for (i = 0; i < N; ++i) {
+			if (data[i] != backdata[i]) {
+			    printf("bug\n");
+				return -1;
+			}
+		}
+
+	}
+	free(data);
+	free(backdata);
+	free(buffer);
+
+	return 0;
+}
+
+#ifdef __SSE4_1__
+/* packs a block of zeros with simdpackd1, sets each slot in increasing order with simdfastsetd1, reads it back with simdselectd1, then checks simdunpackd1; returns 0 or -1 */
+int testsetd1() {
+	int bit;
+	size_t i;
+	uint32_t newvalue;
+	const size_t N = 128;
+	uint32_t * data = malloc(N * sizeof(uint32_t));
+	uint32_t * datazeroes = malloc(N * sizeof(uint32_t));
+
+	uint32_t * backdata = malloc(N * sizeof(uint32_t));
+	uint32_t * buffer = malloc((2 * N + 1024) * sizeof(uint32_t));
+
+	srand(0);
+	for (bit = 0; bit < 32; ++bit) {
+		printf("simple set d1 %d \n",bit);
+		data[0] = rand() & ((1 << bit) - 1);
+		datazeroes[0] = 0;
+
+		for (i = 1; i < N; ++i) {
+			data[i] = data[i - 1] + (rand() & ((1 << bit) - 1));
+			datazeroes[i] = 0;
+		}
+		for (i = 0; i < N; ++i) {
+			backdata[i] = 0;
+		}
+		simdpackd1(0,datazeroes, (__m128i *) buffer, bit);
+ 	    for(i = 1  ; i <= N; i++) {
+			simdfastsetd1(0,(__m128i *) buffer, bit, data[i - 1], i - 1);
+			newvalue = simdselectd1(0, (const __m128i *) buffer, bit,i - 1);
+			if( newvalue != data[i-1] ) {
+				printf("bad set-select\n");
+				return -1;
+			}
+		}
+		simdunpackd1(0,(__m128i *) buffer, backdata, bit);
+		for (i = 0; i < N; ++i) {
+			if (data[i] != backdata[i])
+				return -1;
+		}
+	}
+	free(data);
+	free(backdata);
+	free(buffer);
+        free(datazeroes);
+	return 0;
+}
+#endif
+
+int testsetFOR() { /* Checks simdfastsetFOR/simdselectFOR/simdunpackFOR on one 128-value block for bit widths 0..31; returns 0 on success, -1 on the first mismatch. */
+	int bit;
+	size_t i;
+	uint32_t newvalue;
+	const size_t N = 128;
+	uint32_t * data = malloc(N * sizeof(uint32_t));
+	uint32_t * datazeroes = malloc(N * sizeof(uint32_t));
+	/* NOTE(review): malloc results are not checked, and the "return -1" paths below skip the free() calls. */
+	uint32_t * backdata = malloc(N * sizeof(uint32_t));
+	uint32_t * buffer = malloc((2 * N + 1024) * sizeof(uint32_t));
+
+	srand(0); /* fixed seed: same pseudo-random input on every run */
+	for (bit = 0; bit < 32; ++bit) {
+		printf("simple set FOR %d \n",bit);
+		for (i = 0; i < N; ++i) {
+			data[i] = (rand() & ((1 << bit) - 1)); /* random value limited to the low "bit" bits */
+			datazeroes[i] = 0;
+		}
+		for (i = 0; i < N; ++i) {
+			backdata[i] = 0;
+		}
+		simdpackFOR(0,datazeroes, (__m128i *) buffer, bit); /* start from a packed all-zero block; the leading 0 is presumably the reference value - TODO confirm in simdcomp.h */
+ 	    for(i = 1  ; i <= N; i++) { /* i runs 1..N, the slot index is i - 1 */
+ 	    	simdfastsetFOR(0,(__m128i *) buffer, bit, data[i - 1], i - 1);
+			newvalue = simdselectFOR(0, (const __m128i *) buffer, bit,i - 1); /* read the slot back right after writing it */
+			if( newvalue != data[i-1] ) {
+				printf("bad set-select\n");
+				return -1;
+			}
+		}
+		simdunpackFOR(0,(__m128i *) buffer, backdata, bit); /* full unpack must reproduce every value that was set */
+		for (i = 0; i < N; ++i) {
+			if (data[i] != backdata[i])
+				return -1;
+		}
+	}
+	free(data);
+	free(backdata);
+	free(buffer);
+        free(datazeroes);
+	return 0;
+}
+
+int testshortFORpack() { /* Round-trips simdpackFOR_length/simdunpackFOR_length for every length 0..128 and bit width 0..31; returns 0 on success, -1 on failure. */
+	int bit;
+	size_t i;
+	__m128i * rb;
+	size_t length;
+	uint32_t offset = 7; /* added to every input value and passed as the first argument of the pack/unpack calls */
+	srand(0);
+	for (bit = 0; bit < 32; ++bit) {
+		const size_t N = 128;
+		uint32_t * data = malloc(N * sizeof(uint32_t));
+		uint32_t * backdata = malloc(N * sizeof(uint32_t));
+		uint32_t * buffer = malloc((2 * N + 1024) * sizeof(uint32_t));
+		/* NOTE(review): the three buffers above are not freed on the "return -1" paths below. */
+		for (i = 0; i < N; ++i) {
+			data[i] = (rand() & ((1 << bit) - 1)) + offset;
+		}
+		for (length = 0; length <= N; ++length) {
+			for (i = 0; i < N; ++i) {
+				backdata[i] = 0;
+			}
+			rb = simdpackFOR_length(offset,data, length, (__m128i *) buffer,
+					bit);
+		    if(((rb - (__m128i *) buffer)*sizeof(__m128i)) != (unsigned) simdpackFOR_compressedbytes(length,bit)) { /* bytes between buffer and the returned pointer must equal the advertised compressed size */
+		      return -1;
+		    }
+			simdunpackFOR_length(offset,(__m128i *) buffer, length,
+					backdata, bit);
+			for (i = 0; i < length; ++i) { /* only the first "length" values were packed, so only those are compared */
+
+				if (data[i] != backdata[i])
+					return -1;
+			}
+		}
+		free(data);
+		free(backdata);
+		free(buffer);
+	}
+	return 0;
+}
+
+
+#ifdef __AVX2__
+
+int testbabyavx() { /* Round-trips avxpackwithoutmask/avxunpack on random blocks, 100 trials per bit width 0..31; returns 0 on success, -1 on failure. */
+	int bit;
+	int trial;
+	unsigned int i,j;
+	const size_t N = AVXBlockSize;
+	srand(0); /* fixed seed: same pseudo-random input on every run */
+	printf("testbabyavx\n");
+	printf("bit = ");
+	for (bit = 0; bit < 32; ++bit) {
+		printf(" %d ",bit);
+		fflush(stdout); /* show progress even though no newline has been printed yet */
+		for(trial = 0; trial < 100; ++trial) {
+			uint32_t * data = malloc(N * sizeof(uint32_t)+ 64 * sizeof(uint32_t));
+			uint32_t * backdata = malloc(N * sizeof(uint32_t) + 64 * sizeof(uint32_t) );
+			__m256i * buffer = malloc((2 * N + 1024) * sizeof(uint32_t) + 32);
+			/* NOTE(review): the three buffers above are not freed on the "return -1" paths below. */
+			for (i = 0; i < N; ++i) {
+				data[i] = rand() & ((uint32_t)(1 << bit) - 1); /* random value limited to the low "bit" bits */
+			}
+			for (i = 0; i < N; ++i) {
+				backdata[i] = 0;
+			}
+            if(avxmaxbits(data) != maxbits_length(data,N)) { /* cross-check the AVX bit-width routine against maxbits_length */
+            	printf("avxmaxbits is buggy\n");
+				return -1;
+            }
+
+			avxpackwithoutmask(data, buffer, bit);
+			avxunpack(buffer, backdata, bit);
+			for (i = 0; i < AVXBlockSize; ++i) {
+				if (data[i] != backdata[i]) {
+					printf("bug\n");
+					for (j = 0; j < N; ++j) { /* on the first mismatch, dump the whole block for diagnosis */
+						if (data[j] != backdata[j]) {
+							printf("data[%d]=%d v.s. backdata[%d]=%d\n",j,data[j],j,backdata[j]);
+						} else {
+							printf("data[%d]=%d\n",j,data[j]);
+						}
+					}
+					return -1;
+				}
+			}
+			free(data);
+			free(backdata);
+			free(buffer);
+		}
+	}
+	printf("\n");
+	return 0;
+}
+
+int testavx2() { /* Round-trips avxpackwithoutmask/avxunpack over 5000 blocks of k * gap values for several gaps; returns 0 on success, -1 or -2 on failure. */
+    int N = 5000 * AVXBlockSize, gap;
+    __m256i * buffer = malloc(AVXBlockSize * sizeof(uint32_t));
+    uint32_t * datain = malloc(N * sizeof(uint32_t));
+    uint32_t * backbuffer = malloc(AVXBlockSize * sizeof(uint32_t));
+    for (gap = 1; gap <= 387420489; gap *= 3) { /* gap takes the powers of 3 from 1 up to 387420489 */
+        int k;
+        printf(" gap = %u \n", gap);
+        for (k = 0; k < N; ++k)
+            datain[k] = k * gap; /* NOTE(review): the product is computed in int and exceeds INT_MAX for the larger gaps (assuming 32-bit int) */
+        for (k = 0; k * AVXBlockSize < N; ++k) {
+            /*
+               First part works for general arrays (sorted or unsorted)
+            */
+            int j;
+       	    /* we compute the bit width */
+            const uint32_t b = avxmaxbits(datain + k * AVXBlockSize);
+            if(avxmaxbits(datain + k * AVXBlockSize) != maxbits_length(datain + k * AVXBlockSize,AVXBlockSize)) {
+            	printf("avxmaxbits is buggy %d %d \n",
+            			avxmaxbits(datain + k * AVXBlockSize),
+						maxbits_length(datain + k * AVXBlockSize,AVXBlockSize));
+				return -1;
+            }
+            printf("bit width = %d\n",b);
+
+
+            /* we read 256 integers at "datain + k * AVXBlockSize" and
+               write b 256-bit vectors at "buffer" */
+            avxpackwithoutmask(datain + k * AVXBlockSize, buffer, b);
+            /* we read back b 256-bit vectors at "buffer" and write AVXBlockSize integers at backbuffer */
+			avxunpack(buffer, backbuffer, b);/* uncompressed */
+			for (j = 0; j < AVXBlockSize; ++j) {
+				if (backbuffer[j] != datain[k * AVXBlockSize + j]) {
+					int i;
+					printf("bug in avxpack\n");
+					for(i = 0; i < AVXBlockSize; ++i) { /* dump the whole block, tagging each mismatching entry with "bug" */
+						printf("data[%d]=%d got back %d %s\n",i,
+								datain[k * AVXBlockSize + i],backbuffer[i],
+								datain[k * AVXBlockSize + i]!=backbuffer[i]?"bug":"");
+					}
+					return -2; /* NOTE(review): buffers are not freed on the early-return paths */
+				}
+			}
+        }
+    }
+    free(buffer);
+    free(datain);
+    free(backbuffer);
+    printf("Code looks good.\n");
+    return 0;
+}
+#endif /* avx2 */
+
+int test() { /* Round-trips the plain and the differential (d1) SSE pack/unpack routines over 5000 blocks of k * gap values; returns 0 on success, -2 or -3 on failure. */
+    int N = 5000 * SIMDBlockSize, gap;
+    __m128i * buffer = malloc(SIMDBlockSize * sizeof(uint32_t));
+    uint32_t * datain = malloc(N * sizeof(uint32_t));
+    uint32_t * backbuffer = malloc(SIMDBlockSize * sizeof(uint32_t));
+    for (gap = 1; gap <= 387420489; gap *= 3) { /* gap takes the powers of 3 from 1 up to 387420489 */
+        int k;
+        printf(" gap = %u \n", gap);
+        for (k = 0; k < N; ++k)
+            datain[k] = k * gap; /* NOTE(review): the product is computed in int and exceeds INT_MAX for the larger gaps (assuming 32-bit int) */
+        for (k = 0; k * SIMDBlockSize < N; ++k) {
+            /*
+               First part works for general arrays (sorted or unsorted)
+            */
+            int j;
+       	    /* we compute the bit width */
+            const uint32_t b = maxbits(datain + k * SIMDBlockSize);
+            /* we read 128 integers at "datain + k * SIMDBlockSize" and
+               write b 128-bit vectors at "buffer" */
+            simdpackwithoutmask(datain + k * SIMDBlockSize, buffer, b);
+            /* we read back b 128-bit vectors at "buffer" and write 128 integers at backbuffer */
+            simdunpack(buffer, backbuffer, b);/* uncompressed */
+            for (j = 0; j < SIMDBlockSize; ++j) {
+                if (backbuffer[j] != datain[k * SIMDBlockSize + j]) {
+                    printf("bug in simdpack\n");
+                    return -2; /* NOTE(review): buffers are not freed on the early-return paths */
+                }
+            }
+
+	    {
+                /*
+                 next part assumes that the data is sorted (uses differential coding)
+                */
+                uint32_t offset = 0;
+                /* we compute the bit width */
+                const uint32_t b1 = simdmaxbitsd1(offset,
+                    datain + k * SIMDBlockSize);
+               /* we read 128 integers at "datain + k * SIMDBlockSize" and
+                  write b1 128-bit vectors at "buffer" */
+               simdpackwithoutmaskd1(offset, datain + k * SIMDBlockSize, buffer,
+                    b1);
+               /* we read back b1 128-bit vectors at "buffer" and write 128 integers at backbuffer */
+               simdunpackd1(offset, buffer, backbuffer, b1);
+               for (j = 0; j < SIMDBlockSize; ++j) {
+                   if (backbuffer[j] != datain[k * SIMDBlockSize + j]) {
+                       printf("bug in simdpack d1\n");
+                       return -3;
+                   }
+               }
+               offset = datain[k * SIMDBlockSize + SIMDBlockSize - 1]; /* NOTE(review): dead store - offset is declared and reset to 0 at the top of this block on every iteration */
+	    }
+        }
+    }
+    free(buffer);
+    free(datain);
+    free(backbuffer);
+    printf("Code looks good.\n");
+    return 0;
+}
+
+#ifdef __SSE4_1__
+int testFOR() { /* Checks simdpackFOR against simdselectFOR and simdunpackFOR over 5000 blocks of k * gap values; returns 0 on success, -2 or -3 on failure. */
+    int N = 5000 * SIMDBlockSize, gap;
+    __m128i * buffer = malloc(SIMDBlockSize * sizeof(uint32_t));
+    uint32_t * datain = malloc(N * sizeof(uint32_t));
+    uint32_t * backbuffer = malloc(SIMDBlockSize * sizeof(uint32_t));
+    uint32_t tmax, tmin, tb;
+    for (gap = 1; gap <= 387420489; gap *= 2) { /* gap takes the powers of 2 that do not exceed 387420489 */
+        int k;
+        printf(" gap = %u \n", gap);
+        for (k = 0; k < N; ++k)
+            datain[k] = k * gap; /* NOTE(review): the product is computed in int and exceeds INT_MAX for the larger gaps (assuming 32-bit int) */
+        for (k = 0; k * SIMDBlockSize < N; ++k) {
+            int j;
+            simdmaxmin_length(datain + k * SIMDBlockSize,SIMDBlockSize,&tmin,&tmax);
+       	    /* we compute the bit width */
+            tb  = bits(tmax - tmin);
+            /* tb is derived from the spread tmax - tmin of the block, not from the values themselves */
+
+            /* we read 128 integers at "datain + k * SIMDBlockSize" and
+               write b 128-bit vectors at "buffer" */
+            simdpackFOR(tmin,datain + k * SIMDBlockSize, buffer, tb);
+            /* every slot must be readable individually from the packed block */
+            for (j = 0; j < SIMDBlockSize; ++j) {
+                        uint32_t selectedvalue = simdselectFOR(tmin,buffer,tb,j);
+                    	if (selectedvalue != datain[k * SIMDBlockSize + j]) {
+                            printf("bug in simdselectFOR\n");
+                            return -3; /* NOTE(review): buffers are not freed on the early-return paths */
+                        }
+            }
+            /* we read back tb 128-bit vectors at "buffer" and write 128 integers at backbuffer */
+            simdunpackFOR(tmin,buffer, backbuffer, tb);/* uncompressed */
+            for (j = 0; j < SIMDBlockSize; ++j) {
+            	if (backbuffer[j] != datain[k * SIMDBlockSize + j]) {
+                    printf("bug in simdpackFOR\n");
+                    return -2;
+                }
+            }
+        }
+    }
+    free(buffer);
+    free(datain);
+    free(backbuffer);
+    printf("Code looks good.\n");
+    return 0;
+}
+#endif
+
+#define MAX 300 /* number of buffer lengths (1..MAX) exercised by test_simdmaxbitsd1_length */
+int test_simdmaxbitsd1_length() { /* Checks that simdmaxbitsd1_length reports 2 for every length 1..MAX; returns 0 on success, -1 on failure. */
+    uint32_t result, buffer[MAX + 1];
+    int i, j;
+    /* pre-fill with 0xff bytes so that entries beyond the tested length hold large values */
+    memset(&buffer[0], 0xff, sizeof(buffer));
+
+    /* this test creates buffers of different length; each buffer is
+     * initialized to result in the following deltas:
+     * length 1: 2
+     * length 2: 1 2
+     * length 3: 1 1 2
+     * length 4: 1 1 1 2
+     * length 5: 1 1 1 1 2
+     * etc. Each sequence's "maxbits" is 2. */
+    for (i = 0; i < MAX; i++) {
+      for (j = 0; j < i; j++)
+        buffer[j] = j + 1; /* values 1..i, i.e. deltas of 1 starting from 0 */
+      buffer[i] = i + 2; /* last value jumps by 2 */
+      /* the tested length is i + 1; the initial value passed is 0 */
+      result = simdmaxbitsd1_length(0, &buffer[0], i + 1);
+      if (result != 2) {
+        printf("simdmaxbitsd1_length: unexpected result %u in loop %d\n",
+                result, i);
+        return -1;
+      }
+    }
+    printf("simdmaxbitsd1_length: ok\n");
+    return 0;
+}
+
+int uint32_cmp(const void *a, const void *b) /* qsort-style comparator for uint32_t: returns -1, 1 or 0 when *a is below, above or equal to *b */
+{
+    const uint32_t *ia = (const uint32_t *)a;
+    const uint32_t *ib = (const uint32_t *)b;
+    if(*ia < *ib) /* explicit comparisons instead of a subtraction, so no wrap-around can occur */
+    	return -1;
+    else if (*ia > *ib)
+    	return 1;
+    return 0;
+}
+
+#ifdef __SSE4_1__
+int test_simdpackedsearch() { /* Checks simdsearchd1 on the block 1..128 packed with bit widths 1..32; failures trip assert(), the function itself always returns 0. */
+    uint32_t buffer[128];
+    uint32_t result = 0;
+    int b, i;
+    uint32_t init = 0;
+    __m128i initial = _mm_set1_epi32(init);
+
+    /* initialize the buffer */
+    for (i = 0; i < 128; i++)
+        buffer[i] = (uint32_t)(i + 1);
+
+    /* this test creates delta encoded buffers with different bits, then
+     * performs lower bound searches for each key */
+    for (b = 1; b <= 32; b++) {
+        uint32_t out[128];
+        /* delta-encode to 'b' bits */
+        simdpackwithoutmaskd1(init, buffer, (__m128i *)out, b);
+        initial = _mm_setzero_si128(); /* overwritten again before the first search below */
+        printf("simdsearchd1: %d bits\n", b);
+
+        /* now perform the searches */
+        initial = _mm_set1_epi32(init); /* "initial" is reset before every search; presumably simdsearchd1 updates it through the pointer - TODO confirm */
+        assert(simdsearchd1(&initial, (__m128i *)out, b, 0, &result) == 0); /* key 0: expected position 0 */
+        assert(result > 0);
+
+        for (i = 1; i <= 128; i++) { /* every stored key i is expected at position i - 1 */
+        	initial = _mm_set1_epi32(init);
+            assert(simdsearchd1(&initial, (__m128i *)out, b,
+                                    (uint32_t)i, &result) == i - 1);
+            assert(result == (unsigned)i);
+        }
+        initial = _mm_set1_epi32(init);
+        assert(simdsearchd1(&initial, (__m128i *)out, b, 200, &result)
+                        == 128); /* key 200 exceeds every stored value: expected position 128 */
+        assert(result > 200);
+    }
+    printf("simdsearchd1: ok\n");
+    return 0;
+}
+
+int test_simdpackedsearchFOR() { /* Checks simdselectFOR and simdsearchwithlengthFOR on FOR-packed blocks for value ranges of 1..32 bits; failures trip assert(), the function itself always returns 0. */
+    uint32_t buffer[128];
+    uint32_t result = 0;
+    int b;
+    uint32_t i;
+    uint32_t maxv, tmin, tmax, tb;
+    uint32_t out[128];
+
+    /* this test creates buffers packed with simdpackFOR (block minimum tmin as
+     * reference) for different value ranges, then searches for each key */
+    for (b = 1; b <= 32; b++) {
+        /* initialize the buffer */
+    	maxv = (b == 32)
+    			? 0xFFFFFFFF
+    					: ((1U<<b) - 1);
+        for (i = 0; i < 128; i++)
+            buffer[i] = maxv * (i + 1) / 128;
+        simdmaxmin_length(buffer,SIMDBlockSize,&tmin,&tmax);
+   	    /* we compute the bit width */
+        tb  = bits(tmax - tmin);
+        /* pack to 'tb' bits relative to tmin */
+        simdpackFOR(tmin, buffer, (__m128i *)out, tb);
+        printf("simdsearchFOR: %d bits\n", b);
+
+        /* first check that every slot can be selected individually */
+        for (i = 0; i < 128; i++) {
+        	assert(buffer[i] == simdselectFOR(tmin, (__m128i *)out, tb,i));
+        }
+        for (i = 0; i < 128; i++) { /* then search for every stored key */
+            int x = simdsearchwithlengthFOR(tmin, (__m128i *)out, tb,
+                                    128,buffer[i], &result) ;
+            assert(simdselectFOR(tmin, (__m128i *)out, tb,x) == buffer[x]);
+            assert(simdselectFOR(tmin, (__m128i *)out, tb,x) == result);
+            assert(buffer[x] == result);
+            assert(result == buffer[i]); /* the value found must equal the key, although x may differ from i when values repeat */
+            assert(buffer[x] == buffer[i]);
+        }
+    }
+    printf("simdsearchFOR: ok\n");
+    return 0;
+}
+
+int test_simdpackedsearch_advanced() { /* Cross-checks simdsearchd1 against simdsearchwithlengthd1 on generated sorted blocks for bit widths 0..32; failures trip assert(), the function itself always returns 0. */
+    uint32_t buffer[128];
+    uint32_t backbuffer[128];
+	uint32_t out[128];
+    uint32_t result = 0;
+    uint32_t b, i;
+    uint32_t init = 0;
+    __m128i initial = _mm_set1_epi32(init);
+
+
+    /* this test creates delta encoded buffers with different bits, then
+     * performs lower bound searches for each key */
+    for (b = 0; b <= 32; b++) {
+    	uint32_t prev = init;
+        /* initialize the buffer */
+        for (i = 0; i < 128; i++) {
+            buffer[i] =  ((uint32_t)(1431655765 * i + 0xFFFFFFFF)) ;
+            if(b < 32) buffer[i] %= (1<<b); /* reduce each generated value below 2^b */
+        }
+        /* sort the generated values before turning them into a running sum */
+        qsort(buffer,128, sizeof(uint32_t), uint32_cmp);
+        /* prefix sum: buffer[i] becomes the sum of the sorted values up to i */
+        for (i = 0; i < 128; i++) {
+           buffer[i] = buffer[i] + prev;
+           prev = buffer[i];
+        }
+        for (i = 1; i < 128; i++) { /* if the 32-bit sum wrapped, clamp so the sequence stays non-decreasing */
+        	if(buffer[i] < buffer[i-1] )
+        		buffer[i] = buffer[i-1];
+        }
+        assert(simdmaxbitsd1(init, buffer)<=b);
+        for (i = 0; i < 128; i++) {
+        	out[i] = 0; /* memset would do too */
+        }
+
+        /* delta-encode to 'b' bits */
+        simdpackwithoutmaskd1(init, buffer, (__m128i *)out, b);
+        simdunpackd1(init,  (__m128i *)out, backbuffer, b);
+        /* sanity check: the packed block must unpack to the original values */
+        for (i = 0; i < 128; i++) {
+        	assert(buffer[i] == backbuffer[i]);
+        }
+
+        printf("advanced simdsearchd1: %d bits\n", b);
+        /* pass 1: search for every stored key */
+        for (i = 0; i < 128; i++) {
+        	int pos;
+            initial = _mm_set1_epi32(init);
+        	pos = simdsearchd1(&initial, (__m128i *)out, b,
+                    buffer[i], &result);
+        	assert(pos == simdsearchwithlengthd1(init, (__m128i *)out, b, 128,
+                    buffer[i], &result));
+        	assert(buffer[pos] == buffer[i]);
+            if(pos > 0)
+            	assert(buffer[pos - 1] < buffer[i]); /* pos must be the first slot holding the key */
+            assert(result == buffer[i]);
+        }
+        for (i = 0; i < 128; i++) { /* pass 2: search for key - 1 */
+        	int pos;
+        	if(buffer[i] == 0) continue; /* key - 1 would wrap around */
+        	initial = _mm_set1_epi32(init);
+        	pos = simdsearchd1(&initial, (__m128i *)out, b,
+                    buffer[i] - 1, &result);
+        	assert(pos == simdsearchwithlengthd1(init, (__m128i *)out, b, 128,
+                    buffer[i] - 1, &result));
+        	assert(buffer[pos] >= buffer[i]  - 1);
+            if(pos > 0)
+            	assert(buffer[pos - 1] < buffer[i]  - 1);
+            assert(result == buffer[pos]);
+        }
+		for (i = 0; i < 128; i++) { /* pass 3: search for key + 1 */
+			int pos;
+			if (buffer[i] + 1 == 0) /* key + 1 would wrap around */
+				continue;
+			initial = _mm_set1_epi32(init);
+			pos = simdsearchd1(&initial, (__m128i *) out, b,
+					buffer[i] + 1, &result);
+			assert(pos == simdsearchwithlengthd1(init, (__m128i *)out, b, 128,
+                    buffer[i] + 1, &result));
+			if(pos == 128) { /* position 128 means nothing in the block reaches key + 1, so the key must equal the last value */
+				assert(buffer[i] == buffer[127]);
+			} else {
+			  assert(buffer[pos] >= buffer[i] + 1);
+			  if (pos > 0)
+				assert(buffer[pos - 1] < buffer[i] + 1);
+			  assert(result == buffer[pos]);
+			}
+		}
+    }
+    printf("advanced simdsearchd1: ok\n");
+    return 0;
+}
+
+int test_simdpackedselect() { /* Checks simdselectd1 on the block 33..160 packed with bit widths 1..32; failures trip assert(), the function itself always returns 0. */
+    uint32_t buffer[128];
+    uint32_t initial = 33;
+    int b, i;
+
+    /* initialize the buffer */
+    for (i = 0; i < 128; i++)
+        buffer[i] = (uint32_t)(initial + i);
+
+    /* this test creates delta encoded buffers with different bits, then
+     * selects every position and compares it with the expected value */
+    for (b = 1; b <= 32; b++) {
+        uint32_t out[128];
+        /* delta-encode to 'b' bits */
+        simdpackwithoutmaskd1(initial, buffer, (__m128i *)out, b);
+
+        printf("simdselectd1: %d bits\n", b);
+
+        /* now perform the selects */
+        for (i = 0; i < 128; i++) {
+            assert(simdselectd1(initial, (__m128i *)out, b, (uint32_t)i)
+                            == initial + i);
+        }
+    }
+    printf("simdselectd1: ok\n");
+    return 0;
+}
+
+int test_simdpackedselect_advanced() { /* Checks simdselectd1 on generated non-decreasing blocks for bit widths 0..32; failures trip assert(), the function itself always returns 0. */
+    uint32_t buffer[128];
+    uint32_t initial = 33;
+    uint32_t b;
+    int i;
+
+    /* this test creates delta encoded buffers with different bits, then
+     * selects every position and compares it with the expected value */
+    for (b = 0; b <= 32; b++) {
+        uint32_t prev = initial;
+    	uint32_t out[128];
+        /* initialize the buffer */
+        for (i = 0; i < 128; i++) {
+            buffer[i] =  ((uint32_t)(165576 * i)) ;
+            if(b < 32) buffer[i] %= (1<<b); /* reduce each generated value below 2^b */
+        }
+        for (i = 0; i < 128; i++) { /* prefix sum starting from "initial" */
+           buffer[i] = buffer[i] + prev;
+           prev = buffer[i];
+        }
+        /* if the 32-bit sum wrapped, clamp so the sequence stays non-decreasing */
+        for (i = 1; i < 128; i++) {
+        	if(buffer[i] < buffer[i-1] )
+        		buffer[i] = buffer[i-1];
+        }
+        assert(simdmaxbitsd1(initial, buffer)<=b);
+
+        for (i = 0; i < 128; i++) {
+        	out[i] = 0; /* memset would do too */
+        }
+
+        /* delta-encode to 'b' bits */
+        simdpackwithoutmaskd1(initial, buffer, (__m128i *)out, b);
+
+        printf("simdselectd1: %d bits\n", b);
+
+        /* now perform the selects */
+        for (i = 0; i < 128; i++) {
+        	uint32_t valretrieved = simdselectd1(initial, (__m128i *)out, b, (uint32_t)i);
+            assert(valretrieved == buffer[i]);
+        }
+    }
+    printf("advanced simdselectd1: ok\n");
+    return 0;
+}
+#endif
+
+
+int main() { /* Runs every test in sequence; stops at the first failing one, prints its label and returns its non-zero code. Returns 0 when all pass. */
+    int r;
+    r =  testsetFOR();
+    if (r) {
+         printf("test failure 1\n");
+         return r;
+    }
+
+#ifdef __SSE4_1__
+    r =  testsetd1(); /* only built when the compiler defines __SSE4_1__ */
+    if (r) {
+         printf("test failure 2\n");
+         return r;
+    }
+#endif
+    r =  testset();
+    if (r) {
+         printf("test failure 3\n");
+         return r;
+    }
+
+    r = testshortFORpack();
+    if (r) {
+         printf("test failure 4\n");
+         return r;
+    }
+    r = testshortpack();
+    if (r) {
+         printf("test failure 5\n");
+         return r;
+    }
+    r = testlongpack();
+    if (r) {
+         printf("test failure 6\n");
+         return r;
+    }
+#ifdef __SSE4_1__
+    r = test_simdpackedsearchFOR();
+    if (r) {
+         printf("test failure 7\n");
+         return r;
+    }
+
+    r = testFOR();
+    if (r) {
+         printf("test failure 8\n");
+         return r;
+    }
+#endif
+#ifdef __AVX2__
+    r= testbabyavx(); /* AVX tests are only built when the compiler defines __AVX2__ */
+    if (r) {
+         printf("test failure baby avx\n");
+         return r;
+    }
+
+    r = testavx2();
+    if (r) {
+         printf("test failure 9 avx\n");
+         return r;
+    }
+#endif
+    r = test();
+    if (r) {
+         printf("test failure 9\n"); /* NOTE(review): the number 9 is also used by the "9 avx" label above */
+         return r;
+    }
+
+    r = test_simdmaxbitsd1_length();
+    if (r) {
+         printf("test failure 10\n");
+         return r;
+    }
+#ifdef __SSE4_1__
+    r = test_simdpackedsearch(); /* the four tests in this group report failures through assert() and always return 0 */
+    if (r) {
+         printf("test failure 11\n");
+         return r;
+    }
+
+    r = test_simdpackedsearch_advanced();
+    if (r) {
+         printf("test failure 12\n");
+         return r;
+    }
+
+    r = test_simdpackedselect();
+    if (r) {
+         printf("test failure 13\n");
+         return r;
+    }
+
+    r = test_simdpackedselect_advanced();
+    if (r) {
+         printf("test failure 14\n");
+         return r;
+    }
+#endif
+    printf("All tests OK!\n");
+
+
+    return 0;
+}
--- a/cpp/simdcomp/tests/unit_chars.c
+++ b/cpp/simdcomp/tests/unit_chars.c
@@ -0,0 +1,102 @@
+/**
+ * This code is released under a BSD License.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include "simdcomp.h"
+
+
+#define get_random_char() ((uint8_t)(rand() % 256)) /* one pseudo-random byte; fully parenthesised and without a trailing ';' so it can be used inside any expression */
+
+
+int main() { /* Round-trips the plain and the differential (d1) pack/unpack routines on random byte data; returns 0 on success, -2 or -3 on failure. */
+    int N = 5000 * SIMDBlockSize, gap;
+    __m128i * buffer = malloc(SIMDBlockSize * sizeof(uint32_t));
+    uint32_t * datain = malloc(N * sizeof(uint32_t));
+    uint32_t * backbuffer = malloc(SIMDBlockSize * sizeof(uint32_t));
+
+    srand(time(NULL)); /* time-based seed: the input differs from run to run */
+
+    for (gap = 1; gap <= 387420489; gap *= 3) { /* gap is only printed here; it does not influence the generated data */
+        int k;
+        printf(" gap = %u \n", gap);
+
+    /* simulate some random character string, don't care about endianness */
+        for (k = 0; k < N; ++k) {
+        uint8_t _tmp[4];
+            /* four random bytes are copied into one 32-bit input value */
+            _tmp[0] = get_random_char();
+            _tmp[1] = get_random_char();
+            _tmp[2] = get_random_char();
+            _tmp[3] = get_random_char();
+
+            memmove(&datain[k], _tmp, 4);
+        }
+        for (k = 0; k * SIMDBlockSize < N; ++k) {
+            /*
+               First part works for general arrays (sorted or unsorted)
+            */
+            int j;
+               /* we compute the bit width */
+            const uint32_t b = maxbits(datain + k * SIMDBlockSize);
+            /* we read 128 integers at "datain + k * SIMDBlockSize" and
+               write b 128-bit vectors at "buffer" */
+            simdpackwithoutmask(datain + k * SIMDBlockSize, buffer, b);
+            /* we read back b 128-bit vectors at "buffer" and write 128 integers at backbuffer */
+            simdunpack(buffer, backbuffer, b);/* uncompressed */
+            for (j = 0; j < SIMDBlockSize; ++j) {
+                uint8_t chars_back[4];
+                uint8_t chars_in[4];
+                /* compare the values byte by byte */
+                memmove(chars_back, &backbuffer[j], 4);
+                memmove(chars_in, &datain[k * SIMDBlockSize + j], 4);
+
+                if (chars_in[0] != chars_back[0]
+                    || chars_in[1] != chars_back[1]
+                    || chars_in[2] != chars_back[2]
+                    || chars_in[3] != chars_back[3]) {
+                    printf("bug in simdpack\n");
+                    return -2; /* NOTE(review): buffers are not freed on the early-return paths */
+                }
+            }
+
+            {
+                /*
+                 next part uses differential coding; NOTE(review): the random input generated above is not sorted
+                */
+                uint32_t offset = 0;
+                /* we compute the bit width */
+                const uint32_t b1 = simdmaxbitsd1(offset,
+                datain + k * SIMDBlockSize);
+                   /* we read 128 integers at "datain + k * SIMDBlockSize" and
+                  write b1 128-bit vectors at "buffer" */
+                   simdpackwithoutmaskd1(offset, datain + k * SIMDBlockSize, buffer,
+                b1);
+                   /* we read back b1 128-bit vectors at "buffer" and write 128 integers at backbuffer */
+                   simdunpackd1(offset, buffer, backbuffer, b1);
+                for (j = 0; j < SIMDBlockSize; ++j) {
+                    uint8_t chars_back[4];
+                    uint8_t chars_in[4];
+
+                    memmove(chars_back, &backbuffer[j], 4);
+                    memmove(chars_in, &datain[k * SIMDBlockSize + j], 4);
+
+                    if (chars_in[0] != chars_back[0]
+                        || chars_in[1] != chars_back[1]
+                        || chars_in[2] != chars_back[2]
+                        || chars_in[3] != chars_back[3]) {
+                        printf("bug in simdpack\n"); /* same message as the plain check above; only the return code (-3) tells the two apart */
+                        return -3;
+                    }
+                }
+                offset = datain[k * SIMDBlockSize + SIMDBlockSize - 1]; /* NOTE(review): dead store - offset is declared and reset to 0 at the top of this block on every iteration */
+            }
+        }
+    }
+    free(buffer);
+    free(datain);
+    free(backbuffer);
+    printf("Code looks good.\n");
+    return 0;
+}
--- a/cpp/simdcomp_wrapper.c
+++ b/cpp/simdcomp_wrapper.c
@@ -0,0 +1,42 @@
+#include "simdcomp.h"
+#include "simdcomputil.h"
+
+// Packs one block from datain with the differential (d1) routines; assumes datain has a size of 128 uint32
+// and that output is large enough to host 1 + b * sizeof(__m128i) bytes. Returns that byte count.
+size_t compress_sorted(
+        const uint32_t* datain,
+        uint8_t* output,
+        const uint32_t offset) {
+    const uint32_t b = simdmaxbitsd1(offset, datain); // bit width computed for this block relative to offset
+    *output++ = b; // the first output byte records b; the packed data starts at the next byte
+    simdpackwithoutmaskd1(offset, datain, (__m128i *) output,  b); // NOTE(review): output + 1 is presumably not 16-byte aligned - confirm the pack routine uses unaligned stores
+    return 1 + b * sizeof(__m128i); // one header byte plus b 128-bit words
+}
+
+// Reverses compress_sorted: reads the bit width from the first byte of compressed_data and unpacks into output.
+// Assumes output has room for 128 uint32 (TODO confirm) and that offset matches the value used when packing.
+size_t uncompress_sorted(
+        const uint8_t* compressed_data, 
+        uint32_t* output, 
+        uint32_t offset) {
+    const uint32_t b = *compressed_data++; // header byte: the bit width; the packed data starts at the next byte
+    simdunpackd1(offset, (__m128i *)compressed_data, output, b);
+    return 1 + b * sizeof(__m128i); // number of input bytes consumed: one header byte plus b 128-bit words
+}
+
+size_t compress_unsorted( // packs one block from datain without differential coding; returns the number of bytes written
+        const uint32_t* datain, // presumably 128 uint32, as stated for compress_sorted - TODO confirm
+        uint8_t* output) { // must have room for 1 + b * sizeof(__m128i) bytes
+    const uint32_t b = maxbits(datain); // bit width computed for this block
+    *output++ = b; // the first output byte records b; the packed data starts at the next byte
+    simdpackwithoutmask(datain, (__m128i *) output,  b);
+    return 1 + b * sizeof(__m128i); // one header byte plus b 128-bit words
+}
+
+size_t uncompress_unsorted( // reverses compress_unsorted; returns the number of input bytes consumed
+        const uint8_t* compressed_data, // first byte is the bit width, the packed data follows
+        uint32_t* output) { // presumably needs room for 128 uint32 - TODO confirm
+    const uint32_t b = *compressed_data++; // header byte: the bit width
+    simdunpack((__m128i *)compressed_data, output, b);
+    return 1 + b * sizeof(__m128i); // one header byte plus b 128-bit words
+}
--- a/cpp/simdcomp_wrapper.cpp
+++ b/cpp/simdcomp_wrapper.cpp
@@ -1,48 +0,0 @@
-#include <stdio.h>
-#include <time.h>
-#include <stdlib.h>
-#include "simdcomp.h"
-#include "simdcomputil.h"
-
-extern "C" {
-    
-    // assumes datain has a size of 128 uint32
-    // and that buffer is large enough to host the data.
-    size_t compress_sorted_cpp(
-            const uint32_t* datain,
-            uint8_t* output,
-            const uint32_t offset) {
-        const uint32_t b = simdmaxbitsd1(offset, datain);
-        *output++ = b;
-        simdpackwithoutmaskd1(offset, datain, (__m128i *) output,  b);
-        return 1 + b * sizeof(__m128i);;
-    }
-
-    // assumes datain has a size of 128 uint32
-    // and that buffer is large enough to host the data.
-    size_t uncompress_sorted_cpp(
-            const uint8_t* compressed_data, 
-            uint32_t* output, 
-            uint32_t offset) {
-        const uint32_t b = *compressed_data++;
-        simdunpackd1(offset, (__m128i *)compressed_data, output, b);
-        return 1 + b * sizeof(__m128i);
-    }
-
-    size_t compress_unsorted_cpp(
-            const uint32_t* datain,
-            uint8_t* output) {
-        const uint32_t b = maxbits(datain);
-        *output++ = b;
-        simdpackwithoutmask(datain, (__m128i *) output,  b);
-        return 1 + b * sizeof(__m128i);;
-    }
-
-    size_t uncompress_unsorted_cpp(
-            const uint8_t* compressed_data, 
-            uint32_t* output) {
-        const uint32_t b = *compressed_data++;
-        simdunpack((__m128i *)compressed_data, output, b);
-        return 1 + b * sizeof(__m128i);
-    }
-}
--- a/cpp/streamvbyte/.gitignore
+++ b/cpp/streamvbyte/.gitignore
@@ -0,0 +1,32 @@
+# Object files
+*.o
+*.ko
+*.obj
+*.elf
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Libraries
+*.lib
+*.a
+*.la
+*.lo
+
+# Shared objects (inc. Windows DLLs)
+*.dll
+*.so
+*.so.*
+*.dylib
+
+# Executables
+*.exe
+*.out
+*.app
+*.i*86
+*.x86_64
+*.hex
+
+# Debug files
+*.dSYM/
--- a/cpp/streamvbyte/.travis.yml
+++ b/cpp/streamvbyte/.travis.yml
@@ -0,0 +1,7 @@
+language: c
+sudo: false
+compiler:
+  - gcc
+  - clang
+
+script: make && ./unit
--- a/cpp/streamvbyte/LICENSE
+++ b/cpp/streamvbyte/LICENSE
@@ -0,0 +1,202 @@
+Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "{}"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright {yyyy} {name of copyright owner}
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
--- a/cpp/streamvbyte/README.md
+++ b/cpp/streamvbyte/README.md
@@ -0,0 +1,60 @@
+streamvbyte
+===========
+[![Build Status](https://travis-ci.org/lemire/streamvbyte.png)](https://travis-ci.org/lemire/streamvbyte)
+
+StreamVByte is a new integer compression technique that applies SIMD instructions (vectorization) to
+Google's Group Varint approach. The net result is faster than other byte-oriented compression
+techniques.
+
+The approach is patent-free, the code is available under the Apache License.
+
+
+It includes fast differential coding.
+
+It assumes a recent Intel processor (e.g., Haswell or better).
+
+The code should build using most standard-compliant C99 compilers. The provided makefile
+expects a Linux-like system.
+
+
+Usage:
+
+      make
+      ./unit
+
+See example.c for an example.
+
+Short code sample:
+```C
+// suppose that datain is an array of uint32_t integers
+size_t compsize = streamvbyte_encode(datain, N, compressedbuffer); // encoding
+// here the result is stored in compressedbuffer using compsize bytes
+streamvbyte_decode(compressedbuffer, recovdata, N); // decoding (fast)
+```
+
+If the values are sorted, then it might be preferable to use differential coding:
+```C
+// suppose that datain is an array of uint32_t integers
+size_t compsize = streamvbyte_delta_encode(datain, N, compressedbuffer,0); // encoding
+// here the result is stored in compressedbuffer using compsize bytes
+streamvbyte_delta_decode(compressedbuffer, recovdata, N,0); // decoding (fast)
+```
+You have to know how many integers were coded when you decompress. You can store this 
+information along with the compressed stream.
+
+See also
+--------
+* SIMDCompressionAndIntersection: A C++ library to compress and intersect sorted lists of integers using SIMD instructions https://github.com/lemire/SIMDCompressionAndIntersection
+* The FastPFOR C++ library: Fast integer compression https://github.com/lemire/FastPFor
+* High-performance dictionary coding https://github.com/lemire/dictionary
+* LittleIntPacker: C library to pack and unpack short arrays of integers as fast as possible https://github.com/lemire/LittleIntPacker
+* The SIMDComp library: A simple C library for compressing lists of integers using binary packing https://github.com/lemire/simdcomp
+* MaskedVByte: Fast decoder for VByte-compressed integers https://github.com/lemire/MaskedVByte
+* CSharpFastPFOR: A C# integer compression library https://github.com/Genbox/CSharpFastPFOR
+* JavaFastPFOR: A Java integer compression library https://github.com/lemire/JavaFastPFOR
+* Encoding: Integer Compression Libraries for Go https://github.com/zhenjl/encoding
+* FrameOfReference is a C++ library dedicated to frame-of-reference (FOR) compression: https://github.com/lemire/FrameOfReference
+* libvbyte: A fast implementation for varbyte 32bit/64bit integer compression https://github.com/cruppstahl/libvbyte
+* TurboPFor is a C library that offers lots of interesting optimizations. Well worth checking! (GPL license) https://github.com/powturbo/TurboPFor
+* Oroch is a C++ library that offers a usable API (MIT license) https://github.com/ademakov/Oroch
+
--- a/cpp/streamvbyte/example.c
+++ b/cpp/streamvbyte/example.c
@@ -0,0 +1,24 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "streamvbyte.h"
+
+int main() {
+	int N = 5000; // number of integers to round-trip through the codec
+	uint32_t * datain = malloc(N * sizeof(uint32_t));
+	// worst case: 4 data bytes per integer plus one control byte per group of 4
+	uint8_t * compressedbuffer = malloc(N * sizeof(uint32_t) + (N + 3) / 4);
+	uint32_t * recovdata = malloc(N * sizeof(uint32_t));
+	if (!datain || !compressedbuffer || !recovdata) return EXIT_FAILURE; // out of memory
+	for (int k = 0; k < N; ++k)
+		datain[k] = 120;
+	size_t compsize = streamvbyte_encode(datain, N, compressedbuffer); // encoding; uses compsize bytes of compressedbuffer
+	size_t compsize2 = streamvbyte_decode(compressedbuffer, recovdata, N); // decoding (fast)
+	assert(compsize == compsize2); // decoding must consume exactly the bytes written
+	free(datain);
+	free(compressedbuffer);
+	free(recovdata);
+	printf("Compressed %d integers down to %d bytes.\n",N,(int) compsize);
+	return 0;
+}
--- a/cpp/streamvbyte/include/streamvbyte.h
+++ b/cpp/streamvbyte/include/streamvbyte.h
@@ -0,0 +1,19 @@
+
+#ifndef VARINTDECODE_H_
+#define VARINTDECODE_H_
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+#include <stdint.h>// please use a C99-compatible compiler
+#include <stddef.h>
+
+
+// Encode an array of a given length read from "in" to "out" in StreamVByte format.
+// Returns the number of bytes written.
+size_t streamvbyte_encode(const uint32_t *in, uint32_t length, uint8_t *out);
+
+// Read "length" 32-bit integers in StreamVByte format from "in", storing the result in "out".
+// Returns the number of bytes read.
+size_t streamvbyte_decode(const uint8_t* in, uint32_t* out, uint32_t length);
+
+
+#endif /* VARINTDECODE_H_ */
--- a/cpp/streamvbyte/include/streamvbytedelta.h
+++ b/cpp/streamvbyte/include/streamvbytedelta.h
@@ -0,0 +1,24 @@
+/*
+ * streamvbytedelta.h
+ *
+ *  Created on: Apr 14, 2016
+ *      Author: lemire
+ */
+
+#ifndef INCLUDE_STREAMVBYTEDELTA_H_
+#define INCLUDE_STREAMVBYTEDELTA_H_
+
+#include <stddef.h> // size_t
+#include <stdint.h> // uint8_t, uint32_t (keeps this header usable on its own)
+
+// Encode an array of a given length read from "in" to "out" in StreamVByte format.
+// Returns the number of bytes written.
+// This version uses differential coding (coding differences between values) starting at prev (you can often set prev to zero).
+size_t streamvbyte_delta_encode(const uint32_t *in, uint32_t length, uint8_t *out, uint32_t  prev);
+
+// Read "length" 32-bit integers in StreamVByte format from "in", storing the result in "out".
+// Returns the number of bytes read.
+// This version uses differential coding (coding differences between values) starting at prev (you can often set prev to zero).
+size_t streamvbyte_delta_decode(const uint8_t* in, uint32_t* out, uint32_t length, uint32_t  prev);
+
+#endif /* INCLUDE_STREAMVBYTEDELTA_H_ */
--- a/cpp/streamvbyte/makefile
+++ b/cpp/streamvbyte/makefile
@@ -0,0 +1,58 @@
+# minimalist makefile
+.SUFFIXES:
+#
+.SUFFIXES: .cpp .o .c .h
+
+CFLAGS = -fPIC -march=native -std=c99 -O3 -Wall -Wextra -pedantic -Wshadow
+LDFLAGS = -shared
+LIBNAME=libstreamvbyte.so.0.0.1
+# Defined before any rule: make expands prerequisite lists as it reads them.
+HEADERS=./include/streamvbyte.h ./include/streamvbytedelta.h
+OBJECTS= streamvbyte.o streamvbytedelta.o
+
+# These targets name commands, not files.
+.PHONY: all test install uninstall clean
+
+all:  unit $(LIBNAME)
+
+test: unit
+	./unit
+
+# Install the shared library and the public headers under /usr/local.
+install: $(LIBNAME)
+	cp $(LIBNAME) /usr/local/lib
+	ln -sf /usr/local/lib/$(LIBNAME) /usr/local/lib/libstreamvbyte.so
+	ldconfig
+	cp $(HEADERS) /usr/local/include
+
+# Remove the files that install put in place.
+uninstall:
+	for h in $(HEADERS) ; do rm  /usr/local/$$h; done
+	rm  /usr/local/lib/$(LIBNAME)
+	rm /usr/local/lib/libstreamvbyte.so
+	ldconfig
+
+# Object files
+streamvbytedelta.o: ./src/streamvbytedelta.c $(HEADERS)
+	$(CC) $(CFLAGS) -c ./src/streamvbytedelta.c -Iinclude
+
+streamvbyte.o: ./src/streamvbyte.c $(HEADERS)
+	$(CC) $(CFLAGS) -c ./src/streamvbyte.c -Iinclude
+
+# Shared library
+$(LIBNAME): $(OBJECTS)
+	$(CC) $(CFLAGS) -o $(LIBNAME) $(OBJECTS)  $(LDFLAGS)
+
+# Example and test programs, linked statically against the object files
+example: ./example.c    $(HEADERS) $(OBJECTS)
+	$(CC) $(CFLAGS) -o example ./example.c -Iinclude  $(OBJECTS)
+
+unit: ./tests/unit.c    $(HEADERS) $(OBJECTS)
+	$(CC) $(CFLAGS) -o unit ./tests/unit.c -Iinclude  $(OBJECTS)
+
+# NOTE(review): no -L. is given, so -lstreamvbyte presumably resolves to an installed copy - confirm
+dynunit: ./tests/unit.c    $(HEADERS) $(LIBNAME)
+	$(CC) $(CFLAGS) -o dynunit ./tests/unit.c -Iinclude  -lstreamvbyte
+
+clean:
+	rm -f unit dynunit *.o $(LIBNAME) example
--- a/cpp/streamvbyte/src/streamvbyte.c
+++ b/cpp/streamvbyte/src/streamvbyte.c
@@ -0,0 +1,495 @@
+#include "streamvbyte.h"
+#if defined(_MSC_VER)
+     /* Microsoft C/C++-compatible compiler */
+     #include <intrin.h>
+#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
+     /* GCC-compatible compiler, targeting x86/x86-64 */
+     #include <x86intrin.h>
+#elif defined(__GNUC__) && defined(__ARM_NEON__)
+     /* GCC-compatible compiler, targeting ARM with NEON */
+     #include <arm_neon.h>
+#elif defined(__GNUC__) && defined(__IWMMXT__)
+     /* GCC-compatible compiler, targeting ARM with WMMX */
+     #include <mmintrin.h>
+#elif (defined(__GNUC__) || defined(__xlC__)) && (defined(__VEC__) || defined(__ALTIVEC__))
+     /* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */
+     #include <altivec.h>
+#elif defined(__GNUC__) && defined(__SPE__)
+     /* GCC-compatible compiler, targeting PowerPC with SPE */
+     #include <spe.h>
+#endif
+
+static uint8_t lengthTable[256] = { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9,
+		10, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 6, 7, 8, 9, 7, 8,
+		9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10,
+		11, 12, 10, 11, 12, 13, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10,
+		11, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10,
+		8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11,
+		12, 10, 11, 12, 13, 11, 12, 13, 14, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10,
+		11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12,
+		13, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 9, 10,
+		11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15, 7, 8, 9, 10, 8,
+		9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11, 12,
+		10, 11, 12, 13, 11, 12, 13, 14, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12,
+		13, 14, 12, 13, 14, 15, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15,
+		13, 14, 15, 16 }; // entry k = sum over the four 2-bit fields of k of (field + 1), so values span 4..16; presumably the data bytes covered by control byte k - confirm against the decoder
+
+static uint8_t shuffleTable[256][16] = { { 0, -1, -1, -1, 1, -1, -1, -1, 2, -1,
+		-1, -1, 3, -1, -1, -1 }, // 1111
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1 },  // 2111
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1 },   // 3111
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1 },    // 4111
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1 },  // 1211
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1 },   // 2211
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1 },    // 3211
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, -1, -1, -1 },     // 4211
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, -1, -1, -1 },   // 1311
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, -1, -1, -1 },    // 2311
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, -1, -1, -1 },     // 3311
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, -1, -1, -1 },      // 4311
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, -1, -1, -1 },    // 1411
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, -1, -1, -1 },     // 2411
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, -1, -1, -1 },      // 3411
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, -1, -1, -1 },       // 4411
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1 },  // 1121
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1 },   // 2121
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1 },    // 3121
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1 },     // 4121
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1 },   // 1221
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1 },    // 2221
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1 },     // 3221
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, -1, -1, -1 },      // 4221
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, -1, -1, -1 },    // 1321
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, -1, -1, -1 },     // 2321
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, -1, -1, -1 },      // 3321
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, -1, -1, -1 },       // 4321
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, -1, -1, -1 },     // 1421
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, -1, -1, -1 },      // 2421
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, -1, -1, -1 },       // 3421
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, -1, -1, -1 },       // 4421
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1 },   // 1131
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1 },    // 2131
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1 },     // 3131
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1 },      // 4131
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1 },    // 1231
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1 },     // 2231
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1 },      // 3231
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, -1, -1, -1 },       // 4231
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, -1, -1, -1 },     // 1331
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, -1, -1, -1 },      // 2331
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, -1, -1, -1 },       // 3331
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, -1, -1, -1 },       // 4331
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, -1, -1 },      // 1431
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, -1, -1, -1 },       // 2431
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, -1, -1, -1 },       // 3431
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, -1, -1, -1 },       // 4431
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1 },    // 1141
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, -1, -1, -1 },     // 2141
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, -1, -1, -1 },      // 3141
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, -1, -1, -1 },       // 4141
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, -1, -1, -1 },     // 1241
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, -1, -1, -1 },      // 2241
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, -1, -1, -1 },       // 3241
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, -1, -1, -1 },       // 4241
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, -1, -1, -1 },      // 1341
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, -1, -1, -1 },       // 2341
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, -1, -1, -1 },       // 3341
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, -1, -1, -1 },       // 4341
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1 },       // 1441
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1, -1 },       // 2441
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, -1 },       // 3441
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1, -1 },       // 4441
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1 },  // 1112
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1 },   // 2112
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1 },    // 3112
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1 },     // 4112
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1 },   // 1212
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1 },    // 2212
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1 },     // 3212
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, -1, -1 },      // 4212
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, -1, -1 },    // 1312
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, -1, -1 },     // 2312
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, -1, -1 },      // 3312
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, -1, -1 },       // 4312
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, -1, -1 },     // 1412
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, -1, -1 },      // 2412
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, -1, -1 },       // 3412
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, -1, -1 },       // 4412
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1 },   // 1122
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1 },    // 2122
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1 },     // 3122
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1 },      // 4122
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1 },    // 1222
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1 },     // 2222
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1 },      // 3222
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, -1, -1 },       // 4222
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, -1, -1 },     // 1322
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, -1, -1 },      // 2322
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, -1, -1 },       // 3322
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, -1, -1 },       // 4322
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, -1, -1 },      // 1422
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, -1, -1 },       // 2422
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, -1, -1 },       // 3422
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, -1, -1 },       // 4422
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1 },    // 1132
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1 },     // 2132
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1 },      // 3132
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1 },       // 4132
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1 },     // 1232
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1 },      // 2232
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1 },       // 3232
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, -1, -1 },       // 4232
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, -1, -1 },      // 1332
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, -1, -1 },       // 2332
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, -1, -1 },       // 3332
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, -1, -1 },       // 4332
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, -1, -1 },       // 1432
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, -1, -1 },       // 2432
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, -1, -1 },       // 3432
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, -1, -1 },       // 4432
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1 },     // 1142
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, -1, -1 },      // 2142
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, -1, -1 },       // 3142
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, -1, -1 },       // 4142
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, -1, -1 },      // 1242
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, -1, -1 },       // 2242
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, -1, -1 },       // 3242
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, -1, -1 },       // 4242
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, -1, -1 },       // 1342
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, -1, -1 },       // 2342
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, -1, -1 },       // 3342
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, -1, -1 },       // 4342
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1 },       // 1442
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1 },       // 2442
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1 },       // 3442
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1 },       // 4442
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1 },   // 1113
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1 },    // 2113
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1 },     // 3113
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1 },      // 4113
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1 },    // 1213
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1 },     // 2213
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1 },      // 3213
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, 9, -1 },       // 4213
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, -1 },     // 1313
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, -1 },      // 2313
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, -1 },       // 3313
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, 10, -1 },       // 4313
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, 8, -1 },      // 1413
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, 9, -1 },       // 2413
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, 10, -1 },       // 3413
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, 11, -1 },       // 4413
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1 },    // 1123
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1 },     // 2123
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1 },      // 3123
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1 },       // 4123
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1 },     // 1223
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1 },      // 2223
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1 },       // 3223
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, 10, -1 },       // 4223
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, -1 },      // 1323
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, -1 },       // 2323
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, -1 },       // 3323
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, 11, -1 },       // 4323
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, 9, -1 },       // 1423
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, 10, -1 },       // 2423
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, 11, -1 },       // 3423
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, 12, -1 },       // 4423
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1 },     // 1133
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1 },      // 2133
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1 },       // 3133
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1 },       // 4133
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1 },      // 1233
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1 },       // 2233
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1 },       // 3233
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, 11, -1 },       // 4233
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, -1 },       // 1333
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, -1 },       // 2333
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1 },       // 3333
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, 12, -1 },       // 4333
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, -1 },       // 1433
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, -1 },       // 2433
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, 12, -1 },       // 3433
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, 13, -1 },       // 4433
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1 },      // 1143
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, -1 },       // 2143
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, 10, -1 },       // 3143
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, 11, -1 },       // 4143
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, 9, -1 },       // 1243
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, 10, -1 },       // 2243
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, 11, -1 },       // 3243
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, 12, -1 },       // 4243
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, 10, -1 },       // 1343
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, 11, -1 },       // 2343
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, -1 },       // 3343
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, 13, -1 },       // 4343
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1 },       // 1443
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1 },       // 2443
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1 },       // 3443
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -1 },       // 4443
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6 },    // 1114
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, 7 },     // 2114
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, 8 },      // 3114
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, 8, 9 },       // 4114
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, 7 },     // 1214
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, 8 },      // 2214
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, 9 },       // 3214
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, 9, 10 },       // 4214
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, 8 },      // 1314
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, 9 },       // 2314
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, 10 },       // 3314
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, 10, 11 },       // 4314
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, 8, 9 },       // 1414
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, 9, 10 },       // 2414
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, 10, 11 },       // 3414
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, 11, 12 },       // 4414
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7 },     // 1124
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, 8 },      // 2124
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, 9 },       // 3124
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, 9, 10 },       // 4124
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, 8 },      // 1224
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, 9 },       // 2224
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, 10 },       // 3224
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, 10, 11 },       // 4224
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, 9 },       // 1324
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, 10 },       // 2324
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, 11 },       // 3324
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, 11, 12 },       // 4324
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, 9, 10 },       // 1424
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, 10, 11 },       // 2424
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, 11, 12 },       // 3424
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, 12, 13 },       // 4424
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8 },      // 1134
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, 9 },       // 2134
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, 10 },       // 3134
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, 10, 11 },       // 4134
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, 9 },       // 1234
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, 10 },       // 2234
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, 11 },       // 3234
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, 11, 12 },       // 4234
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, 10 },       // 1334
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, 11 },       // 2334
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, 12 },       // 3334
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, 12, 13 },       // 4334
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, 11 },       // 1434
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, 12 },       // 2434
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, 12, 13 },       // 3434
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, 13, 14 },       // 4434
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9 },       // 1144
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, 10 },       // 2144
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, 10, 11 },       // 3144
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, 11, 12 },       // 4144
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, 9, 10 },       // 1244
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, 10, 11 },       // 2244
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, 11, 12 },       // 3244
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, 12, 13 },       // 4244
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, 10, 11 },       // 1344
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, 11, 12 },       // 2344
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13 },       // 3344
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, 13, 14 },       // 4344
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 },       // 1444
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 },       // 2444
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 },       // 3444
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }        // 4444
+};
+
+// Store val at *dataPtrPtr using the fewest whole bytes (1-4), least
+// significant byte first, advance *dataPtrPtr past them and return the 2-bit
+// length code (byte count - 1). Bytes are stored one at a time, so the stores
+// are alignment-safe and the layout does not depend on host endianness.
+static uint8_t _encode_data(uint32_t val, uint8_t *__restrict__ *dataPtrPtr) {
+	uint8_t *dataPtr = *dataPtrPtr;
+	uint8_t code;
+
+	if (val < (1 << 8)) { // 1 byte
+		code = 0;
+	} else if (val < (1 << 16)) { // 2 bytes
+		code = 1;
+	} else if (val < (1 << 24)) { // 3 bytes
+		code = 2;
+	} else { // 4 bytes
+		code = 3;
+	}
+
+	// Low byte is always written; higher bytes only up to the chosen length.
+	for (uint8_t i = 0; i <= code; i++)
+		dataPtr[i] = (uint8_t)(val >> (8 * i));
+
+	*dataPtrPtr = dataPtr + code + 1;
+	return code;
+}
+
+// Scalar encoder: packs count integers from in. Each value's bytes go to the
+// data stream via _encode_data and its 2-bit length code goes into the key
+// stream, four codes per key byte with the first value in the low bits.
+// Returns the data pointer just past the last byte written; when count is 0
+// nothing is written at all.
+static uint8_t *svb_encode_scalar(const uint32_t *in,
+		uint8_t *__restrict__ keyPtr, uint8_t *__restrict__ dataPtr,
+		uint32_t count) {
+	if (count == 0)
+		return dataPtr;
+
+	uint8_t packed = 0; // key byte currently being filled
+	for (uint32_t idx = 0; idx < count; idx++) {
+		uint32_t slot = idx & 3; // position of this code within the key byte
+		if (slot == 0 && idx != 0) {
+			*keyPtr++ = packed; // previous key byte is complete: flush it
+			packed = 0;
+		}
+		packed |= _encode_data(in[idx], &dataPtr) << (2 * slot);
+	}
+	*keyPtr = packed; // final key byte, possibly only partially filled
+	return dataPtr;
+}
+
+// Encode count 32-bit integers from in into out in streamvbyte format: all key
+// bytes first (one 2-bit length code per integer), then the variable-length
+// data bytes. Returns the number of bytes written to out.
+size_t streamvbyte_encode(const uint32_t *in, uint32_t count, uint8_t *out) {
+	// ceil(count / 4) without computing count + 3, which wraps near UINT32_MAX
+	uint32_t keyLen = count / 4 + (count % 4 != 0);
+	return svb_encode_scalar(in, out, out + keyLen, count) - out;
+}
+
+// Decode one 4-integer group: load 16 bytes at *dataPtrPtr (always 16, whatever
+// the group's length), expand them into four 32-bit lanes with
+// shuffleTable[key], and advance *dataPtrPtr by lengthTable[key].
+static inline __m128i _decode_avx(uint32_t key,
+		const uint8_t *__restrict__ *dataPtrPtr) {
+	__m128i Data = _mm_loadu_si128((const __m128i *) *dataPtrPtr);
+	__m128i Shuf = _mm_loadu_si128((const __m128i *) shuffleTable[key]); // no alignment assumed
+	*dataPtrPtr += lengthTable[key];
+	return _mm_shuffle_epi8(Data, Shuf);
+}
+
+static inline void _write_avx(uint32_t *out, __m128i Vec) { // store Vec's 16 bytes (four uint32_t) at out
+	_mm_storeu_si128((__m128i *) out, Vec); // unaligned store: out need not be 16-byte aligned
+}
+
+// Read one encoded value from the data stream.
+//
+// dataPtrPtr: in/out cursor into the data bytes; advanced past the value read.
+// code:       2-bit length code from the key stream: 0, 1, 2 select 1, 2, 3
+//             bytes; any other value selects 4 bytes.
+// Returns the decoded value.
+//
+// The value is assembled least significant byte first, one byte at a time.
+// Unlike reading through a uint16_t* / uint32_t* cast of the cursor, this
+// needs no particular alignment and gives the same result whatever the host
+// byte order is.
+static inline uint32_t _decode_data(const uint8_t **dataPtrPtr, uint8_t code) {
+	const uint8_t *dataPtr = *dataPtrPtr;
+	uint32_t nbytes = (code < 3) ? code + 1u : 4u;
+	uint32_t val = 0;
+
+	for (uint32_t i = 0; i < nbytes; i++)
+		val |= (uint32_t) dataPtr[i] << (8 * i);
+
+	*dataPtrPtr = dataPtr + nbytes;
+	return val;
+}
+// Scalar decoder for count values.
+//
+// Each value's 2-bit length code comes from the key stream at keyPtr (four
+// codes per key byte, lowest bits first) and its bytes from the data stream at
+// dataPtr; decoded values are stored to outPtr in order. Returns the data
+// pointer just past the last byte consumed. Touches no memory when count is 0.
+static const uint8_t *svb_decode_scalar(uint32_t *outPtr, const uint8_t *keyPtr,
+		const uint8_t *dataPtr, uint32_t count) {
+	uint32_t bits = 0; // current key byte, shifted down as codes are consumed
+
+	for (uint32_t idx = 0; idx < count; idx++) {
+		if ((idx & 3) == 0)
+			bits = *keyPtr++; // four codes per key byte: fetch the next one
+		outPtr[idx] = _decode_data(&dataPtr, bits & 0x3);
+		bits >>= 2;
+	}
+
+	return dataPtr;
+}
+
+const uint8_t *svb_decode_avx_simple(uint32_t *out,
+		const uint8_t *__restrict__ keyPtr, const uint8_t *__restrict__ dataPtr,
+		uint64_t count) {
+	// Decodes count integers into out; returns the data pointer just past the last byte consumed.
+	uint64_t keybytes = count / 4; // number of full key bytes (4 integers each)
+	__m128i Data;
+	if (keybytes >= 8) {
+		// Vector path: each step consumes 8 key bytes (read as one uint64_t) = 32 integers.
+		int64_t Offset = -(int64_t) keybytes / 8 + 1; // -(steps - 1); the loop counts it up to 0
+		// The base is biased so keyPtr64[Offset] is the first key word and keyPtr64[0] the last.
+		const uint64_t *keyPtr64 = (const uint64_t *) keyPtr - Offset;
+		uint64_t nextkeys = keyPtr64[Offset];
+		for (; Offset != 0; ++Offset) {
+			uint64_t keys = nextkeys;
+			nextkeys = keyPtr64[Offset + 1]; // load the following key word ahead of its use
+			// Each _decode_avx call takes one key byte and yields 4 integers: key bytes 0 and 1.
+			Data = _decode_avx((keys & 0xFF), &dataPtr);
+			_write_avx(out, Data);
+			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
+			_write_avx(out + 4, Data);
+			// key bytes 2 and 3
+			keys >>= 16;
+			Data = _decode_avx((keys & 0xFF), &dataPtr);
+			_write_avx(out + 8, Data);
+			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
+			_write_avx(out + 12, Data);
+			// key bytes 4 and 5
+			keys >>= 16;
+			Data = _decode_avx((keys & 0xFF), &dataPtr);
+			_write_avx(out + 16, Data);
+			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
+			_write_avx(out + 20, Data);
+			// key bytes 6 and 7
+			keys >>= 16;
+			Data = _decode_avx((keys & 0xFF), &dataPtr);
+			_write_avx(out + 24, Data);
+			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
+			_write_avx(out + 28, Data);
+			// 8 key bytes done: 32 integers written
+			out += 32;
+		}
+		{ // last key word (already held in nextkeys): same 8 decodes, no further key load
+			uint64_t keys = nextkeys;
+			// NOTE(review): _decode_avx always loads 16 data bytes, even for shorter groups, so these final loads can extend past the encoded data -- confirm callers provide readable padding.
+			Data = _decode_avx((keys & 0xFF), &dataPtr);
+			_write_avx(out, Data);
+			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
+			_write_avx(out + 4, Data);
+			// key bytes 2 and 3
+			keys >>= 16;
+			Data = _decode_avx((keys & 0xFF), &dataPtr);
+			_write_avx(out + 8, Data);
+			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
+			_write_avx(out + 12, Data);
+			// key bytes 4 and 5
+			keys >>= 16;
+			Data = _decode_avx((keys & 0xFF), &dataPtr);
+			_write_avx(out + 16, Data);
+			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
+			_write_avx(out + 20, Data);
+			// key bytes 6 and 7
+			keys >>= 16;
+			Data = _decode_avx((keys & 0xFF), &dataPtr);
+			_write_avx(out + 24, Data);
+			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
+			_write_avx(out + 28, Data);
+			// 8 key bytes done: 32 integers written
+			out += 32;
+		}
+	}
+	uint64_t consumedkeys = keybytes - (keybytes & 7); // key bytes handled above (a multiple of 8; 0 if the vector path was skipped)
+	return svb_decode_scalar(out, keyPtr + consumedkeys, dataPtr, count & 31); // scalar tail: the remaining count % 32 integers
+}
+
+// Read count 32-bit integers in streamvbyte format from in, storing the result
+// in out. The input is laid out as ceil(count / 4) key bytes followed by the
+// data bytes. Returns the number of input bytes consumed (0 when count is 0).
+size_t streamvbyte_decode(const uint8_t* in, uint32_t* out, uint32_t count) {
+	if (count == 0)
+		return 0;
+	// ceil(count / 4) without computing count + 3, which wraps near UINT32_MAX
+	uint32_t keyLen = count / 4 + (count % 4 != 0);
+	return svb_decode_avx_simple(out, in, in + keyLen, count) - in;
+}
--- a/cpp/streamvbyte/src/streamvbytedelta.c
+++ b/cpp/streamvbyte/src/streamvbytedelta.c
@@ -0,0 +1,575 @@
+#include "streamvbyte.h"
+#if defined(_MSC_VER)
+     /* Microsoft C/C++-compatible compiler */
+     #include <intrin.h>
+#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
+     /* GCC-compatible compiler, targeting x86/x86-64 */
+     #include <x86intrin.h>
+#elif defined(__GNUC__) && defined(__ARM_NEON__)
+     /* GCC-compatible compiler, targeting ARM with NEON */
+     #include <arm_neon.h>
+#elif defined(__GNUC__) && defined(__IWMMXT__)
+     /* GCC-compatible compiler, targeting ARM with WMMX */
+     #include <mmintrin.h>
+#elif (defined(__GNUC__) || defined(__xlC__)) && (defined(__VEC__) || defined(__ALTIVEC__))
+     /* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */
+     #include <altivec.h>
+#elif defined(__GNUC__) && defined(__SPE__)
+     /* GCC-compatible compiler, targeting PowerPC with SPE */
+     #include <spe.h>
+#endif
+
+static uint8_t lengthTable[256] = { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9,
+		10, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 6, 7, 8, 9, 7, 8,
+		9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10,
+		11, 12, 10, 11, 12, 13, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10,
+		11, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10,
+		8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11,
+		12, 10, 11, 12, 13, 11, 12, 13, 14, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10,
+		11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12,
+		13, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 9, 10,
+		11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15, 7, 8, 9, 10, 8,
+		9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11, 12,
+		10, 11, 12, 13, 11, 12, 13, 14, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12,
+		13, 14, 12, 13, 14, 15, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15,
+		13, 14, 15, 16 }; // entry k = 4 + sum of k's four 2-bit fields (4..16); presumably the data length in bytes of the 4-integer group with key byte k -- confirm at the use sites
+
+static uint8_t shuffleTable[256][16] = { { 0, -1, -1, -1, 1, -1, -1, -1, 2, -1,
+		-1, -1, 3, -1, -1, -1 }, // 1111
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1 },  // 2111
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1 },   // 3111
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1 },    // 4111
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1 },  // 1211
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1 },   // 2211
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1 },    // 3211
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, -1, -1, -1 },     // 4211
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, -1, -1, -1 },   // 1311
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, -1, -1, -1 },    // 2311
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, -1, -1, -1 },     // 3311
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, -1, -1, -1 },      // 4311
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, -1, -1, -1 },    // 1411
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, -1, -1, -1 },     // 2411
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, -1, -1, -1 },      // 3411
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, -1, -1, -1 },       // 4411
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1 },  // 1121
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1 },   // 2121
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1 },    // 3121
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1 },     // 4121
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1 },   // 1221
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1 },    // 2221
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1 },     // 3221
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, -1, -1, -1 },      // 4221
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, -1, -1, -1 },    // 1321
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, -1, -1, -1 },     // 2321
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, -1, -1, -1 },      // 3321
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, -1, -1, -1 },       // 4321
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, -1, -1, -1 },     // 1421
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, -1, -1, -1 },      // 2421
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, -1, -1, -1 },       // 3421
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, -1, -1, -1 },       // 4421
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1 },   // 1131
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1 },    // 2131
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1 },     // 3131
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1 },      // 4131
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1 },    // 1231
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1 },     // 2231
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1 },      // 3231
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, -1, -1, -1 },       // 4231
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, -1, -1, -1 },     // 1331
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, -1, -1, -1 },      // 2331
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, -1, -1, -1 },       // 3331
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, -1, -1, -1 },       // 4331
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, -1, -1 },      // 1431
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, -1, -1, -1 },       // 2431
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, -1, -1, -1 },       // 3431
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, -1, -1, -1 },       // 4431
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1 },    // 1141
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, -1, -1, -1 },     // 2141
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, -1, -1, -1 },      // 3141
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, -1, -1, -1 },       // 4141
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, -1, -1, -1 },     // 1241
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, -1, -1, -1 },      // 2241
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, -1, -1, -1 },       // 3241
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, -1, -1, -1 },       // 4241
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, -1, -1, -1 },      // 1341
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, -1, -1, -1 },       // 2341
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, -1, -1, -1 },       // 3341
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, -1, -1, -1 },       // 4341
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1 },       // 1441
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1, -1 },       // 2441
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1, -1 },       // 3441
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1, -1 },       // 4441
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1 },  // 1112
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1 },   // 2112
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1 },    // 3112
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1 },     // 4112
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1 },   // 1212
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1 },    // 2212
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1 },     // 3212
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, -1, -1 },      // 4212
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, -1, -1 },    // 1312
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, -1, -1 },     // 2312
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, -1, -1 },      // 3312
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, -1, -1 },       // 4312
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, -1, -1 },     // 1412
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, -1, -1 },      // 2412
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, -1, -1 },       // 3412
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, -1, -1 },       // 4412
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1 },   // 1122
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1 },    // 2122
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1 },     // 3122
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1 },      // 4122
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1 },    // 1222
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1 },     // 2222
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1 },      // 3222
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, -1, -1 },       // 4222
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, -1, -1 },     // 1322
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, -1, -1 },      // 2322
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, -1, -1 },       // 3322
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, -1, -1 },       // 4322
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, -1, -1 },      // 1422
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, -1, -1 },       // 2422
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, -1, -1 },       // 3422
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, -1, -1 },       // 4422
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1 },    // 1132
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1 },     // 2132
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1 },      // 3132
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1 },       // 4132
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1 },     // 1232
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1 },      // 2232
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1 },       // 3232
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, -1, -1 },       // 4232
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, -1, -1 },      // 1332
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, -1, -1 },       // 2332
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, -1, -1 },       // 3332
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, -1, -1 },       // 4332
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, -1, -1 },       // 1432
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, -1, -1 },       // 2432
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, -1, -1 },       // 3432
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, -1, -1 },       // 4432
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1 },     // 1142
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, -1, -1 },      // 2142
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, -1, -1 },       // 3142
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, -1, -1 },       // 4142
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, -1, -1 },      // 1242
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, -1, -1 },       // 2242
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, -1, -1 },       // 3242
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, -1, -1 },       // 4242
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, -1, -1 },       // 1342
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, -1, -1 },       // 2342
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, -1, -1 },       // 3342
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, -1, -1 },       // 4342
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1 },       // 1442
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1, -1 },       // 2442
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1 },       // 3442
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1 },       // 4442
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1 },   // 1113
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1 },    // 2113
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1 },     // 3113
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1 },      // 4113
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1 },    // 1213
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1 },     // 2213
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1 },      // 3213
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, 9, -1 },       // 4213
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, -1 },     // 1313
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, -1 },      // 2313
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, -1 },       // 3313
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, 10, -1 },       // 4313
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, 8, -1 },      // 1413
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, 9, -1 },       // 2413
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, 10, -1 },       // 3413
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, 11, -1 },       // 4413
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1 },    // 1123
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1 },     // 2123
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1 },      // 3123
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1 },       // 4123
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1 },     // 1223
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1 },      // 2223
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1 },       // 3223
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, 10, -1 },       // 4223
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, -1 },      // 1323
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, -1 },       // 2323
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, -1 },       // 3323
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, 11, -1 },       // 4323
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, 9, -1 },       // 1423
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, 10, -1 },       // 2423
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, 11, -1 },       // 3423
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, 12, -1 },       // 4423
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1 },     // 1133
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1 },      // 2133
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1 },       // 3133
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1 },       // 4133
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1 },      // 1233
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1 },       // 2233
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1 },       // 3233
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, 11, -1 },       // 4233
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, -1 },       // 1333
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, -1 },       // 2333
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1 },       // 3333
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, 12, -1 },       // 4333
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, -1 },       // 1433
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, -1 },       // 2433
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, 12, -1 },       // 3433
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, 13, -1 },       // 4433
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1 },      // 1143
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, -1 },       // 2143
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, 10, -1 },       // 3143
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, 11, -1 },       // 4143
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, 9, -1 },       // 1243
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, 10, -1 },       // 2243
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, 11, -1 },       // 3243
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, 12, -1 },       // 4243
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, 10, -1 },       // 1343
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, 11, -1 },       // 2343
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, -1 },       // 3343
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, 13, -1 },       // 4343
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -1 },       // 1443
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1 },       // 2443
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1 },       // 3443
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -1 },       // 4443
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6 },    // 1114
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, 7 },     // 2114
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, 8 },      // 3114
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, -1, -1, -1, 6, 7, 8, 9 },       // 4114
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, 7 },     // 1214
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, 8 },      // 2214
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, 9 },       // 3214
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1, -1, 7, 8, 9, 10 },       // 4214
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, 8 },      // 1314
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, 9 },       // 2314
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, 10 },       // 3314
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, -1, -1, -1, 8, 9, 10, 11 },       // 4314
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, -1, -1, -1, 6, 7, 8, 9 },       // 1414
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, -1, -1, -1, 7, 8, 9, 10 },       // 2414
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, -1, -1, -1, 8, 9, 10, 11 },       // 3414
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, 9, 10, 11, 12 },       // 4414
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7 },     // 1124
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, 8 },      // 2124
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, 9 },       // 3124
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, -1, -1, 7, 8, 9, 10 },       // 4124
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, 8 },      // 1224
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, 9 },       // 2224
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, 10 },       // 3224
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, -1, -1, 8, 9, 10, 11 },       // 4224
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, 9 },       // 1324
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, 10 },       // 2324
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, 11 },       // 3324
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, -1, -1, 9, 10, 11, 12 },       // 4324
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, -1, -1, 7, 8, 9, 10 },       // 1424
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, -1, -1, 8, 9, 10, 11 },       // 2424
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, -1, -1, 9, 10, 11, 12 },       // 3424
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, 10, 11, 12, 13 },       // 4424
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8 },      // 1134
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, 9 },       // 2134
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, 10 },       // 3134
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, -1, 8, 9, 10, 11 },       // 4134
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, 9 },       // 1234
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, 10 },       // 2234
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, 11 },       // 3234
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, -1, 9, 10, 11, 12 },       // 4234
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, 10 },       // 1334
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, 11 },       // 2334
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, 12 },       // 3334
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, -1, 10, 11, 12, 13 },       // 4334
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, 11 },       // 1434
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, 12 },       // 2434
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, 12, 13 },       // 3434
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, 11, 12, 13, 14 },       // 4434
+		{ 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9 },       // 1144
+		{ 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 6, 7, 8, 9, 10 },       // 2144
+		{ 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, 7, 8, 9, 10, 11 },       // 3144
+		{ 0, 1, 2, 3, 4, -1, -1, -1, 5, 6, 7, 8, 9, 10, 11, 12 },       // 4144
+		{ 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, 6, 7, 8, 9, 10 },       // 1244
+		{ 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, 7, 8, 9, 10, 11 },       // 2244
+		{ 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, 8, 9, 10, 11, 12 },       // 3244
+		{ 0, 1, 2, 3, 4, 5, -1, -1, 6, 7, 8, 9, 10, 11, 12, 13 },       // 4244
+		{ 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, 10, 11 },       // 1344
+		{ 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, 8, 9, 10, 11, 12 },       // 2344
+		{ 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, 9, 10, 11, 12, 13 },       // 3344
+		{ 0, 1, 2, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 12, 13, 14 },       // 4344
+		{ 0, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 },       // 1444
+		{ 0, 1, -1, -1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 },       // 2444
+		{ 0, 1, 2, -1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 },       // 3444
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }        // 4444
+};
+
+static uint8_t _encode_data(uint32_t val, uint8_t *__restrict__ *dataPtrPtr) {
+	uint8_t *dataPtr = *dataPtrPtr;
+	uint8_t code;
+
+	if (val < (1 << 8)) { // 1 byte
+		*dataPtr = (uint8_t)(val);
+		*dataPtrPtr += 1;
+		code = 0;
+	} else if (val < (1 << 16)) { // 2 bytes
+		*(uint16_t *) dataPtr = (uint16_t)(val);
+		*dataPtrPtr += 2;
+		code = 1;
+	} else if (val < (1 << 24)) { // 3 bytes
+		*(uint16_t *) dataPtr = (uint16_t)(val);
+		*(dataPtr + 2) = (uint8_t)(val >> 16);
+		*dataPtrPtr += 3;
+		code = 2;
+	} else { // 4 bytes
+		*(uint32_t *) dataPtr = val;
+		*dataPtrPtr += 4;
+		code = 3;
+	}
+
+	return code;
+}
+
+static uint8_t *svb_encode_scalar_d1_init(const uint32_t *in,
+		uint8_t *__restrict__ keyPtr, uint8_t *__restrict__ dataPtr,
+		uint32_t count, uint32_t prev) {
+	if (count == 0)
+		return dataPtr; // exit immediately if no data
+
+	uint8_t shift = 0; // cycles 0, 2, 4, 6, 0, 2, 4, 6, ...
+	uint8_t key = 0;
+	for (uint32_t c = 0; c < count; c++) {
+		if (shift == 8) {
+			shift = 0;
+			*keyPtr++ = key;
+			key = 0;
+		}
+		uint32_t val = in[c] - prev;
+		prev = in[c];
+		uint8_t code = _encode_data(val, &dataPtr);
+		key |= code << shift;
+		shift += 2;
+	}
+
+	*keyPtr = key;  // write last key (no increment needed)
+	return dataPtr; // pointer to first unused data byte
+}
+
+size_t streamvbyte_delta_encode(const uint32_t *in, uint32_t count, uint8_t *out,
+		uint32_t prev) {
+	uint8_t *keyPtr = out;         // key bytes start at the very beginning of out
+	uint32_t keyLen = (count + 3) / 4; // 2 bits per value, rounded up to whole bytes
+	uint8_t *dataPtr = keyPtr + keyLen; // variable byte data after all keys
+
+	return svb_encode_scalar_d1_init(in, keyPtr, dataPtr, count, prev) - out; // total bytes written (keys + data)
+
+}
+
+static inline __m128i _decode_avx(uint32_t key, const uint8_t *__restrict__ *dataPtrPtr) { // decodes the 4 values described by one key byte
+	uint8_t len = lengthTable[key]; // number of data bytes to advance past for this key byte
+	__m128i Data = _mm_loadu_si128((__m128i *) *dataPtrPtr); // unaligned load of a full 16 bytes, regardless of len
+	__m128i Shuf = *(__m128i *) &shuffleTable[key]; // byte-shuffle mask for this key; -1 entries produce zero bytes
+
+	Data = _mm_shuffle_epi8(Data, Shuf); // spread the packed bytes into four 32-bit lanes
+	*dataPtrPtr += len; // advance the caller's data cursor by the bytes actually used
+
+	return Data;
+}
+#define BroadcastLastXMM 0xFF // bits 0-7 all set to choose highest element
+
+
+
+static inline void _write_avx(uint32_t *out, __m128i Vec) {
+	_mm_storeu_si128((__m128i *) out, Vec);
+}
+
+static __m128i _write_avx_d1(uint32_t *out, __m128i Vec, __m128i Prev) {
+	__m128i Add = _mm_slli_si128(Vec, 4); // Cycle 1: [- A B C] (already done)
+	Prev = _mm_shuffle_epi32(Prev, BroadcastLastXMM); // Cycle 2: [P P P P]
+	Vec = _mm_add_epi32(Vec, Add);                    // Cycle 2: [A AB BC CD]
+	Add = _mm_slli_si128(Vec, 8);                     // Cycle 3: [- - A AB]
+	Vec = _mm_add_epi32(Vec, Prev);                 // Cycle 3: [PA PAB PBC PCD]
+	Vec = _mm_add_epi32(Vec, Add); // Cycle 4: [PA PAB PABC PABCD]
+
+	_write_avx(out, Vec);
+	return Vec;
+}
+
+#ifndef _MSC_VER
+static __m128i High16To32 = {0xFFFF0B0AFFFF0908, 0xFFFF0F0EFFFF0D0C};
+#else
+static __m128i High16To32 = {8,  9,  -1, -1, 10, 11, -1, -1,
+                           12, 13, -1, -1, 14, 15, -1, -1};
+#endif
+
+static inline __m128i _write_16bit_avx_d1(uint32_t *out, __m128i Vec, __m128i Prev) {
+  // Vec == [A B C D E F G H] (16-bit lanes; callers here pass zero-extended bytes, so the 32-bit adds below cannot carry between lanes)
+  __m128i Add = _mm_slli_si128(Vec, 2);               // [- A B C D E F G]
+  Prev = _mm_shuffle_epi32(Prev, BroadcastLastXMM); // [P P P P] (32-bit)
+  Vec = _mm_add_epi32(Vec, Add);                    // [A AB BC CD DE EF FG GH]
+  Add = _mm_slli_si128(Vec, 4);                     // [- - A AB BC CD DE EF]
+  Vec = _mm_add_epi32(Vec, Add);      // [A AB ABC ABCD BCDE CDEF DEFG EFGH]
+  __m128i V1 = _mm_cvtepu16_epi32(Vec); // [A AB ABC ABCD] (32-bit)
+  V1 = _mm_add_epi32(V1, Prev);       // [PA PAB PABC PABCD] (32-bit)
+  __m128i V2 =
+      _mm_shuffle_epi8(Vec, High16To32); // [BCDE CDEF DEFG EFGH] (32-bit)
+  V2 = _mm_add_epi32(V1, V2); // [PABCDE PABCDEF PABCDEFG PABCDEFGH] (32-bit)
+  _write_avx(out, V1);     // first four decoded values
+  _write_avx(out + 4, V2); // last four decoded values
+  return V2; // highest lane becomes the running previous value for the next call
+}
+
+static inline uint32_t _decode_data(const uint8_t **dataPtrPtr, uint8_t code) {
+	const uint8_t *dataPtr = *dataPtrPtr;
+	uint32_t val;
+
+	if (code == 0) { // 1 byte
+		val = (uint32_t) * dataPtr;
+		dataPtr += 1;
+	} else if (code == 1) { // 2 bytes
+		val = (uint32_t) * (uint16_t *) dataPtr;
+		dataPtr += 2;
+	} else if (code == 2) { // 3 bytes
+		val = (uint32_t) * (uint16_t *) dataPtr;
+		val |= *(dataPtr + 2) << 16;
+		dataPtr += 3;
+	} else {                      // code == 3
+		val = *(uint32_t *) dataPtr; // 4 bytes
+		dataPtr += 4;
+	}
+
+	*dataPtrPtr = dataPtr;
+	return val;
+}
+
+const uint8_t *svb_decode_scalar_d1_init(uint32_t *outPtr, const uint8_t *keyPtr,
+		const uint8_t *dataPtr, uint32_t count,
+                                   uint32_t prev) {
+  if (count == 0)
+    return dataPtr; // no reads or writes if no data
+
+  uint8_t shift = 0;
+  uint32_t key = *keyPtr++;
+
+  for (uint32_t c = 0; c < count; c++) {
+    if (shift == 8) {
+      shift = 0;
+      key = *keyPtr++;
+    }
+    uint32_t val = _decode_data(&dataPtr, (key >> shift) & 0x3);
+    val += prev;
+    *outPtr++ = val;
+    prev = val;
+    shift += 2;
+  }
+
+  return dataPtr; // pointer to first unused byte after end
+}
+
+const uint8_t *svb_decode_avx_d1_init(uint32_t *out, const uint8_t *__restrict__ keyPtr,
+		const uint8_t *__restrict__ dataPtr, uint64_t count, uint32_t prev) {
+	uint64_t keybytes = count / 4; // number of key bytes
+	if (keybytes >= 8) {
+		__m128i Prev = _mm_set1_epi32(prev);
+		__m128i Data;
+
+		int64_t Offset = -(int64_t) keybytes / 8 + 1;
+
+		const uint64_t *keyPtr64 = (const uint64_t *) keyPtr - Offset;
+		uint64_t nextkeys = keyPtr64[Offset];
+		for (; Offset != 0; ++Offset) {
+			uint64_t keys = nextkeys;
+			nextkeys = keyPtr64[Offset + 1];
+			// faster 16-bit delta since we only have 8-bit values
+			if (!keys) { // 32 1-byte ints in a row
+
+				Data = _mm_cvtepu8_epi16(_mm_lddqu_si128((__m128i *) (dataPtr)));
+				Prev = _write_16bit_avx_d1(out, Data, Prev);
+				Data = _mm_cvtepu8_epi16(
+						_mm_lddqu_si128((__m128i *) (dataPtr + 8)));
+				Prev = _write_16bit_avx_d1(out + 8, Data, Prev);
+				Data = _mm_cvtepu8_epi16(
+						_mm_lddqu_si128((__m128i *) (dataPtr + 16)));
+				Prev = _write_16bit_avx_d1(out + 16, Data, Prev);
+				Data = _mm_cvtepu8_epi16(
+						_mm_lddqu_si128((__m128i *) (dataPtr + 24)));
+				Prev = _write_16bit_avx_d1(out + 24, Data, Prev);
+				out += 32;
+				dataPtr += 32;
+				continue;
+			}
+
+			Data = _decode_avx(keys & 0x00FF, &dataPtr);
+			Prev = _write_avx_d1(out, Data, Prev);
+			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
+			Prev = _write_avx_d1(out + 4, Data, Prev);
+
+			keys >>= 16;
+			Data = _decode_avx((keys & 0x00FF), &dataPtr);
+			Prev = _write_avx_d1(out + 8, Data, Prev);
+			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
+			Prev = _write_avx_d1(out + 12, Data, Prev);
+
+			keys >>= 16;
+			Data = _decode_avx((keys & 0x00FF), &dataPtr);
+			Prev = _write_avx_d1(out + 16, Data, Prev);
+			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
+			Prev = _write_avx_d1(out + 20, Data, Prev);
+
+			keys >>= 16;
+			Data = _decode_avx((keys & 0x00FF), &dataPtr);
+			Prev = _write_avx_d1(out + 24, Data, Prev);
+			Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
+			Prev = _write_avx_d1(out + 28, Data, Prev);
+
+			out += 32;
+		}
+		{
+			uint64_t keys = nextkeys;
+			// faster 16-bit delta since we only have 8-bit values
+			if (!keys) { // 32 1-byte ints in a row
+				Data = _mm_cvtepu8_epi16(_mm_lddqu_si128((__m128i *) (dataPtr)));
+				Prev = _write_16bit_avx_d1(out, Data, Prev);
+				Data = _mm_cvtepu8_epi16(
+						_mm_lddqu_si128((__m128i *) (dataPtr + 8)));
+				Prev = _write_16bit_avx_d1(out + 8, Data, Prev);
+				Data = _mm_cvtepu8_epi16(
+						_mm_lddqu_si128((__m128i *) (dataPtr + 16)));
+				Prev = _write_16bit_avx_d1(out + 16, Data, Prev);
+				Data = _mm_cvtepu8_epi16(
+						_mm_loadl_epi64((__m128i *) (dataPtr + 24)));
+				Prev = _write_16bit_avx_d1(out + 24, Data, Prev);
+				out += 32;
+				dataPtr += 32;
+
+			} else {
+
+				Data = _decode_avx(keys & 0x00FF, &dataPtr);
+				Prev = _write_avx_d1(out, Data, Prev);
+				Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
+				Prev = _write_avx_d1(out + 4, Data, Prev);
+
+				keys >>= 16;
+				Data = _decode_avx((keys & 0x00FF), &dataPtr);
+				Prev = _write_avx_d1(out + 8, Data, Prev);
+				Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
+				Prev = _write_avx_d1(out + 12, Data, Prev);
+
+				keys >>= 16;
+				Data = _decode_avx((keys & 0x00FF), &dataPtr);
+				Prev = _write_avx_d1(out + 16, Data, Prev);
+				Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
+				Prev = _write_avx_d1(out + 20, Data, Prev);
+
+				keys >>= 16;
+				Data = _decode_avx((keys & 0x00FF), &dataPtr);
+				Prev = _write_avx_d1(out + 24, Data, Prev);
+				Data = _decode_avx((keys & 0xFF00) >> 8, &dataPtr);
+				Prev = _write_avx_d1(out + 28, Data, Prev);
+
+				out += 32;
+			}
+		}
+		prev = out[-1];
+	}
+	uint64_t consumedkeys = keybytes - (keybytes & 7);
+	return svb_decode_scalar_d1_init(out, keyPtr + consumedkeys, dataPtr,
+			count & 31, prev);
+}
+
+size_t streamvbyte_delta_decode(const uint8_t* in, uint32_t* out,
+		uint32_t count, uint32_t prev) {
+	uint32_t keyLen = ((count + 3) / 4); // 2-bits per key (rounded up)
+	const uint8_t *keyPtr = in;
+	const uint8_t *dataPtr = keyPtr + keyLen;  // data starts at end of keys
+	return svb_decode_avx_d1_init(out, keyPtr, dataPtr, count, prev) - in;
+}
--- a/cpp/streamvbyte/tests/unit.c
+++ b/cpp/streamvbyte/tests/unit.c
@@ -0,0 +1,73 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "streamvbyte.h"
+#include "streamvbytedelta.h"
+
+int main() {
+	int N = 4096;
+	uint32_t * datain = malloc(N * sizeof(uint32_t));
+	uint8_t * compressedbuffer = malloc(2 * N * sizeof(uint32_t));
+	uint32_t * recovdata = malloc(N * sizeof(uint32_t));
+
+	for (int length = 0; length <= N;) {
+		printf("length = %d \n", length);
+		for (uint32_t gap = 1; gap <= 387420489; gap *= 3) {
+			for (int k = 0; k < length; ++k)
+				datain[k] = gap;
+			size_t compsize = streamvbyte_encode(datain, length,
+					compressedbuffer);
+			size_t usedbytes = streamvbyte_decode(compressedbuffer, recovdata,
+					length);
+			if (compsize != usedbytes) {
+				printf(
+						"[streamvbyte_decode] code is buggy gap = %d, size mismatch %d %d \n",
+						(int) gap, (int) compsize, (int) usedbytes);
+				return -1;
+			}
+			for (int k = 0; k < length; ++k) {
+				if (recovdata[k] != datain[k]) {
+					printf("[streamvbyte_decode] code is buggy gap = %d\n",
+							(int) gap);
+					return -1;
+				}
+			}
+		}
+
+		printf("Delta \n");
+		for (size_t gap = 1; gap <= 531441; gap *= 3) {
+			for (int k = 0; k < length; ++k)
+				datain[k] = gap * k;
+			size_t compsize = streamvbyte_delta_encode(datain, length,
+					compressedbuffer, 0);
+			size_t usedbytes = streamvbyte_delta_decode(compressedbuffer,
+					recovdata, length, 0);
+			if (compsize != usedbytes) {
+				printf(
+						"[streamvbyte_delta_decode] code is buggy gap = %d, size mismatch %d %d \n",
+						(int) gap, (int) compsize, (int) usedbytes);
+				return -1;
+			}
+			for (int k = 0; k < length; ++k) {
+				if (recovdata[k] != datain[k]) {
+					printf(
+							"[streamvbyte_delta_decode] code is buggy gap = %d\n",
+							(int) gap);
+					return -1;
+				}
+			}
+
+		}
+
+		if (length < 128)
+			++length;
+		else {
+			length *= 2;
+		}
+	}
+	free(datain);
+	free(compressedbuffer);
+	free(recovdata);
+	printf("Code looks good.\n");
+	return 0;
+}
--- a/examples/html/simple_search.html
+++ b/examples/html/simple_search.html
@@ -52,7 +52,7 @@
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-2">&#182;</a>
              </div>
-              <p>Let’s create a temporary directory for the 
+              <p>Let’s create a temporary directory for the
 sake of this example</p>

            </div>
@@ -60,7 +60,7 @@ sake of this example</p>
            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">if</span> <span class="hljs-keyword">let</span> <span class="hljs-literal">Ok</span>(dir) = TempDir::new(<span class="hljs-string">"tantivy_example_dir"</span>) {
        run_example(dir.path()).unwrap();
        dir.close().unwrap();
-    }   
+    }
 }


@@ -78,7 +78,7 @@ sake of this example</p>
              <h1 id="defining-the-schema">Defining the schema</h1>
 <p>The Tantivy index requires a very strict schema.
 The schema declares which fields are in the index,
-and for each field, its type and “the way it should 
+and for each field, its type and “the way it should
 be indexed”.</p>

            </div>
@@ -111,12 +111,12 @@ be indexed”.</p>
 We want full-text search for it, and we want to be able
 to retrieve the document after the search.</p>
 <p>TEXT | STORED is some syntactic sugar to describe
-that. </p>
+that.</p>
 <p><code>TEXT</code> means the field should be tokenized and indexed,
 along with its term frequency and term positions.</p>
 <p><code>STORED</code> means that the field will also be saved
 in a compressed, row-oriented key-value store.
-This store is useful to reconstruct the 
+This store is useful to reconstruct the
 documents that were selected during the search phase.</p>

            </div>
@@ -139,7 +139,7 @@ to retrieve the body after the search.</p>
            </div>
            
            <div class="content"><div class='highlight'><pre>    schema_builder.add_text_field(<span class="hljs-string">"body"</span>, TEXT);
-    
+
    <span class="hljs-keyword">let</span> schema = schema_builder.build();</pre></div></div>
            
        </li>
@@ -173,14 +173,12 @@ with our schema in the directory.</p>
 There must be only one writer at a time.
 This single <code>IndexWriter</code> is already
 multithreaded.</p>
-<p>Here we use a buffer of 1 GB. Using a bigger 
-heap for the indexer can increase its throughput.
-This buffer will be split between the indexing
-threads.</p>
+<p>Here we use a buffer of 50MB per thread. Using a bigger
+heap for the indexer can increase its throughput.</p>

            </div>
            
-            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> <span class="hljs-keyword">mut</span> index_writer = <span class="hljs-built_in">try!</span>(index.writer(<span class="hljs-number">1_000_000_000</span>));</pre></div></div>
+            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> <span class="hljs-keyword">mut</span> index_writer = <span class="hljs-built_in">try!</span>(index.writer(<span class="hljs-number">50_000_000</span>));</pre></div></div>
            
        </li>
        
@@ -213,10 +211,12 @@ one by one in a Document object.</p>
            
            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> title = schema.get_field(<span class="hljs-string">"title"</span>).unwrap();
    <span class="hljs-keyword">let</span> body = schema.get_field(<span class="hljs-string">"body"</span>).unwrap();
-     
+
    <span class="hljs-keyword">let</span> <span class="hljs-keyword">mut</span> old_man_doc = Document::<span class="hljs-keyword">default</span>();
    old_man_doc.add_text(title, <span class="hljs-string">"The Old Man and the Sea"</span>);
-    old_man_doc.add_text(body, <span class="hljs-string">"He was an old man who fished alone in a skiff in the Gulf Stream and he had gone eighty-four days now without taking a fish."</span>);</pre></div></div>
+    old_man_doc.add_text(body,
+                         <span class="hljs-string">"He was an old man who fished alone in a skiff in the Gulf Stream and \
+                          he had gone eighty-four days now without taking a fish."</span>);</pre></div></div>
            
        </li>
        
@@ -231,7 +231,7 @@ one by one in a Document object.</p>

            </div>
            
-            <div class="content"><div class='highlight'><pre>    <span class="hljs-built_in">try!</span>(index_writer.add_document(old_man_doc));</pre></div></div>
+            <div class="content"><div class='highlight'><pre>    index_writer.add_document(old_man_doc);</pre></div></div>
            
        </li>
        
@@ -248,13 +248,13 @@ a document object directly from json.</p>

            </div>
            
-            <div class="content"><div class='highlight'><pre>    
+            <div class="content"><div class='highlight'><pre>
    <span class="hljs-keyword">let</span> mice_and_men_doc = <span class="hljs-built_in">try!</span>(schema.parse_document(r#<span class="hljs-string">"{
       "</span>title<span class="hljs-string">": "</span>Of Mice and Men<span class="hljs-string">",
       "</span>body<span class="hljs-string">": "</span>few miles south of Soledad, the Salinas River drops <span class="hljs-keyword">in</span> close to the hillside bank and runs deep and green. The water is warm too, <span class="hljs-keyword">for</span> it has slipped twinkling over the yellow sands <span class="hljs-keyword">in</span> the sunlight before reaching the narrow pool. On one side of the river the golden foothill slopes curve up to the strong and rocky Gabilan Mountains, but on the valley side the water is lined with trees—willows fresh and green with every spring, carrying <span class="hljs-keyword">in</span> their lower leaf junctures the debris of the winter’s flooding; and sycamores with mottled, white,recumbent limbs and branches that arch over the pool<span class="hljs-string">"  
    }"</span>#));
-    
-    <span class="hljs-built_in">try!</span>(index_writer.add_document(mice_and_men_doc));</pre></div></div>
+
+    index_writer.add_document(mice_and_men_doc);</pre></div></div>
            
        </li>
        
@@ -275,7 +275,7 @@ The following document has two titles.</p>
       "</span>title<span class="hljs-string">": ["</span>Frankenstein<span class="hljs-string">", "</span>The Modern Promotheus<span class="hljs-string">"],
       "</span>body<span class="hljs-string">": "</span>You will rejoice to hear that no disaster has accompanied the commencement of an enterprise which you have regarded with such evil forebodings.  I arrived here yesterday, and my first task is to assure my dear sister of my welfare and increasing confidence <span class="hljs-keyword">in</span> the success of my undertaking.<span class="hljs-string">"  
    }"</span>#));
-    <span class="hljs-built_in">try!</span>(index_writer.add_document(frankenstein_doc));</pre></div></div>
+    index_writer.add_document(frankenstein_doc);</pre></div></div>
            
        </li>
        
@@ -288,7 +288,7 @@ The following document has two titles.</p>
              </div>
              <p>This is an example, so we will only index 3 documents
 here. You can check out tantivy’s tutorial to index
-the English wikipedia. Tantivy’s indexing is rather fast. 
+the English wikipedia. Tantivy’s indexing is rather fast.
 Indexing 5 million articles of the English wikipedia takes
 around 4 minutes on my computer!</p>

@@ -343,15 +343,13 @@ commit.</p>
                <a class="pilcrow" href="#section-17">&#182;</a>
              </div>
              <h1 id="searching">Searching</h1>
-<p>Let’s search our index. We start
-by creating a searcher. There can be more
-than one searcher at a time.</p>
-<p>You should create a searcher
-every time you start a “search query”.</p>
+<p>Let’s search our index. Start by reloading
+searchers in the index. This should be done
+after every commit().</p>

            </div>
            
-            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> searcher = index.searcher();</pre></div></div>
+            <div class="content"><div class='highlight'><pre>    <span class="hljs-built_in">try!</span>(index.load_searchers());</pre></div></div>
            
        </li>
        
@@ -362,14 +360,13 @@ every time you start a “search query”.</p>
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-18">&#182;</a>
              </div>
-              <p>The query parser can interpret human queries.
-Here, if the user does not specify which
-field they want to search, tantivy will search
-in both title and body.</p>
+              <p>Afterwards create one (or more) searchers.</p>
+<p>You should create a searcher
+every time you start a “search query”.</p>

            </div>
            
-            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> query_parser = QueryParser::new(index.schema(), <span class="hljs-built_in">vec!</span>(title, body));</pre></div></div>
+            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> searcher = index.searcher();</pre></div></div>
            
        </li>
        
@@ -380,6 +377,24 @@ in both title and body.</p>
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-19">&#182;</a>
              </div>
+              <p>The query parser can interpret human queries.
+Here, if the user does not specify which
+field they want to search, tantivy will search
+in both title and body.</p>
+
+            </div>
+            
+            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> query_parser = QueryParser::new(index.schema(), <span class="hljs-built_in">vec!</span>[title, body]);</pre></div></div>
+            
+        </li>
+        
+        
+        <li id="section-20">
+            <div class="annotation">
+              
+              <div class="pilwrap ">
+                <a class="pilcrow" href="#section-20">&#182;</a>
+              </div>
              <p>QueryParser may fail if the query is not in the right
 format. For user facing applications, this can be a problem.
 A ticket has been opened regarding this problem.</p>
@@ -391,11 +406,11 @@ A ticket has been opened regarding this problem.</p>
        </li>
        
        
-        <li id="section-20">
+        <li id="section-21">
            <div class="annotation">
              
              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-20">&#182;</a>
+                <a class="pilcrow" href="#section-21">&#182;</a>
              </div>
              <p>A query defines a set of documents, as
 well as the way they should be scored.</p>
@@ -408,36 +423,20 @@ any document matching at least one of our terms.</p>
        </li>
        
        
-        <li id="section-21">
-            <div class="annotation">
-              
-              <div class="pilwrap ">
-                <a class="pilcrow" href="#section-21">&#182;</a>
-              </div>
-              <h3 id="collectors">Collectors</h3>
-<p>We are not interested in all of the documents but 
-only in the top 10. Keeping track of our top 10 best documents
-is the role of the TopCollector.</p>
-
-            </div>
-            
-            <div class="content"><div class='highlight'><pre>    
-    <span class="hljs-keyword">let</span> <span class="hljs-keyword">mut</span> top_collector = TopCollector::with_limit(<span class="hljs-number">10</span>);</pre></div></div>
-            
-        </li>
-        
-        
        <li id="section-22">
            <div class="annotation">
              
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-22">&#182;</a>
              </div>
-              <p>We can now perform our query.</p>
+              <h3 id="collectors">Collectors</h3>
+<p>We are not interested in all of the documents but
+only in the top 10. Keeping track of our top 10 best documents
+is the role of the TopCollector.</p>

            </div>
            
-            <div class="content"><div class='highlight'><pre>    <span class="hljs-built_in">try!</span>(searcher.search(&amp;query, &amp;<span class="hljs-keyword">mut</span> top_collector)));</pre></div></div>
+            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> <span class="hljs-keyword">mut</span> top_collector = TopCollector::with_limit(<span class="hljs-number">10</span>);</pre></div></div>
            
        </li>
        
@@ -448,12 +447,11 @@ is the role of the TopCollector.</p>
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-23">&#182;</a>
              </div>
-              <p>Our top collector now contains the 10 
-most relevant doc ids…</p>
+              <p>We can now perform our query.</p>

            </div>
            
-            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> doc_addresses = top_collector.docs();</pre></div></div>
+            <div class="content"><div class='highlight'><pre>    <span class="hljs-built_in">try!</span>(searcher.search(&amp;*query, &amp;<span class="hljs-keyword">mut</span> top_collector));</pre></div></div>
            
        </li>
        
@@ -464,7 +462,23 @@ most relevant doc ids…</p>
              <div class="pilwrap ">
                <a class="pilcrow" href="#section-24">&#182;</a>
              </div>
-              <p>The actual documents still need to be 
+              <p>Our top collector now contains the 10
+most relevant doc ids…</p>
+
+            </div>
+            
+            <div class="content"><div class='highlight'><pre>    <span class="hljs-keyword">let</span> doc_addresses = top_collector.docs();</pre></div></div>
+            
+        </li>
+        
+        
+        <li id="section-25">
+            <div class="annotation">
+              
+              <div class="pilwrap ">
+                <a class="pilcrow" href="#section-25">&#182;</a>
+              </div>
+              <p>The actual documents still need to be
 retrieved from Tantivy’s store.</p>
 <p>Since the body field was not configured as stored,
 the document returned will only contain
@@ -472,10 +486,10 @@ a title.</p>

            </div>
            
-            <div class="content"><div class='highlight'><pre>    
+            <div class="content"><div class='highlight'><pre>
    <span class="hljs-keyword">for</span> doc_address <span class="hljs-keyword">in</span> doc_addresses {
-         <span class="hljs-keyword">let</span> retrieved_doc = <span class="hljs-built_in">try!</span>(searcher.doc(&amp;doc_address));
-         <span class="hljs-built_in">println!</span>(<span class="hljs-string">"{}"</span>, schema.to_json(&amp;retrieved_doc));
+        <span class="hljs-keyword">let</span> retrieved_doc = <span class="hljs-built_in">try!</span>(searcher.doc(&amp;doc_address));
+        <span class="hljs-built_in">println!</span>(<span class="hljs-string">"{}"</span>, schema.to_json(&amp;retrieved_doc));
    }

    <span class="hljs-literal">Ok</span>(())
--- a/examples/simple_search.rs
+++ b/examples/simple_search.rs
@@ -1,4 +1,3 @@
-extern crate rustc_serialize;
 extern crate tantivy;
 extern crate tempdir;

@@ -10,105 +9,105 @@ use tantivy::collector::TopCollector;
 use tantivy::query::QueryParser;

 fn main() {
-    // Let's create a temporary directory for the 
+    // Let's create a temporary directory for the
    // sake of this example
    if let Ok(dir) = TempDir::new("tantivy_example_dir") {
        run_example(dir.path()).unwrap();
        dir.close().unwrap();
-    }   
+    }
 }


 fn run_example(index_path: &Path) -> tantivy::Result<()> {
-    
-    
+
+
    // # Defining the schema
    //
    // The Tantivy index requires a very strict schema.
    // The schema declares which fields are in the index,
-    // and for each field, its type and "the way it should 
+    // and for each field, its type and "the way it should
    // be indexed".
-    
-    
+
+
    // first we need to define a schema ...
    let mut schema_builder = SchemaBuilder::default();
-    
+
    // Our first field is title.
    // We want full-text search for it, and we want to be able
    // to retrieve the document after the search.
    //
    // TEXT | STORED is some syntactic sugar to describe
-    // that. 
-    // 
+    // that.
+    //
    // `TEXT` means the field should be tokenized and indexed,
    // along with its term frequency and term positions.
    //
    // `STORED` means that the field will also be saved
    // in a compressed, row-oriented key-value store.
-    // This store is useful to reconstruct the 
+    // This store is useful to reconstruct the
    // documents that were selected during the search phase.
    schema_builder.add_text_field("title", TEXT | STORED);
-    
+
    // Our first field is body.
    // We want full-text search for it, and we want to be able
    // to retrieve the body after the search.
    schema_builder.add_text_field("body", TEXT);
-    
-    let schema = schema_builder.build(); 
+
+    let schema = schema_builder.build();



    // # Indexing documents
    //
    // Let's create a brand new index.
-    // 
+    //
    // This will actually just save a meta.json
    // with our schema in the directory.
    let index = try!(Index::create(index_path, schema.clone()));

-    
-    
+
+
    // To insert document we need an index writer.
    // There must be only one writer at a time.
    // This single `IndexWriter` is already
    // multithreaded.
    //
-    // Here we use a buffer of 1 GB. Using a bigger 
+    // Here we use a buffer of 50MB per thread. Using a bigger
    // heap for the indexer can increase its throughput.
-    // This buffer will be split between the indexing
-    // threads.
-    let mut index_writer = try!(index.writer(1_000_000_000));
+    let mut index_writer = try!(index.writer(50_000_000));

    // Let's index our documents!
    // We first need a handle on the title and the body field.
-    
-    
+
+
    // ### Create a document "manually".
    //
    // We can create a document manually, by setting the fields
    // one by one in a Document object.
    let title = schema.get_field("title").unwrap();
    let body = schema.get_field("body").unwrap();
-     
+
    let mut old_man_doc = Document::default();
    old_man_doc.add_text(title, "The Old Man and the Sea");
-    old_man_doc.add_text(body, "He was an old man who fished alone in a skiff in the Gulf Stream and he had gone eighty-four days now without taking a fish.");
-    
+    old_man_doc.add_text(body,
+                         "He was an old man who fished alone in a skiff in the Gulf Stream and \
+                          he had gone eighty-four days now without taking a fish.");
+
    // ... and add it to the `IndexWriter`.
-    try!(index_writer.add_document(old_man_doc));
-    
+    index_writer.add_document(old_man_doc);
+
    // ### Create a document directly from json.
    //
    // Alternatively, we can use our schema to parse
    // a document object directly from json.
-    
+
    let mice_and_men_doc = try!(schema.parse_document(r#"{
       "title": "Of Mice and Men",
       "body": "few miles south of Soledad, the Salinas River drops in close to the hillside bank and runs deep and green. The water is warm too, for it has slipped twinkling over the yellow sands in the sunlight before reaching the narrow pool. On one side of the river the golden foothill slopes curve up to the strong and rocky Gabilan Mountains, but on the valley side the water is lined with trees—willows fresh and green with every spring, carrying in their lower leaf junctures the debris of the winter’s flooding; and sycamores with mottled, white,recumbent limbs and branches that arch over the pool"  
    }"#));
-    
-    try!(index_writer.add_document(mice_and_men_doc));
-    
+
+    index_writer.add_document(mice_and_men_doc);
+
    // Multi-valued field are allowed, they are
    // expressed in JSON by an array.
    // The following document has two titles.
@@ -116,20 +115,20 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
       "title": ["Frankenstein", "The Modern Promotheus"],
       "body": "You will rejoice to hear that no disaster has accompanied the commencement of an enterprise which you have regarded with such evil forebodings.  I arrived here yesterday, and my first task is to assure my dear sister of my welfare and increasing confidence in the success of my undertaking."  
    }"#));
-    try!(index_writer.add_document(frankenstein_doc));
-    
+    index_writer.add_document(frankenstein_doc);
+
    // This is an example, so we will only index 3 documents
    // here. You can check out tantivy's tutorial to index
-    // the English wikipedia. Tantivy's indexing is rather fast. 
+    // the English wikipedia. Tantivy's indexing is rather fast.
    // Indexing 5 million articles of the English wikipedia takes
    // around 4 minutes on my computer!
-    
-    
+
+
    // ### Committing
-    // 
+    //
    // At this point our documents are not searchable.
    //
-    // 
+    //
    // We need to call .commit() explicitly to force the
    // index_writer to finish processing the documents in the queue,
    // flush the current index to the disk, and advertise
@@ -137,22 +136,25 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
    //
    // This call is blocking.
    try!(index_writer.commit());
-    
+
    // If `.commit()` returns correctly, then all of the
    // documents that have been added are guaranteed to be
    // persistently indexed.
-    // 
+    //
    // In the scenario of a crash or a power failure,
    // tantivy behaves as if has rolled back to its last
    // commit.
-    
-    
+
+
    // # Searching
    //
-    // Let's search our index. We start
-    // by creating a searcher. There can be more
-    // than one searcher at a time.
-    // 
+    // Let's search our index. Start by reloading
+    // searchers in the index. This should be done
+    // after every commit().
+    try!(index.load_searchers());
+
+    // Afterwards create one (or more) searchers.
+    //
    // You should create a searcher
    // every time you start a "search query".
    let searcher = index.searcher();
@@ -161,46 +163,45 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
    // Here, if the user does not specify which
    // field they want to search, tantivy will search
    // in both title and body.
-    let query_parser = QueryParser::new(index.schema(), vec!(title, body));
-    
+    let query_parser = QueryParser::new(index.schema(), vec![title, body]);
+
    // QueryParser may fail if the query is not in the right
    // format. For user facing applications, this can be a problem.
    // A ticket has been opened regarding this problem.
    let query = try!(query_parser.parse_query("sea whale"));
-    
-    
+
+
    // A query defines a set of documents, as
    // well as the way they should be scored.
-    //  
+    //
    // A query created by the query parser is scored according
    // to a metric called Tf-Idf, and will consider
    // any document matching at least one of our terms.
-    
-    // ### Collectors 
+
+    // ### Collectors
    //
-    // We are not interested in all of the documents but 
+    // We are not interested in all of the documents but
    // only in the top 10. Keeping track of our top 10 best documents
    // is the role of the TopCollector.
-    
    let mut top_collector = TopCollector::with_limit(10);
-    
+
    // We can now perform our query.
    try!(searcher.search(&*query, &mut top_collector));

-    // Our top collector now contains the 10 
+    // Our top collector now contains the 10
    // most relevant doc ids...
    let doc_addresses = top_collector.docs();

-    // The actual documents still need to be 
+    // The actual documents still need to be
    // retrieved from Tantivy's store.
-    // 
+    //
    // Since the body field was not configured as stored,
    // the document returned will only contain
    // a title.
-    
+
    for doc_address in doc_addresses {
-         let retrieved_doc = try!(searcher.doc(&doc_address));
-         println!("{}", schema.to_json(&retrieved_doc));
+        let retrieved_doc = try!(searcher.doc(&doc_address));
+        println!("{}", schema.to_json(&retrieved_doc));
    }

    Ok(())
--- a/src/collector/chained_collector.rs
+++ b/src/collector/chained_collector.rs
@@ -1,7 +1,7 @@
+use Result;
 use collector::Collector;
 use SegmentLocalId;
 use SegmentReader;
-use std::io;
 use DocId;
 use Score;

@@ -12,7 +12,7 @@ use Score;
 pub struct DoNothingCollector;
 impl Collector for DoNothingCollector {
    #[inline]
-    fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> io::Result<()> {
+    fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> Result<()> {
        Ok(())
    }
    #[inline]
@@ -38,7 +38,7 @@ impl<Left: Collector, Right: Collector> ChainedCollector<Left, Right> {
 }

 impl<Left: Collector, Right: Collector> Collector for ChainedCollector<Left, Right> {
-    fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()> {
+    fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()> {
        try!(self.left.set_segment(segment_local_id, segment));
        try!(self.right.set_segment(segment_local_id, segment));
        Ok(())
--- a/src/collector/count_collector.rs
+++ b/src/collector/count_collector.rs
@@ -1,7 +1,7 @@
-use std::io;
 use super::Collector;
 use DocId;
 use Score;
+use Result;
 use SegmentReader;
 use SegmentLocalId;

@@ -28,7 +28,7 @@ impl Default for CountCollector {

 impl Collector for CountCollector {

-    fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> io::Result<()> {
+    fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> Result<()> {
        Ok(())
    }

--- a/src/collector/mod.rs
+++ b/src/collector/mod.rs
@@ -2,7 +2,7 @@ use SegmentReader;
 use SegmentLocalId;
 use DocId;
 use Score;
-use std::io;
+use Result;

 mod count_collector;
 pub use self::count_collector::CountCollector;
@@ -48,14 +48,14 @@ pub use self::chained_collector::chain;
 pub trait Collector {
    /// `set_segment` is called before beginning to enumerate 
    /// on this segment.
-    fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()>;
+    fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()>;
    /// The query pushes the scored document to the collector via this method.
    fn collect(&mut self, doc: DocId, score: Score);
 }


 impl<'a, C: Collector> Collector for &'a mut C {
-    fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()> {
+    fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()> {
        (*self).set_segment(segment_local_id, segment)
    }
    /// The query pushes the scored document to the collector via this method.
@@ -73,9 +73,9 @@ pub mod tests {
    use DocId;
    use Score;
    use core::SegmentReader;
-    use std::io;
    use SegmentLocalId;
-    use fastfield::U32FastFieldReader;
+    use fastfield::U64FastFieldReader;
+    use fastfield::FastFieldReader;
    use schema::Field;
    
    /// Stores all of the doc ids.
@@ -107,7 +107,7 @@ pub mod tests {

    impl Collector for TestCollector {

-        fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> io::Result<()> {
+        fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
            self.offset += self.segment_max_doc;
            self.segment_max_doc = reader.max_doc();
            Ok(())
@@ -126,9 +126,9 @@ pub mod tests {
    ///
    /// This collector is mainly useful for tests.
    pub struct FastFieldTestCollector {
-        vals: Vec<u32>,
+        vals: Vec<u64>,
        field: Field,
-        ff_reader: Option<U32FastFieldReader>,
+        ff_reader: Option<U64FastFieldReader>,
    }

    impl FastFieldTestCollector {
@@ -140,14 +140,14 @@ pub mod tests {
            }
        }

-        pub fn vals(&self,) -> &Vec<u32> {
-            &self.vals
+        pub fn vals(self,) -> Vec<u64> {
+            self.vals
        }
    }
        
    impl Collector for FastFieldTestCollector {
-        fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> io::Result<()> {
-            self.ff_reader = Some(try!(reader.get_fast_field_reader(self.field)));
+        fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
+            self.ff_reader = Some(reader.get_fast_field_reader(self.field)?);
            Ok(())
        }

--- a/src/collector/multi_collector.rs
+++ b/src/collector/multi_collector.rs
@@ -1,7 +1,7 @@
-use std::io;
 use super::Collector;
 use DocId;
 use Score;
+use Result;
 use SegmentReader;
 use SegmentLocalId;

@@ -25,7 +25,7 @@ impl<'a> MultiCollector<'a> {


 impl<'a> Collector for MultiCollector<'a> {
-    fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()> {
+    fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result<()> {
        for collector in &mut self.collectors {
            try!(collector.set_segment(segment_local_id, segment));
        }
--- a/src/collector/top_collector.rs
+++ b/src/collector/top_collector.rs
@@ -1,8 +1,8 @@
-use std::io;
 use super::Collector;
 use SegmentReader;
 use SegmentLocalId;
 use DocAddress;
+use Result;
 use std::collections::BinaryHeap;
 use std::cmp::Ordering;
 use DocId;
@@ -105,7 +105,7 @@ impl TopCollector {

 impl Collector for TopCollector {

-    fn set_segment(&mut self, segment_id: SegmentLocalId, _: &SegmentReader) -> io::Result<()> {
+    fn set_segment(&mut self, segment_id: SegmentLocalId, _: &SegmentReader) -> Result<()> {
        self.segment_id = segment_id;
        Ok(())
    }
--- a/src/common/bitpacker.rs
+++ b/src/common/bitpacker.rs
@@ -4,8 +4,38 @@ use common::serialize::BinarySerializable;
 use std::mem;


-pub fn compute_num_bits(amplitude: u32) -> u8 {
-    (32u32 - amplitude.leading_zeros()) as u8
+/// Computes the number of bits that will be used for bitpacking.
+///
+/// In general the target is the minimum number of bits 
+/// required to express the amplitude given in argument.
+///
+/// e.g. if the amplitude is 10, all ints can be stored on just 4 bits.
+/// 
+/// The logic is slightly more convoluted here because, for optimization
+/// reasons, we want to ensure that a value spans at most 8
+/// aligned bytes.
+/// 
+/// Spanning 9 bytes is possible, for instance, if we do
+/// bitpacking with an amplitude of 63 bits.
+/// In this case, the second int will start on bit
+/// 63 (which belongs to byte 7) and end in byte 15,
+/// hence 9 bytes (from byte 7 to byte 15 included).
+///
+/// To avoid this, we force the number of bits to 64bits
+/// when the result is greater than `64-8 = 56 bits`.
+///
+/// Note that this only affects rare use cases spanning
+/// a very large range of values. Even in this case, it results
+/// in an extra cost of at most 12% compared to the optimal
+/// number of bits.
+pub fn compute_num_bits(amplitude: u64) -> u8 {
+    let amplitude = (64u32 - amplitude.leading_zeros()) as u8;
+    if amplitude <= 64 - 8 {
+        amplitude
+    }
+    else {
+        64
+    }
 }

 pub struct BitPacker {
@@ -15,7 +45,7 @@ pub struct BitPacker {
    written_size: usize,
 }

-impl BitPacker {   
+impl BitPacker {
    
    pub fn new(num_bits: usize) -> BitPacker {
        BitPacker {
@@ -26,7 +56,7 @@ impl BitPacker {
        }
    }
    
-    pub fn write<TWrite: Write>(&mut self, val: u32, output: &mut TWrite) -> io::Result<()> {
+    pub fn write<TWrite: Write>(&mut self, val: u64, output: &mut TWrite) -> io::Result<()> {
        let val_u64 = val as u64;
        if self.mini_buffer_written + self.num_bits > 64 {
            self.mini_buffer |= val_u64.wrapping_shl(self.mini_buffer_written as u32);
@@ -67,22 +97,29 @@ impl BitPacker {

 pub struct BitUnpacker {
    num_bits: usize,
-    mask: u32,
+    mask: u64,
    data_ptr: *const u8,
    data_len: usize, 
 }

 impl BitUnpacker {
    pub fn new(data: &[u8], num_bits: usize) -> BitUnpacker {
+        let mask: u64 =
+            if num_bits == 64 {
+                !0u64 
+            }
+            else {
+                 (1u64 << num_bits) - 1u64
+            };
        BitUnpacker {
            num_bits: num_bits,
-            mask: (1u32 << num_bits) - 1u32,
+            mask: mask,
            data_ptr: data.as_ptr(),
            data_len: data.len()
        }
    }
    
-    pub fn get(&self, idx: usize) -> u32 {
+    pub fn get(&self, idx: usize) -> u64 {
        if self.num_bits == 0 {
            return 0;
        }
@@ -101,7 +138,7 @@ impl BitUnpacker {
            }
            val_unshifted_unmasked = unsafe { mem::transmute::<[u8; 8], u64>(arr) };
        }
-        let val_shifted = (val_unshifted_unmasked >> bit_shift) as u32;
+        let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
        (val_shifted & self.mask)
    }
        
@@ -123,13 +160,14 @@ mod test {
        assert_eq!(compute_num_bits(4), 3u8);
        assert_eq!(compute_num_bits(255), 8u8);
        assert_eq!(compute_num_bits(256), 9u8);
+        assert_eq!(compute_num_bits(5_000_000_000), 33u8);
    }
    
    fn test_bitpacker_util(len: usize, num_bits: usize) {
        let mut data = Vec::new();
        let mut bitpacker = BitPacker::new(num_bits);
-        let max_val: u32 = (1 << num_bits) - 1;
-        let vals: Vec<u32> = (0u32..len as u32).map(|i| {
+        let max_val: u64 = (1 << num_bits) - 1;
+        let vals: Vec<u64> = (0u64..len as u64).map(|i| {
            if max_val == 0 {
                0
            }
--- a/src/common/counting_writer.rs
+++ b/src/common/counting_writer.rs
@@ -0,0 +1,58 @@
+use std::io::Write;
+use std::io;
+
+
+pub struct CountingWriter<W: Write> {
+    underlying: W,
+    written_bytes: usize,
+}
+
+impl<W: Write> CountingWriter<W> {
+    pub fn wrap(underlying: W) -> CountingWriter<W> {
+        CountingWriter {
+            underlying: underlying,
+            written_bytes: 0,
+        }
+    }
+
+    pub fn written_bytes(&self,) -> usize {
+        self.written_bytes
+    }
+
+    pub fn finish(mut self) -> io::Result<(W, usize)> {
+        self.flush()?;
+        Ok((self.underlying, self.written_bytes))
+    }
+}
+
+impl<W: Write> Write for CountingWriter<W> {
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        let written_size = self.underlying.write(buf)?;
+        self.written_bytes += written_size;
+        Ok(written_size)
+    }
+
+    fn flush(&mut self) -> io::Result<()> {
+        self.underlying.flush()
+    }
+}
+
+
+
+#[cfg(test)]
+mod test {
+
+    use super::CountingWriter;
+    use std::io::Write;
+
+    #[test]
+    fn test_counting_writer() {
+        let buffer: Vec<u8> = vec!();
+        let mut counting_writer = CountingWriter::wrap(buffer);
+        let bytes = (0u8..10u8).collect::<Vec<u8>>();
+        counting_writer.write_all(&bytes).unwrap();
+        let (w, len): (Vec<u8>, usize) = counting_writer.finish().unwrap();
+        assert_eq!(len, 10);
+        assert_eq!(w.len(), 10);
+    }
+}
--- a/src/common/mod.rs
+++ b/src/common/mod.rs
@@ -2,6 +2,7 @@ mod serialize;
 mod timer;
 mod vint;
 pub mod bitpacker;
+mod counting_writer;


 pub use self::serialize::BinarySerializable;
@@ -9,10 +10,10 @@ pub use self::timer::Timing;
 pub use self::timer::TimerTree;
 pub use self::timer::OpenTimer;
 pub use self::vint::VInt;
-
-
+pub use self::counting_writer::CountingWriter;
 use std::io;

+/// Create a default io error given a string.
 pub fn make_io_err(msg: String) -> io::Error {
    io::Error::new(io::ErrorKind::Other, msg)
 }
@@ -29,4 +30,53 @@ pub trait HasLen {
    }
 }

+const HIGHEST_BIT: u64 = 1 << 63;

+
+/// Maps `i64` to `u64` so that
+/// `-2^63 .. 2^63-1` is mapped
+///     to 
+/// `0 .. 2^64-1`
+/// in that order.
+///
+/// This is more suited than simply casting (`val as u64`)
+/// because of bitpacking.
+/// 
+/// Imagine a list of `i64` ranging from -10 to 10.
+/// When casting negative values, the negative values are projected
+/// to values over 2^63, and all values end up requiring 64 bits.
+#[inline(always)]
+pub fn i64_to_u64(val: i64) -> u64 {
+    (val as u64) ^ HIGHEST_BIT
+}
+
+/// Reverse the mapping given by 
+/// `i64_to_u64`.
+#[inline(always)]
+pub fn u64_to_i64(val: u64) -> i64 {
+    (val ^ HIGHEST_BIT) as i64
+}
+
+
+
+#[cfg(test)]
+mod test {
+
+    use super::{i64_to_u64, u64_to_i64};
+
+    fn test_i64_converter_helper(val: i64) {
+        assert_eq!(u64_to_i64(i64_to_u64(val)), val);
+    }
+
+    #[test]
+    fn test_i64_converter() {
+        assert_eq!(i64_to_u64(i64::min_value()), u64::min_value());
+        assert_eq!(i64_to_u64(i64::max_value()), u64::max_value());
+        test_i64_converter_helper(0i64);
+        test_i64_converter_helper(i64::min_value());
+        test_i64_converter_helper(i64::max_value());
+        for i in -1000i64..1000i64 {
+            test_i64_converter_helper(i);
+        }
+    }
+}
--- a/src/common/serialize.rs
+++ b/src/common/serialize.rs
@@ -1,4 +1,3 @@
-
 use byteorder::{ReadBytesExt, WriteBytesExt};
 use byteorder::LittleEndian as Endianness;
 use std::fmt;
@@ -6,20 +5,12 @@ use std::io::Write;
 use std::io::Read;
 use std::io;
 use common::VInt;
-use byteorder;

 pub trait BinarySerializable : fmt::Debug + Sized {
    fn serialize(&self, writer: &mut Write) -> io::Result<usize>;
    fn deserialize(reader: &mut Read) -> io::Result<Self>;
 }

-fn convert_byte_order_error(byteorder_error: byteorder::Error) -> io::Error {
-    match byteorder_error {
-        byteorder::Error::UnexpectedEOF => io::Error::new(io::ErrorKind::InvalidData, "Reached EOF unexpectedly"),
-        byteorder::Error::Io(e) => e,
-    }
-}
-
 impl BinarySerializable for () {
    fn serialize(&self, _: &mut Write) -> io::Result<usize> {
        Ok(0)
@@ -62,12 +53,10 @@ impl BinarySerializable for u32 {
    fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
        writer.write_u32::<Endianness>(*self)
              .map(|_| 4)
-              .map_err(convert_byte_order_error)
    }

    fn deserialize(reader: &mut Read) -> io::Result<u32> {
        reader.read_u32::<Endianness>()
-              .map_err(convert_byte_order_error)
    }
 }

@@ -76,24 +65,30 @@ impl BinarySerializable for u64 {
    fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
        writer.write_u64::<Endianness>(*self)
              .map(|_| 8)
-              .map_err(convert_byte_order_error)
    }
    fn deserialize(reader: &mut Read) -> io::Result<u64> {
        reader.read_u64::<Endianness>()
-              .map_err(convert_byte_order_error)
+    }
+}
+
+impl BinarySerializable for i64 {
+    fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
+        writer.write_i64::<Endianness>(*self)
+              .map(|_| 8)
+    }
+    fn deserialize(reader: &mut Read) -> io::Result<i64> {
+        reader.read_i64::<Endianness>()
    }
 }


 impl BinarySerializable for u8 {
    fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
-        // TODO error
-        try!(writer.write_u8(*self).map_err(convert_byte_order_error));
+        try!(writer.write_u8(*self));
        Ok(1)
    }
    fn deserialize(reader: &mut Read) -> io::Result<u8> {
        reader.read_u8()
-              .map_err(convert_byte_order_error)
    }
 }

@@ -123,7 +118,7 @@ mod test {

    fn serialize_test<T: BinarySerializable + Eq>(v: T, num_bytes: usize) {
        let mut buffer: Vec<u8> = Vec::new();
-        
+
        if num_bytes != 0 {
            assert_eq!(v.serialize(&mut buffer).unwrap(), num_bytes);
            assert_eq!(buffer.len(), num_bytes);
--- a/src/common/timer.rs
+++ b/src/common/timer.rs
@@ -33,7 +33,7 @@ impl<'a> Drop for OpenTimer<'a> {
 }

 /// Timing recording
-#[derive(Debug, RustcEncodable)]
+#[derive(Debug, Serialize)]
 pub struct Timing {
    name: &'static str,
    duration: i64,
@@ -41,7 +41,7 @@ pub struct Timing {
 }

 /// Timer tree
-#[derive(Debug, RustcEncodable)]
+#[derive(Debug, Serialize)]
 pub struct TimerTree {
    timings: Vec<Timing>,
 }
--- a/src/compression/composite.rs
+++ b/src/compression/composite.rs
@@ -110,7 +110,7 @@ pub mod tests {
        let data = generate_array(10_000, 0.1);
        let mut encoder = CompositeEncoder::new();
        let compressed = encoder.compress_unsorted(&data);
-        assert_eq!(compressed.len(), 19_790);
+        assert!(compressed.len() <= 19_794);
        let mut decoder = CompositeDecoder::new();
        let result = decoder.uncompress_unsorted(&compressed, data.len());
        for i in 0..data.len() {
@@ -123,7 +123,7 @@ pub mod tests {
        let data = generate_array(10_000, 0.1);
        let mut encoder = CompositeEncoder::new();
        let compressed = encoder.compress_sorted(&data);
-        assert_eq!(compressed.len(), 7_822);
+        assert!(compressed.len() <= 7_826);
        let mut decoder = CompositeDecoder::new();
        let result = decoder.uncompress_sorted(&compressed, data.len());
        for i in 0..data.len() {
--- a/src/compression/mod.rs
+++ b/src/compression/mod.rs
@@ -4,16 +4,32 @@
 mod composite;
 pub use self::composite::{CompositeEncoder, CompositeDecoder};

-#[cfg(feature="simdcompression")]
-mod compression_simd;
-#[cfg(feature="simdcompression")]
-pub use self::compression_simd::{BlockEncoder, BlockDecoder};
-

 #[cfg(not(feature="simdcompression"))]
-mod compression_nosimd;
-#[cfg(not(feature="simdcompression"))]
-pub use self::compression_nosimd::{BlockEncoder, BlockDecoder};
+mod pack {
+    mod compression_pack_nosimd;
+    pub use self::compression_pack_nosimd::*;
+}
+
+#[cfg(feature="simdcompression")]
+mod pack {
+    mod compression_pack_simd;
+    pub use self::compression_pack_simd::*;
+}
+
+pub use self::pack::{BlockEncoder, BlockDecoder};
+
+#[cfg( any(not(feature="simdcompression"), target_env="msvc") )]
+mod vint {
+    mod compression_vint_nosimd;
+    pub use self::compression_vint_nosimd::*;
+}
+
+#[cfg( all(feature="simdcompression", not(target_env="msvc")) )]
+mod vint {
+    mod compression_vint_simd;
+    pub use self::compression_vint_simd::*;
+}


 pub trait VIntEncoder {
@@ -26,51 +42,16 @@ pub trait VIntDecoder {
    fn uncompress_vint_unsorted<'a>(&mut self, compressed_data: &'a [u8], num_els: usize) -> &'a [u8];
 }

-impl VIntEncoder for BlockEncoder{
+impl VIntEncoder for BlockEncoder {
    
-    fn compress_vint_sorted(&mut self, input: &[u32], mut offset: u32) -> &[u8] {
-        let mut byte_written = 0;
-        for &v in input {
-            let mut to_encode: u32 = v - offset;
-            offset = v;
-            loop {
-                let next_byte: u8 = (to_encode % 128u32) as u8;
-                to_encode /= 128u32;
-                if to_encode == 0u32 {
-                    self.output[byte_written] = next_byte | 128u8;
-                    byte_written += 1;
-                    break;
-                }
-                else {
-                    self.output[byte_written] = next_byte;
-                    byte_written += 1;
-                }
-            }
-        }
-        &self.output[..byte_written]
+    fn compress_vint_sorted(&mut self, input: &[u32], offset: u32) -> &[u8] {
+        vint::compress_sorted(input, &mut self.output, offset)
    }
    
    fn compress_vint_unsorted(&mut self, input: &[u32]) -> &[u8] {
-        let mut byte_written = 0;
-        for &v in input {
-            let mut to_encode: u32 = v;
-            loop {
-                let next_byte: u8 = (to_encode % 128u32) as u8;
-                to_encode /= 128u32;
-                if to_encode == 0u32 {
-                    self.output[byte_written] = next_byte | 128u8;
-                    byte_written += 1;
-                    break;
-                }
-                else {
-                    self.output[byte_written] = next_byte;
-                    byte_written += 1;
-                }
-            }
-        }
-        &self.output[..byte_written]
+        vint::compress_unsorted(input, &mut self.output)
    }
-} 
+}

 impl VIntDecoder for BlockDecoder {
    
@@ -79,52 +60,19 @@ impl VIntDecoder for BlockDecoder {
        compressed_data: &'a [u8],
        offset: u32,
        num_els: usize) -> &'a [u8] {
-        let mut read_byte = 0;
-        let mut result = offset;
-        for i in 0..num_els {
-            let mut shift = 0u32;
-            loop {
-                let cur_byte = compressed_data[read_byte];
-                read_byte += 1;
-                result += ((cur_byte % 128u8) as u32) << shift;
-                if cur_byte & 128u8 != 0u8 {
-                    break;
-                }
-                shift += 7;
-            }
-            self.output[i] = result;
-        }
        self.output_len = num_els;
-        &compressed_data[read_byte..]
+        vint::uncompress_sorted(compressed_data, &mut self.output[..num_els], offset)
    }
    
    fn uncompress_vint_unsorted<'a>(
        &mut self,
        compressed_data: &'a [u8],
        num_els: usize) -> &'a [u8] {
-        let mut read_byte = 0;
-        for i in 0..num_els {
-            let mut result = 0u32;
-            let mut shift = 0u32;
-            loop {
-                let cur_byte = compressed_data[read_byte];
-                read_byte += 1;
-                result += ((cur_byte % 128u8) as u32) << shift;
-                if cur_byte & 128u8 != 0u8 {
-                    break;
-                }
-                shift += 7;
-            }
-            self.output[i] = result;
-        }
        self.output_len = num_els;
-        &compressed_data[read_byte..]
-    }
-    
+        vint::uncompress_unsorted(compressed_data, &mut self.output[..num_els])
+    }   
 }

-    
-    

 pub const NUM_DOCS_PER_BLOCK: usize = 128; //< should be a power of 2 to let the compiler optimize.

@@ -224,7 +172,7 @@ pub mod tests {
    #[test]
    fn test_encode_vint() {
        {
-            let expected_length = 123;
+            let expected_length = 154;
            let mut encoder = BlockEncoder::new();
            let input: Vec<u32> = (0u32..123u32)
                .map(|i| 4 + i * 7 / 2)
@@ -232,23 +180,13 @@ pub mod tests {
                .collect();
            for offset in &[0u32, 1u32, 2u32] {
                let encoded_data = encoder.compress_vint_sorted(&input, *offset);
-                assert_eq!(encoded_data.len(), expected_length);
+                assert!(encoded_data.len() <= expected_length);
                let mut decoder = BlockDecoder::new();
                let remaining_data = decoder.uncompress_vint_sorted(&encoded_data, *offset, input.len());
                assert_eq!(0, remaining_data.len());
                assert_eq!(input, decoder.output_array());
            }
        }
-        {
-            let mut encoder = BlockEncoder::new();
-            let input = vec!(3u32, 17u32, 187u32);
-            let encoded_data = encoder.compress_vint_sorted(&input, 0);
-            assert_eq!(encoded_data.len(), 4);
-            assert_eq!(encoded_data[0], 3u8 + 128u8);
-            assert_eq!(encoded_data[1], (17u8 - 3u8) + 128u8);
-            assert_eq!(encoded_data[2], (187u8 - 17u8 - 128u8));
-            assert_eq!(encoded_data[3], (1u8 + 128u8));
-        }
    }


@@ -272,4 +210,27 @@ pub mod tests {
        });
    }

+
+    const NUM_INTS_BENCH_VINT: usize = 10;
+
+    #[bench]
+    fn bench_compress_vint(b: &mut Bencher) {
+        let mut encoder = BlockEncoder::new();
+        let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
+        b.iter(|| {
+            encoder.compress_vint_sorted(&data, 0u32);
+        });
+    }
+    
+    #[bench]
+    fn bench_uncompress_vint(b: &mut Bencher) {
+        let mut encoder = BlockEncoder::new();
+        let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
+        let compressed = encoder.compress_vint_sorted(&data, 0u32);
+        let mut decoder = BlockDecoder::new(); 
+        b.iter(|| {
+            decoder.uncompress_vint_sorted(compressed, 0u32, NUM_INTS_BENCH_VINT);
+        });
+    }
+
 }
--- a/src/compression/pack/compression_pack_nosimd.rs
+++ b/src/compression/pack/compression_pack_nosimd.rs
@@ -2,7 +2,7 @@ use common::bitpacker::compute_num_bits;
 use common::bitpacker::{BitPacker, BitUnpacker};
 use std::cmp;
 use std::io::Write;
-use super::NUM_DOCS_PER_BLOCK;
+use super::super::NUM_DOCS_PER_BLOCK;

 const COMPRESSED_BLOCK_MAX_SIZE: usize = NUM_DOCS_PER_BLOCK * 4 + 1; 

--- a/src/compression/pack/compression_pack_simd.rs
+++ b/src/compression/pack/compression_pack_simd.rs
@@ -1,44 +1,45 @@
-
-use super::NUM_DOCS_PER_BLOCK;
-
-use libc::size_t;
+use super::super::NUM_DOCS_PER_BLOCK;

 const COMPRESSED_BLOCK_MAX_SIZE: usize = NUM_DOCS_PER_BLOCK * 4 + 1; 

-extern {
-    fn compress_sorted_cpp(
-        data: *const u32,
-        output: *mut u8,
-        offset: u32) -> size_t;
+mod simdcomp {
+    use libc::size_t;

-    fn uncompress_sorted_cpp(
-        compressed_data: *const u8,
-        output: *mut u32,
-        offset: u32) -> size_t;
-        
-    fn compress_unsorted_cpp(
-        data: *const u32,
-        output: *mut u8) -> size_t;
+    extern {
+        pub fn compress_sorted(
+            data: *const u32,
+            output: *mut u8,
+            offset: u32) -> size_t;

-    fn uncompress_unsorted_cpp(
-        compressed_data: *const u8,
-        output: *mut u32) -> size_t;
+        pub fn uncompress_sorted(
+            compressed_data: *const u8,
+            output: *mut u32,
+            offset: u32) -> size_t;
+            
+        pub fn compress_unsorted(
+            data: *const u32,
+            output: *mut u8) -> size_t;
+
+        pub fn uncompress_unsorted(
+            compressed_data: *const u8,
+            output: *mut u32) -> size_t;
+    }
 }

 fn compress_sorted(vals: &[u32], output: &mut [u8], offset: u32) -> usize {
-    unsafe { compress_sorted_cpp(vals.as_ptr(), output.as_mut_ptr(), offset) }
+    unsafe { simdcomp::compress_sorted(vals.as_ptr(), output.as_mut_ptr(), offset) }
 }

 fn uncompress_sorted(compressed_data: &[u8], output: &mut [u32], offset: u32) -> usize {
-    unsafe { uncompress_sorted_cpp(compressed_data.as_ptr(), output.as_mut_ptr(), offset) }
+    unsafe { simdcomp::uncompress_sorted(compressed_data.as_ptr(), output.as_mut_ptr(), offset) }
 }

 fn compress_unsorted(vals: &[u32], output: &mut [u8]) -> usize {
-    unsafe { compress_unsorted_cpp(vals.as_ptr(), output.as_mut_ptr()) }
+    unsafe { simdcomp::compress_unsorted(vals.as_ptr(), output.as_mut_ptr()) }
 }

 fn uncompress_unsorted(compressed_data: &[u8], output: &mut [u32]) -> usize {
-    unsafe { uncompress_unsorted_cpp(compressed_data.as_ptr(), output.as_mut_ptr()) }
+    unsafe { simdcomp::uncompress_unsorted(compressed_data.as_ptr(), output.as_mut_ptr()) }
 }


--- a/src/compression/vint/compression_vint_nosimd.rs
+++ b/src/compression/vint/compression_vint_nosimd.rs
@@ -0,0 +1,92 @@
+/// Delta-encodes `input` against `offset`, then writes each gap as 7-bit groups into `output`.
+#[inline(always)]
+pub fn compress_sorted<'a>(input: &[u32], output: &'a mut [u8], mut offset: u32) -> &'a [u8] {
+    let mut byte_written = 0; // number of bytes emitted into `output` so far
+    for &v in input {
+        let mut to_encode: u32 = v - offset; // gap to the previous value; underflows if `v < offset`
+        offset = v; // the next gap is measured from this value
+        loop {
+            let next_byte: u8 = (to_encode % 128u32) as u8; // lowest 7 bits are emitted first
+            to_encode /= 128u32;
+            if to_encode == 0u32 {
+                output[byte_written] = next_byte | 128u8; // high bit set marks the final byte of a value
+                byte_written += 1;
+                break;
+            }
+            else {
+                output[byte_written] = next_byte; // high bit clear: more bytes follow for this value
+                byte_written += 1;
+            }
+        }
+    }
+    &output[..byte_written] // only the written prefix of `output` is returned
+}
+/// Writes each value of `input` as 7-bit groups into `output` (no delta step); returns the written prefix.
+#[inline(always)]
+pub fn compress_unsorted<'a>(input: &[u32], output: &'a mut [u8]) -> &'a [u8] {
+    let mut byte_written = 0; // number of bytes emitted into `output` so far
+    for &v in input {
+        let mut to_encode: u32 = v; // the raw value is encoded, not a gap
+        loop {
+            let next_byte: u8 = (to_encode % 128u32) as u8; // lowest 7 bits are emitted first
+            to_encode /= 128u32;
+            if to_encode == 0u32 {
+                output[byte_written] = next_byte | 128u8; // high bit set marks the final byte of a value
+                byte_written += 1;
+                break;
+            }
+            else {
+                output[byte_written] = next_byte; // high bit clear: more bytes follow for this value
+                byte_written += 1;
+            }
+        }
+    }
+    &output[..byte_written] // indexing above panics if `output` is too small for the encoding
+}
+/// Decodes `output.len()` delta-encoded values starting from `offset`; returns the unread bytes.
+#[inline(always)]
+pub fn uncompress_sorted<'a>(
+        compressed_data: &'a [u8],
+        output: &mut [u32],
+        offset: u32) -> &'a [u8] {
+    let mut read_byte = 0; // index of the next byte to consume from `compressed_data`
+    let mut result = offset; // running sum: never reset, so each decoded gap accumulates onto it
+    let num_els = output.len(); // the number of values to decode is dictated by the output slice
+    for i in 0..num_els {
+        let mut shift = 0u32; // bit position of the current 7-bit group within the gap
+        loop {
+            let cur_byte = compressed_data[read_byte]; // panics if the input ends mid-value
+            read_byte += 1;
+            result += ((cur_byte % 128u8) as u32) << shift; // add the 7 payload bits at their position
+            if cur_byte & 128u8 != 0u8 { // high bit set marks the final byte of this value
+                break;
+            }
+            shift += 7;
+        }
+        output[i] = result;
+    }
+    &compressed_data[read_byte..] // everything after the last consumed byte
+}
+/// Decodes `output.len()` independently encoded values into `output`; returns the unread bytes.
+#[inline(always)]
+pub fn uncompress_unsorted<'a>(
+    compressed_data: &'a [u8],
+    output: &mut [u32]) -> &'a [u8] {
+    let mut read_byte = 0; // index of the next byte to consume from `compressed_data`
+    let num_els = output.len(); // the number of values to decode is dictated by the output slice
+    for i in 0..num_els {
+        let mut result = 0u32; // reset per value: no accumulation across values here
+        let mut shift = 0u32; // bit position of the current 7-bit group within the value
+        loop {
+            let cur_byte = compressed_data[read_byte]; // panics if the input ends mid-value
+            read_byte += 1;
+            result += ((cur_byte % 128u8) as u32) << shift; // add the 7 payload bits at their position
+            if cur_byte & 128u8 != 0u8 { // high bit set marks the final byte of this value
+                break;
+            }
+            shift += 7;
+        }
+        output[i] = result;
+    }
+    &compressed_data[read_byte..] // everything after the last consumed byte
+}
--- a/src/compression/vint/compression_vint_simd.rs
+++ b/src/compression/vint/compression_vint_simd.rs
@@ -0,0 +1,82 @@
+// Foreign declarations for the four `streamvbyte_*` functions wrapped by the safe functions in this file.
+mod streamvbyte {
+
+    use libc::size_t;
+    // The wrappers in this file use the encode results as a written length and the decode results as a consumed length.
+    extern {
+        pub fn streamvbyte_delta_encode(
+            data: *const u32,
+            num_els: u32,
+            output: *mut u8,
+            offset: u32) -> size_t;
+
+        pub fn streamvbyte_delta_decode(
+            compressed_data: *const u8,
+            output: *mut u32,
+            num_els: u32,
+            offset: u32) -> size_t;
+            
+        pub fn streamvbyte_encode(
+            data: *const u32,
+            num_els: u32,
+            output: *mut u8) -> size_t;
+        
+        pub fn streamvbyte_decode(
+            compressed_data: *const u8,
+            output: *mut u32,
+            num_els: usize) -> size_t; // NOTE(review): `usize` here, `u32` in the three siblings; confirm against the C prototype
+    }
+}
+
+/// Encodes `input` via `streamvbyte_delta_encode` with `offset`; returns the written prefix of `output`.
+#[inline(always)]
+pub fn compress_sorted<'a>(input: &[u32], output: &'a mut [u8], offset: u32) -> &'a [u8] {
+    let compress_length = unsafe { // NOTE(review): only a raw pointer to `output` is passed, not its length; confirm callers size it
+        streamvbyte::streamvbyte_delta_encode(
+            input.as_ptr(),
+            input.len() as u32, // `as` silently truncates lengths above `u32::MAX`
+            output.as_mut_ptr(),
+            offset)
+    };
+    &output[..compress_length] // panics if the reported length exceeds `output.len()`
+}
+/// Encodes `input` via `streamvbyte_encode`; returns the written prefix of `output`.
+#[inline(always)]
+pub fn compress_unsorted<'a>(input: &[u32], output: &'a mut [u8]) -> &'a [u8] {
+    let compress_length = unsafe { // NOTE(review): only a raw pointer to `output` is passed, not its length; confirm callers size it
+        streamvbyte::streamvbyte_encode(
+            input.as_ptr(),
+            input.len() as u32, // `as` silently truncates lengths above `u32::MAX`
+            output.as_mut_ptr())
+    }; 
+    &output[..compress_length] // panics if the reported length exceeds `output.len()`
+}
+/// Fills `output` via `streamvbyte_delta_decode` with `offset`; returns the bytes left unread.
+#[inline(always)]
+pub fn uncompress_sorted<'a>(
+        compressed_data: &'a [u8],
+        output: &mut [u32],
+        offset: u32) -> &'a [u8] {
+    let consumed_bytes = unsafe { // NOTE(review): the length of `compressed_data` is not passed; confirm the callee cannot over-read
+        streamvbyte::streamvbyte_delta_decode(
+            compressed_data.as_ptr(),
+            output.as_mut_ptr(),
+            output.len() as u32, // the number of values to decode is dictated by the output slice
+            offset)
+    };
+    &compressed_data[consumed_bytes..] // panics if the reported count exceeds `compressed_data.len()`
+}
+/// Fills `output` via `streamvbyte_decode`; returns the bytes of `compressed_data` left unread.
+#[inline(always)]
+pub fn uncompress_unsorted<'a>(
+    compressed_data: &'a [u8],
+    output: &mut [u32]) -> &'a [u8] {
+    let consumed_bytes = unsafe { // NOTE(review): the length of `compressed_data` is not passed; confirm the callee cannot over-read
+        streamvbyte::streamvbyte_decode(
+            compressed_data.as_ptr(),
+            output.as_mut_ptr(),
+            output.len()) // passed as `usize`, unlike the `as u32` casts in the three sibling wrappers
+    };
+    &compressed_data[consumed_bytes..] // panics if the reported count exceeds `compressed_data.len()`
+}
+
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -1,60 +1,54 @@
 use Result;
 use Error;
+use serde_json;
 use schema::Schema;
 use std::sync::Arc;
+use std::borrow::BorrowMut;
 use std::fmt;
-use rustc_serialize::json;
 use core::SegmentId;
 use directory::{Directory, MmapDirectory, RAMDirectory};
-use indexer::IndexWriter;
+use indexer::index_writer::open_index_writer;
 use core::searcher::Searcher;
 use std::convert::From;
 use num_cpus;
 use super::segment::Segment;
 use core::SegmentReader;
 use super::pool::Pool;
+use core::SegmentMeta;
 use super::pool::LeasedItem;
 use std::path::Path;
-use indexer::SegmentManager;
 use core::IndexMeta;
+use IndexWriter;
+use directory::ManagedDirectory;
 use core::META_FILEPATH;
 use super::segment::create_segment;
 use indexer::segment_updater::save_new_metas;

 const NUM_SEARCHERS: usize = 12;

-/// Accessor to the index segment manager
-///
-/// This method is not part of tantivy's public API
-pub fn get_segment_manager(index: &Index) -> Arc<SegmentManager> {
-    index.segment_manager.clone()
-}
-
-
 fn load_metas(directory: &Directory) -> Result<IndexMeta> {
-    let meta_file = try!(directory.open_read(&META_FILEPATH));
-    let meta_content = String::from_utf8_lossy(meta_file.as_slice());
-    json::decode(&meta_content)
+    let meta_data = directory.atomic_read(&META_FILEPATH)?;
+    let meta_string = String::from_utf8_lossy(&meta_data);
+    serde_json::from_str(&meta_string)
        .map_err(|e| Error::CorruptedFile(META_FILEPATH.clone(), Box::new(e)))
 }

 /// Tantivy's Search Index
 pub struct Index {
-    segment_manager: Arc<SegmentManager>,
-
-    directory: Box<Directory>,
+    directory: ManagedDirectory,
    schema: Schema,
    searcher_pool: Arc<Pool<Searcher>>,
-    docstamp: u64,
 }

+
 impl Index {
    /// Creates a new index using the `RAMDirectory`.
    ///
    /// The index will be allocated in anonymous memory.
    /// This should only be used for unit tests.
    pub fn create_in_ram(schema: Schema) -> Index {
-        let directory = Box::new(RAMDirectory::create());
+        let ram_directory = RAMDirectory::create();
+        let directory = ManagedDirectory::new(ram_directory).expect("Creating a managed directory from a brand new RAM directory should never fail.");
        Index::from_directory(directory, schema).expect("Creating a RAMDirectory should never fail") // unwrap is ok here
    }

@@ -63,9 +57,9 @@ impl Index {
    ///
    /// If a previous index was in this directory, then its meta file will be destroyed.
    pub fn create(directory_path: &Path, schema: Schema) -> Result<Index> {
-        let mut directory = MmapDirectory::open(directory_path)?;
-        save_new_metas(schema.clone(), 0, &mut directory)?;
-        Index::from_directory(box directory, schema)
+        let mmap_directory = MmapDirectory::open(directory_path)?;
+        let directory = ManagedDirectory::new(mmap_directory)?;
+        Index::from_directory(directory, schema)
    }

    /// Creates a new index in a temp directory.
@@ -77,49 +71,55 @@ impl Index {
    /// The temp directory is only used for testing the `MmapDirectory`.
    /// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`.
    pub fn create_from_tempdir(schema: Schema) -> Result<Index> {
-        let directory = Box::new(try!(MmapDirectory::create_from_tempdir()));
+        let mmap_directory = MmapDirectory::create_from_tempdir()?;
+        let directory = ManagedDirectory::new(mmap_directory)?;
        Index::from_directory(directory, schema)
    }

    /// Creates a new index given a directory and an `IndexMeta`.
-    fn create_from_metas(directory: Box<Directory>, metas: IndexMeta) -> Result<Index> {
+    fn create_from_metas(directory: ManagedDirectory, metas: IndexMeta) -> Result<Index> {
        let schema = metas.schema.clone();
-        let docstamp = metas.docstamp;
-        let committed_segments = metas.committed_segments;
-        // TODO log somethings is uncommitted is not empty.
        let index = Index {
-            segment_manager: Arc::new(SegmentManager::from_segments(committed_segments)),
            directory: directory,
            schema: schema,
            searcher_pool: Arc::new(Pool::new()),
-            docstamp: docstamp,
        };
        try!(index.load_searchers());
        Ok(index)
    }

-    /// Opens a new directory from a directory.
-    pub fn from_directory(directory: Box<Directory>, schema: Schema) -> Result<Index> {
+    /// Create a new index from a directory.
+    pub fn from_directory(mut directory: ManagedDirectory, schema: Schema) -> Result<Index> {
+        save_new_metas(schema.clone(), 0, directory.borrow_mut())?;
        Index::create_from_metas(directory, IndexMeta::with_schema(schema))
    }

    /// Opens a new directory from an index path.
    pub fn open(directory_path: &Path) -> Result<Index> {
-        let directory = try!(MmapDirectory::open(directory_path));
-        let metas = try!(load_metas(&directory)); //< TODO does the directory already exists?
-        Index::create_from_metas(directory.box_clone(), metas)
+        let mmap_directory = MmapDirectory::open(directory_path)?;
+        let directory = ManagedDirectory::new(mmap_directory)?;
+        let metas = try!(load_metas(&directory));
+        Index::create_from_metas(directory, metas)
    }

-    /// Returns the index docstamp.
+    /// Returns the index opstamp.
    ///
-    /// The docstamp is the number of documents that have been added
+    /// The opstamp is the number of documents that have been added
    /// from the beginning of time, and until the moment of the last commit.
-    pub fn docstamp(&self) -> u64 {
-        self.docstamp
+    pub fn opstamp(&self) -> u64 {
+        load_metas(self.directory()).unwrap().opstamp
    }

-    /// Creates a multithreaded writer.
-    /// Each writer produces an independent segment.
+    /// Open a new index writer. Attempts to acquire a lockfile.
+    ///
+    /// The lockfile should be deleted on drop, but it is possible
+    /// that due to a panic or other error, a stale lockfile will be
+    /// left in the index directory. If you are sure that no other
+    /// `IndexWriter` on the system is accessing the index directory,
+    /// it is safe to manually delete the lockfile.
+    ///
+    /// num_threads specifies the number of indexing workers that
+    /// should work at the same time.
    ///
    /// # Errors
    /// If the lockfile already exists, returns `Error::FileAlreadyExists`.
@@ -129,12 +129,13 @@ impl Index {
                                   num_threads: usize,
                                   heap_size_in_bytes: usize)
                                   -> Result<IndexWriter> {
-        IndexWriter::open(self, num_threads, heap_size_in_bytes)
+        open_index_writer(self, num_threads, heap_size_in_bytes)
    }


    /// Creates a multithreaded writer
    /// It just calls `writer_with_num_threads` with the number of cores as `num_threads`
+    ///
    /// # Errors
    /// If the lockfile already exists, returns `Error::FileAlreadyExists`.
    /// # Panics
@@ -151,47 +152,47 @@ impl Index {
    }

    /// Returns the list of segments that are searchable
-    pub fn searchable_segments(&self) -> Vec<Segment> {
-        self.searchable_segment_ids()
+    pub fn searchable_segments(&self) -> Result<Vec<Segment>> {
+        Ok(self
+            .searchable_segment_metas()?
            .into_iter()
-            .map(|segment_id| self.segment(segment_id))
-            .collect()
+            .map(|segment_meta| self.segment(segment_meta))
+            .collect())
    }

-    /// Remove all of the file associated with the segment.
-    ///
-    /// This method cannot fail. If a problem occurs,
-    /// some files may end up never being removed.
-    /// The error will only be logged.
-    pub fn delete_segment(&self, segment_id: SegmentId) {
-        self.segment(segment_id).delete();
-    }
-
-    /// Return a segment object given a `segment_id`
-    ///
-    /// The segment may or may not exist.
-    pub fn segment(&self, segment_id: SegmentId) -> Segment {
-        create_segment(self.clone(), segment_id)
-    }
-
-    /// Return a reference to the index directory.
-    pub fn directory(&self) -> &Directory {
-        &*self.directory
-    }
-
-    /// Return a mutable reference to the index directory.
-    pub fn directory_mut(&mut self) -> &mut Directory {
-        &mut *self.directory
-    }
-
-    /// Returns the list of segment ids that are searchable.
-    fn searchable_segment_ids(&self) -> Vec<SegmentId> {
-        self.segment_manager.committed_segments()
+    #[doc(hidden)]
+    pub fn segment(&self, segment_meta: SegmentMeta) -> Segment {
+        create_segment(self.clone(), segment_meta)
    }

    /// Creates a new segment.
    pub fn new_segment(&self) -> Segment {
-        self.segment(SegmentId::generate_random())
+        let segment_meta = SegmentMeta::new(SegmentId::generate_random());
+        create_segment(self.clone(), segment_meta)
+    }
+
+    /// Return a reference to the index directory.
+    pub fn directory(&self) -> &ManagedDirectory {
+        &self.directory
+    }
+
+    /// Return a mutable reference to the index directory.
+    pub fn directory_mut(&mut self) -> &mut ManagedDirectory {
+        &mut self.directory
+    }
+
+    /// Reads the meta.json and returns the list of
+    /// `SegmentMeta` from the last commit.
+    pub fn searchable_segment_metas(&self) -> Result<Vec<SegmentMeta>> {
+        Ok(load_metas(self.directory())?.segments)
+    }
+    
+    /// Returns the list of segment ids that are searchable.
+    pub fn searchable_segment_ids(&self) -> Result<Vec<SegmentId>> {
+        Ok(self.searchable_segment_metas()?
+               .iter()
+               .map(|segment_meta| segment_meta.id())
+               .collect())          
    }

    /// Creates a new generation of searchers after
@@ -200,16 +201,14 @@ impl Index {
    /// This needs to be called when a new segment has been
    /// published or after a merge.
    pub fn load_searchers(&self) -> Result<()> {
-        let searchable_segments = self.searchable_segments();
-        let mut searchers = Vec::new();
-        for _ in 0..NUM_SEARCHERS {
-            let searchable_segments_clone = searchable_segments.clone();
-            let segment_readers: Vec<SegmentReader> = try!(searchable_segments_clone.into_iter()
+        let searchable_segments = self.searchable_segments()?;
+        let segment_readers: Vec<SegmentReader> = try!(searchable_segments
+                .into_iter()
                .map(SegmentReader::open)
                .collect());
-            let searcher = Searcher::from(segment_readers);
-            searchers.push(searcher);
-        }
+        let searchers = (0..NUM_SEARCHERS)
+            .map(|_| Searcher::from(segment_readers.clone()))
+            .collect();
        self.searcher_pool.publish_new_generation(searchers);
        Ok(())
    }
@@ -239,12 +238,9 @@ impl fmt::Debug for Index {
 impl Clone for Index {
    fn clone(&self) -> Index {
        Index {
-            segment_manager: self.segment_manager.clone(),
-
-            directory: self.directory.box_clone(),
+            directory: self.directory.clone(),
            schema: self.schema.clone(),
            searcher_pool: self.searcher_pool.clone(),
-            docstamp: self.docstamp,
        }
    }
 }
--- a/src/core/index_meta.rs
+++ b/src/core/index_meta.rs
@@ -1,7 +1,5 @@
-
 use schema::Schema;
-use core::SegmentId;
-
+use core::SegmentMeta;

 /// Meta information about the `Index`.
 /// 
@@ -11,37 +9,19 @@ use core::SegmentId;
 /// * the index docstamp
 /// * the schema
 ///
-#[derive(Clone,Debug,RustcDecodable,RustcEncodable)]
+#[derive(Clone,Debug,Serialize, Deserialize)]
 pub struct IndexMeta {
-    pub committed_segments: Vec<SegmentMeta>,
-    pub uncommitted_segments: Vec<SegmentMeta>,
+    pub segments: Vec<SegmentMeta>,
    pub schema: Schema,
-    pub docstamp: u64,
+    pub opstamp: u64,
 }

 impl IndexMeta {
    pub fn with_schema(schema: Schema) -> IndexMeta {
        IndexMeta {
-            committed_segments: Vec::new(),
-            uncommitted_segments: Vec::new(),
+            segments: vec!(),
            schema: schema,
-            docstamp: 0u64,
+            opstamp: 0u64,
        }
    }
 }
-
-#[derive(Clone, Debug, RustcDecodable,RustcEncodable)]
-pub struct SegmentMeta {
-    pub segment_id: SegmentId,
-    pub num_docs: u32,
-}
-
-#[cfg(test)]
-impl SegmentMeta {
-    pub fn new(segment_id: SegmentId, num_docs: u32) -> SegmentMeta {
-        SegmentMeta {
-            segment_id: segment_id,
-            num_docs: num_docs,
-        }
-    }
-}
--- a/src/core/mod.rs
+++ b/src/core/mod.rs
@@ -1,5 +1,4 @@
 pub mod searcher;
-
 pub mod index;
 mod segment_reader;
 mod segment_id;
@@ -7,20 +6,38 @@ mod segment_component;
 mod segment;
 mod index_meta;
 mod pool;
+mod segment_meta;
+mod term_iterator;

-
-use std::path::PathBuf;
-
+pub use self::searcher::Searcher;
 pub use self::segment_component::SegmentComponent;
 pub use self::segment_id::SegmentId;
 pub use self::segment_reader::SegmentReader;
 pub use self::segment::Segment;
-pub use self::segment::SegmentInfo;
 pub use self::segment::SerializableSegment;
 pub use self::index::Index;
-pub use self::index_meta::{IndexMeta, SegmentMeta};
+pub use self::segment_meta::SegmentMeta;
+pub use self::index_meta::IndexMeta;
+pub use self::term_iterator::TermIterator;


+use std::path::PathBuf;
+
 lazy_static! {
+    /// The meta file contains all the information about the list of segments and the schema
+    /// of the index.
    pub static ref META_FILEPATH: PathBuf = PathBuf::from("meta.json");
+    
+    /// The managed file contains a list of files that were created by tantivy
+    /// and will therefore be garbage collected when they are deemed useless by tantivy.
+    ///
+    /// Removing this file is safe, but will prevent the garbage collection of all of the files that
+    /// are currently in the directory.
+    pub static ref MANAGED_FILEPATH: PathBuf = PathBuf::from(".managed.json");
+
+    /// Only one process should be able to write tantivy's index at a time.
+    /// This file, when present, is in charge of preventing other processes from opening an IndexWriter.
+    ///
+    /// If the process is killed and this file remains, it is safe to remove it manually.
+    pub static ref LOCKFILE_FILEPATH: PathBuf = PathBuf::from(".tantivy-indexer.lock");
 }
--- a/src/core/searcher.rs
+++ b/src/core/searcher.rs
@@ -7,6 +7,8 @@ use query::Query;
 use DocId;
 use DocAddress;
 use schema::Term;
+use core::TermIterator;
+use std::fmt;


 /// Holds a list of `SegmentReader`s ready for search.
@@ -14,13 +16,13 @@ use schema::Term;
 /// It guarantees that the `Segment` will not be removed before  
 /// the destruction of the `Searcher`.
 /// 
-#[derive(Debug)]
 pub struct Searcher {
    segment_readers: Vec<SegmentReader>,
 }

+
 impl Searcher {
-      
+
    /// Fetches a document from tantivy's store given a `DocAddress`.
    ///
    /// The searcher uses the segment ordinal to route the
@@ -47,15 +49,27 @@ impl Searcher {
            .map(|segment_reader| segment_reader.doc_freq(term))
            .fold(0u32, |acc, val| acc + val)
    }
-    
+
+    /// Returns a Stream over all of the sorted unique terms of
+    /// the searcher.
+    ///
+    /// This includes all of the fields from all of the segment_readers.
+    /// See [TermIterator](struct.TermIterator.html).
+    ///
+    /// # Warning
+    /// This API is very likely to change in the future.
+    pub fn terms<'a>(&'a self) -> TermIterator<'a> {
+        TermIterator::from(self.segment_readers())
+    }
+
    /// Return the list of segment readers
-    pub fn segment_readers(&self,) -> &Vec<SegmentReader> {
+    pub fn segment_readers(&self,) -> &[SegmentReader] {
        &self.segment_readers
    }
    
    /// Returns the segment_reader associated with the given segment_ordinal
-    pub fn segment_reader(&self, segment_ord: usize) -> &SegmentReader {
-        &self.segment_readers[segment_ord]
+    pub fn segment_reader(&self, segment_ord: u32) -> &SegmentReader {
+        &self.segment_readers[segment_ord as usize]
    }
       
    /// Runs a query on the segment readers wrapped by the searcher
@@ -64,10 +78,21 @@ impl Searcher {
    }
 }

+
 impl From<Vec<SegmentReader>> for Searcher {
    fn from(segment_readers: Vec<SegmentReader>) -> Searcher {
        Searcher {
            segment_readers: segment_readers,
        }
    }
+}
+
+impl fmt::Debug for Searcher {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let segment_ids = self.segment_readers
+            .iter()
+            .map(|segment_reader| segment_reader.segment_id())
+            .collect::<Vec<_>>();
+        write!(f, "Searcher({:?})", segment_ids)
+    }
 }
--- a/src/core/segment.rs
+++ b/src/core/segment.rs
@@ -1,97 +1,89 @@
 use Result;
 use std::path::PathBuf;
 use schema::Schema;
-use DocId;
 use std::fmt;
 use core::SegmentId;
-use directory::{ReadOnlySource, WritePtr};
+use directory::{ReadOnlySource, WritePtr, FileProtection};
 use indexer::segment_serializer::SegmentSerializer;
 use super::SegmentComponent;
 use core::Index;
 use std::result;
-use directory::error::{FileError, OpenWriteError};
-
-
+use directory::Directory;
+use core::SegmentMeta;
+use directory::error::{OpenReadError, OpenWriteError};

 /// A segment is a piece of the index.
 #[derive(Clone)]
 pub struct Segment {
    index: Index,
-    segment_id: SegmentId,
+    meta: SegmentMeta,
 }

 impl fmt::Debug for Segment {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "Segment({:?})", self.segment_id.uuid_string())
+        write!(f, "Segment({:?})", self.id().uuid_string())
    }
 }

-
 /// Creates a new segment given an `Index` and a `SegmentId`
 /// 
 /// The function is here to make it private outside `tantivy`. 
-pub fn create_segment(index: Index, segment_id: SegmentId) -> Segment {
+pub fn create_segment(index: Index, meta: SegmentMeta) -> Segment {
    Segment {
        index: index,
-        segment_id: segment_id,
+        meta: meta,
    }
 }

 impl Segment {
-
-
+    
    /// Returns our index's schema.
    pub fn schema(&self,) -> Schema {
        self.index.schema()
    }

+    /// Returns the segment meta-information
+    pub fn meta(&self) -> &SegmentMeta {
+        &self.meta
+    }
+
+    #[doc(hidden)]
+    pub fn set_delete_meta(&mut self, num_deleted_docs: u32, opstamp: u64) {
+        self.meta.set_delete_meta(num_deleted_docs, opstamp);
+    }
+
    /// Returns the segment's id.
    pub fn id(&self,) -> SegmentId {
-        self.segment_id
+        self.meta.id()
    }
-    

    /// Returns the relative path of a component of our segment.
    ///  
    /// It just joins the segment id with the extension 
    /// associated to a segment component.
    pub fn relative_path(&self, component: SegmentComponent) -> PathBuf {
-        self.segment_id.relative_path(component)
+        self.meta.relative_path(component)
    }

-    /// Deletes all of the document of the segment.
-    /// This is called when there is a merge or a rollback.
+
+    /// Protects a specific component file from being deleted.
    ///
-    /// # Disclaimer
-    /// If deletion of a file fails (e.g. a file 
-    /// was read-only.), the method does not
-    /// fail and just logs an error
-    pub fn delete(&self,) {
-        for component in SegmentComponent::values() {
-            let rel_path = self.relative_path(component);
-            if let Err(err) = self.index.directory().delete(&rel_path) {
-                match err {
-                    FileError::FileDoesNotExist(_) => {
-                        // this is normal behavior.
-                        // the position file for instance may not exists.
-                    }
-                    FileError::IOError(err) => {
-                		error!("Failed to remove {:?} : {:?}", self.segment_id, err);
-                    }
-                }
-            }
-        }
+    /// Returns a FileProtection object. The file is guaranteed
+    /// to not be garbage collected as long as this `FileProtection` object
+    /// lives.
+    pub fn protect_from_delete(&self, component: SegmentComponent) -> FileProtection {
+        let path = self.relative_path(component);
+        self.index.directory().protect_file_from_delete(&path)
    }

-
-    /// Open one of the component file for read.
-    pub fn open_read(&self, component: SegmentComponent) -> result::Result<ReadOnlySource, FileError> {
+    /// Opens one of the component files for a *regular* read.
+    pub fn open_read(&self, component: SegmentComponent) -> result::Result<ReadOnlySource, OpenReadError> {
        let path = self.relative_path(component);
        let source = try!(self.index.directory().open_read(&path));
        Ok(source)
    }

-    /// Open one of the component file for write.
+    /// Opens one of the component files for a *regular* write.
    pub fn open_write(&mut self, component: SegmentComponent) -> result::Result<WritePtr, OpenWriteError> {
        let path = self.relative_path(component);
        let write = try!(self.index.directory_mut().open_write(&path));
@@ -108,7 +100,34 @@ pub trait SerializableSegment {
    fn write(&self, serializer: SegmentSerializer) -> Result<u32>;
 }

-#[derive(Clone,Debug,RustcDecodable,RustcEncodable)]
-pub struct SegmentInfo {
-	pub max_doc: DocId,
+#[cfg(test)]
+mod tests {
+
+    use core::SegmentComponent;
+    use directory::Directory;
+    use std::collections::HashSet;
+    use schema::SchemaBuilder;
+    use Index;
+
+    #[test]
+    fn test_segment_protect_component() {
+        let mut index = Index::create_in_ram(SchemaBuilder::new().build());
+        let segment = index.new_segment();
+        let path = segment.relative_path(SegmentComponent::POSTINGS);
+        
+        let directory = index.directory_mut();
+        directory.atomic_write(&*path, &vec!(0u8)).unwrap();
+        
+        let living_files = HashSet::new();
+        {
+            let _file_protection = segment.protect_from_delete(SegmentComponent::POSTINGS);
+            assert!(directory.exists(&*path));
+            directory.garbage_collect(living_files.clone());
+            assert!(directory.exists(&*path));
+        }
+
+        directory.garbage_collect(living_files);
+        assert!(!directory.exists(&*path));
+    }
+
 }
--- a/src/core/segment_component.rs
+++ b/src/core/segment_component.rs
@@ -1,41 +1,27 @@
-use std::vec::IntoIter;
-
 #[derive(Copy, Clone)]
 pub enum SegmentComponent {
-    INFO,
    POSTINGS,
    POSITIONS,
    FASTFIELDS,
    FIELDNORMS,
    TERMS,
    STORE,
+    DELETE
 }

 impl SegmentComponent {
-    pub fn values() -> IntoIter<SegmentComponent> {
-        vec!(
-            SegmentComponent::INFO,
+    
+    pub fn iterator() -> impl Iterator<Item=&'static SegmentComponent> {
+        static SEGMENT_COMPONENTS: [SegmentComponent;  7] = [
            SegmentComponent::POSTINGS,
            SegmentComponent::POSITIONS,
            SegmentComponent::FASTFIELDS,
            SegmentComponent::FIELDNORMS,
            SegmentComponent::TERMS,
            SegmentComponent::STORE,
-        ).into_iter()
+            SegmentComponent::DELETE
+        ];
+        SEGMENT_COMPONENTS.into_iter()
    }
    
-    pub fn path_suffix(&self)-> &'static str {
-        match *self {
-            SegmentComponent::POSITIONS => ".pos",
-            SegmentComponent::INFO => ".info",
-            SegmentComponent::POSTINGS => ".idx",
-            SegmentComponent::TERMS => ".term",
-            SegmentComponent::STORE => ".store",
-            SegmentComponent::FASTFIELDS => ".fast",
-            SegmentComponent::FIELDNORMS => ".fieldnorm",
-        }
-    }
-}
-
-
-    
+}
--- a/src/core/segment_id.rs
+++ b/src/core/segment_id.rs
@@ -1,15 +1,19 @@
 use uuid::Uuid;
 use std::fmt;
-use rustc_serialize::{Encoder, Decoder, Encodable, Decodable};
-use core::SegmentComponent;
-use std::path::PathBuf;
 use std::cmp::{Ordering, Ord};

-
 #[cfg(test)]
 use std::sync::atomic;

-#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+/// Tantivy SegmentId.
+///
+/// Tantivy's segments are identified
+/// by a UUID which is used to prefix the filenames
+/// of all of the files associated with the segment.
+///
+/// In unit tests, for reproducibility, the SegmentIds are
+/// simply generated in an autoincrement fashion.
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
 pub struct SegmentId(Uuid);


@@ -23,12 +27,12 @@ lazy_static! {
 // During tests, we generate the segment id in a autoincrement manner
 // for consistency of segment id between run.
 //
-// The order of the test execution is not guaranteed, but the order 
+// The order of the test execution is not guaranteed, but the order
 // of segments within a single test is guaranteed.
 #[cfg(test)]
 fn create_uuid() -> Uuid {
    let new_auto_inc_id = (*AUTO_INC_COUNTER).fetch_add(1, atomic::Ordering::SeqCst);
-    Uuid::from_fields(new_auto_inc_id as u32, 0, 0, &*EMPTY_ARR)
+    Uuid::from_fields(new_auto_inc_id as u32, 0, 0, &*EMPTY_ARR).unwrap()
 }

 #[cfg(not(test))]
@@ -37,39 +41,32 @@ fn create_uuid() -> Uuid {
 }

 impl SegmentId {
+    #[doc(hidden)]
    pub fn generate_random() -> SegmentId {
        SegmentId(create_uuid())
    }
-    
+
+
+    /// Returns a shorter identifier of the segment.
+    ///
+    /// We are using UUID4, so only 6 bits are fixed,
+    /// and the rest is random.
+    ///
+    /// Picking the first 8 chars is ok to identify 
+    /// segments in a display message.
    pub fn short_uuid_string(&self,) -> String {
-        (&self.0.to_simple_string()[..8]).to_string()
+        (&self.0.simple().to_string()[..8]).to_string()
    }
-    
+
+    /// Returns a segment uuid string.
    pub fn uuid_string(&self,) -> String {
-        self.0.to_simple_string()
-    }
-    
-    pub fn relative_path(&self, component: SegmentComponent) -> PathBuf {
-        let filename = self.uuid_string() + component.path_suffix();
-        PathBuf::from(filename)
-    }
-}
-
-impl Encodable for SegmentId {
-    fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
-        self.0.encode(s)
-    }
-}
-
-impl Decodable for SegmentId {
-    fn decode<D: Decoder>(d: &mut D) -> Result<Self, D::Error> {
-        Uuid::decode(d).map(SegmentId)
+        self.0.simple().to_string()
    }
 }

 impl fmt::Debug for SegmentId {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "SegmentId({:?})", self.uuid_string())
+        write!(f, "Seg({:?})", self.short_uuid_string())
    }
 }

--- a/src/core/segment_meta.rs
+++ b/src/core/segment_meta.rs
@@ -0,0 +1,121 @@
+use core::SegmentId;
+use super::SegmentComponent;
+use std::path::PathBuf;
+use std::collections::HashSet;
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+struct DeleteMeta {
+    num_deleted_docs: u32,
+    opstamp: u64,
+}
+
+/// SegmentMeta contains simple meta information about a segment.
+///
+/// For instance the number of docs it contains,
+/// how many are deleted, etc.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct SegmentMeta {
+    segment_id: SegmentId,
+    max_doc: u32,
+    deletes: Option<DeleteMeta>, 
+}
+
+impl SegmentMeta {
+
+    /// Creates a new segment meta for 
+    /// a segment with no deletes and no documents.
+    pub fn new(segment_id: SegmentId) -> SegmentMeta {
+        SegmentMeta {
+            segment_id: segment_id,
+            max_doc: 0,
+            deletes: None,
+        }
+    }
+
+    /// Returns the segment id.
+    pub fn id(&self) -> SegmentId {
+        self.segment_id
+    }
+
+    /// Returns the number of deleted documents.
+    pub fn num_deleted_docs(&self) -> u32 {
+        self.deletes
+            .as_ref()
+            .map(|delete_meta| delete_meta.num_deleted_docs)
+            .unwrap_or(0u32)
+    }
+
+    /// Returns the paths of all the files that may belong to this segment:
+    /// one per `SegmentComponent`, without checking that the file exists.
+    ///
+    /// This is useful as the way tantivy removes files
+    /// is by removing all files that have been created by tantivy
+    /// and are not used by any segment anymore.
+    pub fn list_files(&self) -> HashSet<PathBuf> {
+        let mut files = HashSet::new();
+        // The `DELETE` entry depends on the current delete opstamp.
+        for component in SegmentComponent::iterator() {
+            files.insert(self.relative_path(*component));
+        }
+        files
+    }
+
+    /// Returns the relative path of a component of our segment.
+    ///
+    /// The file name is the segment uuid followed by the component's extension.
+    /// For `DELETE`, the extension embeds the delete opstamp (`0` if there is none).
+    pub fn relative_path(&self, component: SegmentComponent) -> PathBuf {
+        let mut path = self.id().uuid_string();
+        match component {
+            SegmentComponent::POSITIONS => path.push_str(".pos"),
+            SegmentComponent::POSTINGS => path.push_str(".idx"),
+            SegmentComponent::TERMS => path.push_str(".term"),
+            SegmentComponent::STORE => path.push_str(".store"),
+            SegmentComponent::FASTFIELDS => path.push_str(".fast"),
+            SegmentComponent::FIELDNORMS => path.push_str(".fieldnorm"),
+            SegmentComponent::DELETE => path.push_str(&format!(".{}.del", self.delete_opstamp().unwrap_or(0))),
+        }
+        PathBuf::from(path)
+    }
+
+    /// Returns the highest doc id + 1.
+    ///
+    /// If there are no deletes, then `num_docs() == max_doc()`
+    /// and the doc ids contained in this segment
+    /// are exactly `0..max_doc`.
+    pub fn max_doc(&self) -> u32 {
+        self.max_doc
+    }
+
+    /// Return the number of documents in the segment.
+    pub fn num_docs(&self) -> u32 {
+        self.max_doc() - self.num_deleted_docs()
+    }
+
+    /// Returns the opstamp of the last delete operation
+    /// taken in account in this segment.
+    pub fn delete_opstamp(&self) -> Option<u64> {
+        self.deletes
+            .as_ref()
+            .map(|delete_meta| delete_meta.opstamp)
+    }
+
+    /// Returns true iff the segment meta contains
+    /// delete information.
+    pub fn has_deletes(&self) -> bool {
+        self.deletes.is_some()
+    }
+
+    #[doc(hidden)]
+    pub fn set_max_doc(&mut self, max_doc: u32) {
+        self.max_doc = max_doc;
+    }
+
+    #[doc(hidden)]
+    pub fn set_delete_meta(&mut self, num_deleted_docs: u32, opstamp: u64) {
+        self.deletes = Some(DeleteMeta {
+            num_deleted_docs: num_deleted_docs,
+            opstamp: opstamp,
+        });
+    }
+}
--- a/src/core/segment_reader.rs
+++ b/src/core/segment_reader.rs
@@ -3,26 +3,29 @@ use core::Segment;
 use core::SegmentId;
 use core::SegmentComponent;
 use schema::Term;
+use common::HasLen;
+use core::SegmentMeta;
+use fastfield::{self, FastFieldNotAvailableError};
+use fastfield::DeleteBitSet;
+use postings::BlockSegmentPostings;
 use store::StoreReader;
 use schema::Document;
 use directory::ReadOnlySource;
 use DocId;
-use std::io;
 use std::str;
 use postings::TermInfo;
-use datastruct::FstMap;
+use datastruct::TermDictionary;
+use std::sync::Arc;
 use std::fmt;
-use rustc_serialize::json;
-use core::SegmentInfo;
 use schema::Field;
 use postings::SegmentPostingsOption;
 use postings::SegmentPostings;
-use fastfield::{U32FastFieldsReader, U32FastFieldReader};
+use fastfield::{FastFieldsReader, FastFieldReader, U64FastFieldReader};
 use schema::Schema;
 use schema::FieldType;
 use postings::FreqHandler;
 use schema::TextIndexingOptions;
-use error::Error;
+


 /// Entry point to access all of the datastructures of the `Segment`
@@ -36,14 +39,16 @@ use error::Error;
 /// The segment reader has a very low memory footprint,
 /// as close to all of the memory data is mmapped.
 ///
+#[derive(Clone)]
 pub struct SegmentReader {
-    segment_info: SegmentInfo,
    segment_id: SegmentId,
-    term_infos: FstMap<TermInfo>,
+    segment_meta: SegmentMeta,
+    term_infos: Arc<TermDictionary<TermInfo>>,
    postings_data: ReadOnlySource,
    store_reader: StoreReader,
-    fast_fields_reader: U32FastFieldsReader,
-    fieldnorms_reader: U32FastFieldsReader,
+    fast_fields_reader: Arc<FastFieldsReader>,
+    fieldnorms_reader: Arc<FastFieldsReader>,
+    delete_bitset: DeleteBitSet,
    positions_data: ReadOnlySource,
    schema: Schema,
 }
@@ -54,30 +59,55 @@ impl SegmentReader {
    /// Today, `tantivy` does not handle deletes, so it happens
    /// to also be the number of documents in the index.
    pub fn max_doc(&self) -> DocId {
-        self.segment_info.max_doc
+        self.segment_meta.max_doc()
    }
    
+
+    pub fn schema(&self) -> &Schema {
+        &self.schema
+    }
+
    /// Returns the number of documents.
    /// Deleted documents are not counted.
    ///
    /// Today, `tantivy` does not handle deletes so max doc and
    /// num_docs are the same.
    pub fn num_docs(&self) -> DocId {
-        self.segment_info.max_doc
+        self.segment_meta.num_docs()
    }
    
+    /// Return the number of documents that have been
+    /// deleted in the segment.
+    pub fn num_deleted_docs(&self) -> DocId {
+        self.delete_bitset.len() as DocId
+    }
+
+    #[doc(hidden)]
+    pub fn fast_fields_reader(&self) -> &FastFieldsReader {
+        &*self.fast_fields_reader
+    }
+
    /// Accessor to a segment's fast field reader given a field.
-    pub fn get_fast_field_reader(&self, field: Field) -> io::Result<U32FastFieldReader> {
+    ///
+    /// Returns the u64 fast value reader if the field
+    /// is a u64 field indexed as "fast".
+    ///
+    /// Return a FastFieldNotAvailableError if the field is not
+    /// declared as a fast field in the schema.
+    ///
+    /// # Panics
+    /// May panic if the index is corrupted.
+    pub fn get_fast_field_reader<TFastFieldReader: FastFieldReader>(&self, field: Field) -> fastfield::Result<TFastFieldReader> {
        let field_entry = self.schema.get_field_entry(field);
-        match *field_entry.field_type() {
-            FieldType::Str(_) => {
-                Err(io::Error::new(io::ErrorKind::Other, "fast field are not yet supported for text fields."))
-            },
-            FieldType::U32(_) => {
-                // TODO check that the schema allows that
-                //Err(io::Error::new(io::ErrorKind::Other, "fast field are not yet supported for text fields."))
-                self.fast_fields_reader.get_field(field)
-            },
+        if !TFastFieldReader::is_enabled(field_entry.field_type()) {
+            Err(FastFieldNotAvailableError::new(field_entry))
+        }
+        else {
+            Ok(
+                self.fast_fields_reader
+                .open_reader(field)
+                .expect("Fast field file corrupted.")
+            )
        }
    }
    
@@ -88,8 +118,8 @@ impl SegmentReader {
    ///
    /// They are simply stored as a fast field, serialized in 
    /// the `.fieldnorm` file of the segment. 
-    pub fn get_fieldnorms_reader(&self, field: Field) -> io::Result<U32FastFieldReader> {
-        self.fieldnorms_reader.get_field(field) 
+    pub fn get_fieldnorms_reader(&self, field: Field) -> Option<U64FastFieldReader> {
+        self.fieldnorms_reader.open_reader(field) 
    }
        
    /// Returns the number of documents containing the term.
@@ -107,52 +137,49 @@ impl SegmentReader {

    /// Open a new segment for reading.
    pub fn open(segment: Segment) -> Result<SegmentReader> {
-        let segment_info_reader = try!(segment.open_read(SegmentComponent::INFO));
-        let segment_info_data = try!(
-            str::from_utf8(&*segment_info_reader)
-                .map_err(|err| {
-                    let segment_info_filepath = segment.relative_path(SegmentComponent::INFO);
-                    Error::CorruptedFile(segment_info_filepath, Box::new(err))
-                })
-         );
-        let segment_info: SegmentInfo = try!(
-            json::decode(&segment_info_data)
-            .map_err(|err| {
-                let file_path = segment.relative_path(SegmentComponent::INFO);
-                Error::CorruptedFile(file_path, Box::new(err))
-            })
-        );
+
        let source = try!(segment.open_read(SegmentComponent::TERMS));
-        let term_infos = try!(FstMap::from_source(source));
+        let term_infos = try!(TermDictionary::from_source(source));
        let store_reader = StoreReader::from(try!(segment.open_read(SegmentComponent::STORE)));
        let postings_shared_mmap = try!(segment.open_read(SegmentComponent::POSTINGS));
        
        let fast_field_data = try!(segment.open_read(SegmentComponent::FASTFIELDS));
-        let fast_fields_reader = try!(U32FastFieldsReader::open(fast_field_data));
+
+        let fast_fields_reader = try!(FastFieldsReader::open(fast_field_data));
        
        let fieldnorms_data = try!(segment.open_read(SegmentComponent::FIELDNORMS));
-        let fieldnorms_reader = try!(U32FastFieldsReader::open(fieldnorms_data));
+        let fieldnorms_reader = try!(FastFieldsReader::open(fieldnorms_data));
        
        let positions_data = segment
            .open_read(SegmentComponent::POSITIONS)
            .unwrap_or_else(|_| ReadOnlySource::empty());
        
+        let delete_bitset =
+            if segment.meta().has_deletes() {
+                let delete_data = segment.open_read(SegmentComponent::DELETE)?;
+                DeleteBitSet::open(delete_data)
+            }
+            else {
+                DeleteBitSet::empty()
+            };
+        
        let schema = segment.schema();
        Ok(SegmentReader {
-            segment_info: segment_info,
+            segment_meta: segment.meta().clone(),
            postings_data: postings_shared_mmap,
-            term_infos: term_infos,
+            term_infos: Arc::new(term_infos),
            segment_id: segment.id(),
            store_reader: store_reader,
-            fast_fields_reader: fast_fields_reader,
-            fieldnorms_reader: fieldnorms_reader,
+            fast_fields_reader: Arc::new(fast_fields_reader),
+            fieldnorms_reader: Arc::new(fieldnorms_reader),
+            delete_bitset: delete_bitset,
            positions_data: positions_data,
            schema: schema,
        })
    }
    
    /// Return the term dictionary datastructure.
-    pub fn term_infos(&self) -> &FstMap<TermInfo> {
+    pub fn term_infos(&self) -> &TermDictionary<TermInfo> {
        &self.term_infos
    }
       
@@ -165,16 +192,29 @@ impl SegmentReader {
    }


-    /// Returns the segment postings associated with the term, and with the given option,
-    /// or `None` if the term has never been encounterred and indexed. 
-    /// 
-    /// If the field was not indexed with the indexing options that cover 
-    /// the requested options, the returned `SegmentPostings` the method does not fail
-    /// and returns a `SegmentPostings` with as much information as possible.
-    ///
-    /// For instance, requesting `SegmentPostingsOption::FreqAndPositions` for a `TextIndexingOptions`
-    /// that does not index position will return a `SegmentPostings` with `DocId`s and frequencies.
-    pub fn read_postings(&self, term: &Term, option: SegmentPostingsOption) -> Option<SegmentPostings> {
+    pub fn postings_data(&self, offset: usize) -> &[u8] {
+        &self.postings_data[offset..]
+    }
+
+    pub fn get_block_postings(&self) -> BlockSegmentPostings {
+        BlockSegmentPostings::from_data(0, &self.postings_data[..], FreqHandler::new_without_freq())
+    }
+
+    /// Opens the block postings located at `term_info.postings_offset`.
+    ///
+    /// Frequencies are never decoded: every field type gets a
+    /// frequency-less `FreqHandler`, so `field_type` has no effect yet.
+    /// The result is always `Some`.
+    pub fn read_block_postings_from_terminfo(&self, term_info: &TermInfo, field_type: &FieldType) -> Option<BlockSegmentPostings> {
+        // Kept for API compatibility; all field types are handled alike.
+        let _ = field_type;
+        let offset = term_info.postings_offset as usize;
+        let postings_data = &self.postings_data[offset..];
+        let freq_handler = FreqHandler::new_without_freq();
+        Some(BlockSegmentPostings::from_data(term_info.doc_freq as usize, postings_data, freq_handler))
+    }
+    
+    pub fn read_block_postings(&self, term: &Term, option: SegmentPostingsOption) -> Option<BlockSegmentPostings> {
        let field = term.field();
        let field_entry = self.schema.get_field_entry(field);
        let term_info = get!(self.get_term_info(&term));
@@ -214,10 +254,31 @@ impl SegmentReader {
                FreqHandler::new_without_freq()
            }
        };
-        Some(SegmentPostings::from_data(term_info.doc_freq, postings_data, freq_handler))
+        Some(BlockSegmentPostings::from_data(term_info.doc_freq as usize, postings_data, freq_handler))
    }
-        
+
+    /// Returns the segment postings associated with the term, and with the given option,
+    /// or `None` if the term has never been encountered and indexed.
+    ///
+    /// If the field was not indexed with indexing options that cover
+    /// the requested option, the method does not fail:
+    /// it returns a `SegmentPostings` with as much information as possible.
+    ///
+    /// For instance, requesting `SegmentPostingsOption::FreqAndPositions` for a `TextIndexingOptions`
+    /// that does not index position will return a `SegmentPostings` with `DocId`s and frequencies.
+    pub fn read_postings(&self, term: &Term, option: SegmentPostingsOption) -> Option<SegmentPostings> {
+        self.read_block_postings(term, option)
+            .map(|block_postings| {
+                 SegmentPostings::from_block_postings(block_postings, self.delete_bitset.clone())
+            })
+    }
+    
+
    /// Returns the posting list associated with a term.
+    ///
+    /// If the term is not found, return None.
+    /// Even when non-null, because of deletes, the posting object 
+    /// returned by this method may contain no documents.
    pub fn read_postings_all_info(&self, term: &Term) -> Option<SegmentPostings> {
        let field_entry = self.schema.get_field_entry(term.field());
        let segment_posting_option = match *field_entry.field_type() {
@@ -228,7 +289,7 @@ impl SegmentReader {
                    _ => SegmentPostingsOption::NoFreq,
                }
            }
-            FieldType::U32(_) => SegmentPostingsOption::NoFreq
+            FieldType::U64(_) | FieldType::I64(_) => SegmentPostingsOption::NoFreq
        };
        self.read_postings(term, segment_posting_option)
    }
@@ -237,6 +298,24 @@ impl SegmentReader {
    pub fn get_term_info(&self, term: &Term) -> Option<TermInfo> {
        self.term_infos.get(term.as_slice())
    }
+
+    /// Returns the segment id
+    pub fn segment_id(&self) -> SegmentId {
+        self.segment_id
+    }
+
+    /// Returns the bitset representing
+    /// the documents that have been deleted.
+    pub fn delete_bitset(&self) -> &DeleteBitSet {
+        &self.delete_bitset
+    }
+
+
+    /// Returns true iff the `doc` is marked
+    /// as deleted.
+    pub fn is_deleted(&self, doc: DocId) -> bool {
+        self.delete_bitset.is_deleted(doc)
+    }
 }


--- a/src/core/term_iterator.rs
+++ b/src/core/term_iterator.rs
@@ -0,0 +1,184 @@
+use fst::Streamer;
+use std::mem;
+use std::collections::BinaryHeap;
+use postings::TermInfo;
+use datastruct::TermDictionaryStreamer;
+use schema::Field;
+use schema::Term;
+use core::SegmentReader;
+use std::cmp::Ordering;
+
+
+#[derive(PartialEq, Eq, Debug)]
+struct HeapItem {
+    term: Term,
+    segment_ord: usize,
+}
+
+impl PartialOrd for HeapItem {
+    fn partial_cmp(&self, other: &HeapItem) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+// Reversed on purpose: `BinaryHeap` is a max-heap, so the smallest (term, segment_ord) pops first.
+impl Ord for HeapItem {
+    fn cmp(&self, other: &HeapItem) -> Ordering {
+        (&other.term, &other.segment_ord).cmp(&(&self.term, &self.segment_ord))
+    }
+}
+
+/// Given a list of sorted term streams,
+/// returns an iterator over sorted unique terms.
+///
+/// Each step exposes a pair made of
+/// - the term (see `term()`)
+/// - a slice with the ordinals of the segments containing
+/// the term (see `segment_ords()`).
+pub struct TermIterator<'a> {
+    key_streams: Vec<TermDictionaryStreamer<'a, TermInfo>>,
+    heap: BinaryHeap<HeapItem>,
+    current_term: Term,
+    // Buffer hosting the list of segment ordinals containing
+    // the current term.
+    current_segment_ords: Vec<usize>,
+}
+
+impl<'a> TermIterator<'a> {
+    fn new(key_streams: Vec<TermDictionaryStreamer<'a, TermInfo>>) -> TermIterator<'a> {
+        TermIterator {
+            // Every ordinal starts as "current" so that the first `advance()` polls all streams.
+            current_segment_ords: (0..key_streams.len()).collect(),
+            key_streams: key_streams,
+            heap: BinaryHeap::new(),
+            current_term: Term::from_field_text(Field(0), ""),
+        }
+    }
+
+    /// Advances the iterator to the next distinct term.
+    ///
+    /// Returns `true` if a term is available through `term()`,
+    /// and `false` once there is no term left.
+    pub fn advance(&mut self) -> bool {
+        // Refill the heap from the streams consumed by the previous term.
+        self.advance_segments();
+        let mut head = match self.heap.pop() {
+            Some(head) => head,
+            None => { return false; }
+        };
+        // Take ownership of the new term; the stale one is dropped with `head`.
+        mem::swap(&mut self.current_term, &mut head.term);
+        self.current_segment_ords.push(head.segment_ord);
+        // Gather the other segments whose stream is positioned on the same term.
+        while self.heap.peek().map_or(false, |item| item.term == self.current_term) {
+            if let Some(item) = self.heap.pop() {
+                self.current_segment_ords.push(item.segment_ord);
+            }
+        }
+        true
+    }
+
+
+    /// Returns the current term.
+    ///
+    /// This method may be called
+    /// iff advance() has been called before
+    /// and "true" was returned.
+    pub fn term(&self) -> &Term {
+        &self.current_term
+    }
+
+    /// Returns the sorted list of segment ordinals
+    /// that include the current term.
+    ///
+    /// This method may be called
+    /// iff advance() has been called before
+    /// and "true" was returned.
+    pub fn segment_ords(&self) -> &[usize]{
+        &self.current_segment_ords[..]
+    }
+
+    fn advance_segments(&mut self) {
+        for segment_ord in self.current_segment_ords.drain(..) {
+            if let Some((term, _val)) = self.key_streams[segment_ord].next() {
+                self.heap.push(HeapItem {
+                    term: Term::from_bytes(term),
+                    segment_ord: segment_ord,
+                });
+            }
+        }
+    }
+}
+
+impl<'a, 'f> Streamer<'a> for TermIterator<'f> {
+    type Item = &'a Term;
+
+    fn next(&'a mut self) -> Option<Self::Item> {
+        if self.advance() {
+            Some(&self.current_term)
+        }
+        else {
+            None
+        }
+    }
+}
+
+impl<'a> From<&'a [SegmentReader]> for TermIterator<'a> {
+    fn from(segment_readers: &'a [SegmentReader]) -> TermIterator<'a> {
+        TermIterator::new(
+            segment_readers
+            .iter()
+            .map(|reader| reader.term_infos().stream())
+            .collect()
+        )
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use schema::{SchemaBuilder, Document, TEXT};
+    use core::Index;
+
+    #[test]
+    fn test_term_iterator() {
+        let mut schema_builder = SchemaBuilder::default();
+        let text_field = schema_builder.add_text_field("text", TEXT);
+        let index = Index::create_in_ram(schema_builder.build());
+        {
+            let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
+            {
+                {
+                    let mut doc = Document::default();
+                    doc.add_text(text_field, "a b d f");
+                    index_writer.add_document(doc);
+                }
+                index_writer.commit().unwrap();
+            }
+            {
+                {
+                    let mut doc = Document::default();
+                    doc.add_text(text_field, "a b c d f");
+                    index_writer.add_document(doc);
+                }
+                index_writer.commit().unwrap();
+            }
+            {
+                {
+                    let mut doc = Document::default();
+                    doc.add_text(text_field, "e f");
+                    index_writer.add_document(doc);
+                }
+                index_writer.commit().unwrap();
+            }
+        }
+        index.load_searchers().unwrap();
+        let searcher = index.searcher();
+        let mut term_it = searcher.terms();
+        let mut terms = String::new();
+        while let Some(term) = term_it.next() {
+            terms.push_str(term.text());
+        }
+        assert_eq!(terms, "abcdef");
+    }
+
+}
--- a/src/datastruct/fstmap.rs
+++ b/src/datastruct/fstmap.rs
@@ -4,12 +4,12 @@ use std::io;
 use std::io::Write;
 use fst;
 use fst::raw::Fst;
-use fst::Streamer;

 use directory::ReadOnlySource;
 use common::BinarySerializable;
 use std::marker::PhantomData;

+
 fn convert_fst_error(e: fst::Error) -> io::Error {
    io::Error::new(io::ErrorKind::Other, e)
 }
@@ -21,7 +21,7 @@ pub struct FstMapBuilder<W: Write, V: BinarySerializable> {
 }

 impl<W: Write, V: BinarySerializable> FstMapBuilder<W, V> {
-
+    
    pub fn new(w: W) -> io::Result<FstMapBuilder<W, V>> {
        let fst_builder = try!(fst::MapBuilder::new(w).map_err(convert_fst_error));
        Ok(FstMapBuilder {
@@ -31,7 +31,28 @@ impl<W: Write, V: BinarySerializable> FstMapBuilder<W, V> {
        })
    }

-    pub fn insert(&mut self, key: &[u8], value: &V) -> io::Result<()>{
+    /// Registers `key`, mapped to the current length of the value buffer,
+    /// without writing any value.
+    ///
+    /// Not `unsafe` in the Rust sense, but easy to misuse: calls to `insert_key`
+    /// and `insert_value` must systematically alternate, so that the recorded
+    /// offset is the one at which the value of `key` gets serialized.
+    ///
+    /// TODO see if I can bend Rust typesystem to enforce that in a nice way.
+    pub fn insert_key(&mut self, key: &[u8]) -> io::Result<()> {
+        let value_offset = self.data.len() as u64;
+        try!(self.fst_builder.insert(key, value_offset).map_err(convert_fst_error));
+        Ok(())
+    }
+
+    /// Serializes `value` into the value buffer; meant to directly follow an `insert_key` call.
+    pub fn insert_value(&mut self, value: &V) -> io::Result<()> {
+        try!(value.serialize(&mut self.data));
+        Ok(())
+    }
+
+    #[cfg(test)]
+    pub fn insert(&mut self, key: &[u8], value: &V) -> io::Result<()> {
        try!(self.fst_builder
            .insert(key, self.data.len() as u64)
            .map_err(convert_fst_error));
@@ -66,27 +87,14 @@ fn open_fst_index(source: ReadOnlySource) -> io::Result<fst::Map> {
    }))
 }

-pub struct FstKeyIter<'a, V: 'static + BinarySerializable> {
-    streamer: fst::map::Stream<'a>,
-    __phantom__: PhantomData<V>
-}
-
-impl<'a, V: 'static + BinarySerializable> FstKeyIter<'a, V> {
-    pub fn next(&mut self) -> Option<(&[u8])> {
-        self.streamer
-            .next()
-            .map(|(k, _)| k)
-    }
-}
-
-
 impl<V: BinarySerializable> FstMap<V> {

-    pub fn keys(&self,) -> FstKeyIter<V> {
-        FstKeyIter {
-            streamer: self.fst_index.stream(),
-            __phantom__: PhantomData,
-        }
+    pub fn keys(&self,) -> fst::map::Keys {
+        self.fst_index.keys()
+    }
+
+    pub fn fst_index(&self) -> &fst::Map {
+        &self.fst_index
    }

    pub fn from_source(source: ReadOnlySource)  -> io::Result<FstMap<V>> {
@@ -104,8 +112,8 @@ impl<V: BinarySerializable> FstMap<V> {
            _phantom_: PhantomData,
        })
    }
-
-    fn read_value(&self, offset: u64) -> V {
+    
+    pub fn read_value(&self, offset: u64) -> V {
        let buffer = self.values_mmap.as_slice();
        let mut cursor = &buffer[(offset as usize)..];
        V::deserialize(&mut cursor).expect("Data in FST is corrupted")
@@ -123,6 +131,7 @@ mod tests {
    use super::*;
    use directory::{RAMDirectory, Directory};
    use std::path::PathBuf;
+    use fst::Streamer;

    #[test]
    fn test_fstmap() {
@@ -143,7 +152,6 @@ mod tests {
        assert_eq!(keys.next().unwrap(), "abc".as_bytes());
        assert_eq!(keys.next().unwrap(), "abcd".as_bytes());
        assert_eq!(keys.next(), None);
- 
    }

 }
--- a/src/datastruct/mod.rs
+++ b/src/datastruct/mod.rs
@@ -1,8 +1,15 @@
 mod fstmap;
 mod skip;
 pub mod stacker;
+mod stream_dictionary;
+
+
+//pub use self::fstmap::FstMapBuilder as TermDictionaryBuilder;
+//pub use self::fstmap::FstMap as TermDictionary;
+
+
+pub use self::stream_dictionary::StreamDictionaryBuilder as TermDictionaryBuilder;
+pub use self::stream_dictionary::StreamDictionary as TermDictionary;
+pub use self::stream_dictionary::StreamDictionaryStreamer as TermDictionaryStreamer;

-pub use self::fstmap::FstMapBuilder;
-pub use self::fstmap::FstMap;
-pub use self::fstmap::FstKeyIter;
 pub use self::skip::{SkipListBuilder, SkipList};
--- a/src/datastruct/skip/skiplist_builder.rs
+++ b/src/datastruct/skip/skiplist_builder.rs
@@ -36,7 +36,7 @@ impl<T: BinarySerializable> LayerBuilder<T> {
    fn insert(&mut self, doc_id: DocId, value: &T) -> io::Result<Option<(DocId, u32)>> {
        self.remaining -= 1;
        self.len += 1;
-        let offset = self.written_size() as u32; // TODO not sure if we want after or here
+        let offset = self.written_size() as u32;
        try!(doc_id.serialize(&mut self.buffer));
        try!(value.serialize(&mut self.buffer));
        Ok(if self.remaining == 0 {
--- a/src/datastruct/stacker/hashmap.rs
+++ b/src/datastruct/stacker/hashmap.rs
@@ -1,10 +1,6 @@
 use std::iter;
-use std::marker::PhantomData;
 use super::heap::{Heap, HeapAllocable, BytesRef};

-
-
-
 /// dbj2 hash function
 fn djb2(key: &[u8]) -> u64 {
    let mut state: u64 = 5381; 
@@ -57,17 +53,40 @@ pub enum Entry {
 /// the computation of the hash of the key twice,
 /// or copying the key as long as there is no insert.
 ///
-pub struct HashMap<'a, V> where V: HeapAllocable {
+pub struct HashMap<'a> {
    table: Box<[KeyValue]>,
    heap: &'a Heap,
-    _phantom: PhantomData<V>,
    mask: usize,
    occupied: Vec<usize>,
 }

-impl<'a, V> HashMap<'a, V> where V: HeapAllocable {
+struct QuadraticProbing {
+    hash: usize,
+    i: usize,
+    mask: usize,
+}

-    pub fn new(num_bucket_power_of_2: usize, heap: &'a Heap) -> HashMap<'a, V> {
+impl QuadraticProbing {
+    /// Starts the probe sequence of `key` for a table whose size is `mask + 1`.
+    fn compute(key: &[u8], mask: usize) -> QuadraticProbing {
+        QuadraticProbing { hash: djb2(key) as usize, i: 0, mask: mask }
+    }
+
+    /// Returns the next bucket to probe: `(hash + i * i) & mask`, `i = 1, 2, ...`
+    ///
+    /// Uses wrapping arithmetic: the hash spans the whole `usize` range, so a
+    /// plain `+` could overflow, which panics in debug builds.
+    #[inline]
+    fn next(&mut self) -> usize {
+        self.i += 1;
+        self.hash.wrapping_add(self.i.wrapping_mul(self.i)) & self.mask
+    }
+}
+
+
+impl<'a> HashMap<'a> {
+
+    pub fn new(num_bucket_power_of_2: usize, heap: &'a Heap) -> HashMap<'a> {
        let table_size = 1 << num_bucket_power_of_2;
        let table: Vec<KeyValue> = iter::repeat(KeyValue::default())
            .take(table_size)
@@ -75,16 +94,17 @@ impl<'a, V> HashMap<'a, V> where V: HeapAllocable {
        HashMap {
            table: table.into_boxed_slice(),
            heap: heap,
-            _phantom: PhantomData,
            mask: table_size - 1,
            occupied: Vec::with_capacity(table_size / 2),
        }
    }

-    #[inline]
-    fn bucket(&self, key: &[u8]) -> usize {
-        let hash: u64 = djb2(key);
-        (hash as usize) & self.mask
+    fn probe(&self, key: &[u8]) -> QuadraticProbing {
+        QuadraticProbing::compute(key, self.mask)
+    }
+
+    pub fn is_saturated(&self) -> bool {
+        self.table.len() < self.occupied.len() * 5
    }

    fn get_key(&self, bytes_ref: BytesRef) -> &[u8] {
@@ -100,7 +120,7 @@ impl<'a, V> HashMap<'a, V> where V: HeapAllocable {
        addr
    }
    
-    pub fn iter<'b: 'a>(&'b self,) -> impl Iterator<Item=(&'a [u8], (u32, &'a V))> + 'b {
+    pub fn iter<'b: 'a>(&'b self,) -> impl Iterator<Item=(&'a [u8], u32)> + 'b {
        let heap: &'a Heap = self.heap;
        let table: &'b [KeyValue] = &self.table;
        self.occupied
@@ -109,23 +129,11 @@ impl<'a, V> HashMap<'a, V> where V: HeapAllocable {
            .map(move |bucket: usize| {
                let kv = table[bucket];
                let addr = kv.value_addr;
-                let v: &V = heap.get_mut_ref::<V>(addr);
-                (heap.get_slice(kv.key), (addr, v))
+                (heap.get_slice(kv.key), addr)
            })
-            // .map(move |addr: u32| (heap.get_mut_ref::<V>(addr))  )
    }

-    pub fn values_mut<'b: 'a>(&'b self,) -> impl Iterator<Item=&'a mut V> + 'b {
-        let heap: &'a Heap = self.heap;
-        let table: &'b [KeyValue] = &self.table;
-        self.occupied
-            .iter()
-            .cloned()
-            .map(move |bucket: usize| table[bucket].value_addr)
-            .map(move |addr: u32| heap.get_mut_ref::<V>(addr))
-    }
-
-    pub fn get_or_create<S: AsRef<[u8]>>(&mut self, key: S) -> &mut V {
+    pub fn get_or_create<S: AsRef<[u8]>, V: HeapAllocable>(&mut self, key: S) -> &mut V {
        let entry = self.lookup(key.as_ref());
        match entry {
            Entry::Occupied(addr) => {
@@ -141,8 +149,9 @@ impl<'a, V> HashMap<'a, V> where V: HeapAllocable {
    
    pub fn lookup<S: AsRef<[u8]>>(&self, key: S) -> Entry {
        let key_bytes: &[u8] = key.as_ref();
-        let mut bucket = self.bucket(key_bytes);
+        let mut probe = self.probe(key_bytes);
        loop {
+            let bucket = probe.next();
            let kv: KeyValue = self.table[bucket];
            if kv.is_empty() {
                return Entry::Vacant(bucket);
@@ -150,7 +159,6 @@ impl<'a, V> HashMap<'a, V> where V: HeapAllocable {
            if self.get_key(kv.key) == key_bytes {
                return Entry::Occupied(kv.value_addr);
            }
-            bucket = (bucket + 1) & self.mask;   
        }
    }
 }
@@ -183,14 +191,11 @@ mod tests {
    #[test]
    fn test_hash_map() {
        let heap = Heap::with_capacity(2_000_000);
-        let mut hash_map: HashMap<TestValue> = HashMap::new(18, &heap);
+        let mut hash_map: HashMap = HashMap::new(18, &heap);
        {
-            {
            let v: &mut TestValue = hash_map.get_or_create("abc");
            assert_eq!(v.val, 0u32);
            v.val = 3u32;
-            
-            }
        }
        {
            let v: &mut TestValue = hash_map.get_or_create("abcd");
@@ -205,10 +210,18 @@ mod tests {
            let v: &mut TestValue = hash_map.get_or_create("abcd");
            assert_eq!(v.val, 4u32);
        }
-        let mut iter_values = hash_map.values_mut();
-        assert_eq!(iter_values.next().unwrap().val, 3u32);
-        assert_eq!(iter_values.next().unwrap().val, 4u32);
-        assert!(!iter_values.next().is_some());
+        let mut iter_values = hash_map.iter();
+        {
+            let (_, addr) = iter_values.next().unwrap();
+            let val: &TestValue = heap.get_ref(addr);
+            assert_eq!(val.val, 3u32);
+        }
+        {
+            let (_, addr) = iter_values.next().unwrap();
+            let val: &TestValue = heap.get_ref(addr);
+            assert_eq!(val.val, 4u32);
+        }
+        assert!(iter_values.next().is_none());
    }

    #[bench]
--- a/src/datastruct/stacker/heap.rs
+++ b/src/datastruct/stacker/heap.rs
@@ -41,16 +41,10 @@ impl Heap {
        self.inner().clear();
    }
    
-    
    /// Return the heap capacity.
    pub fn capacity(&self,) -> u32 {
        self.inner().capacity()
    }
-    
-    /// Return the amount of memory that has been allocated so far. 
-    pub fn len(&self,) -> u32 {
-        self.inner().len()
-    }
        
    /// Return amount of free space, in bytes.
    pub fn num_free_bytes(&self,) -> u32 {
@@ -86,102 +80,78 @@ impl Heap {
    pub fn set<Item>(&self, addr: u32, val: &Item) {
        self.inner().set(addr, val);
    }
-    
-    /// Returns a mutable reference for an object at a given Item.
+
+    /// Returns a mutable reference to an `Item` at a given `addr`.
    pub fn get_mut_ref<Item>(&self, addr: u32) -> &mut Item {
        self.inner().get_mut_ref(addr)
    }
+
+    /// Returns a shared reference to an `Item` at a given `addr`.
+    ///
+    /// This is the only definition of `get_ref`: an additional
+    /// `#[cfg(test)]` copy would collide with it when compiling tests.
+    pub fn get_ref<Item>(&self, addr: u32) -> &Item {
+        self.inner().get_mut_ref(addr)
+    }
 }


+// Byte buffer out of which the heap hands out addresses.
 struct InnerHeap {
+    // Backing storage; addresses are offsets into this buffer.
    buffer: Vec<u8>,
-    buffer_len: u32,
+    // Offset of the next free byte (returned by `allocate_space`).
    used: u32,
-    next_heap: Option<Box<InnerHeap>>,
+    // Set once `allocate_space` had to grow `buffer`;
+    // `num_free_bytes` then reports 0.
+    has_been_resized: bool,
 }

-/// initializing a long Vec<u8> is crazy slow in 
-/// debug mode.
-/// We use this unsafe trick to make unit test
-/// way faster.
-fn allocate_fast(num_bytes: usize) -> Vec<u8> {
-    let mut buffer = Vec::with_capacity(num_bytes);
-    unsafe {
-        buffer.set_len(num_bytes);
-    }
-    buffer
-}

 impl InnerHeap {

+    // Creates a heap backed by a zero-filled buffer of `num_bytes` bytes.
    pub fn with_capacity(num_bytes: usize) -> InnerHeap {
-        let buffer: Vec<u8> = allocate_fast(num_bytes);
+        let buffer: Vec<u8> = vec![0u8; num_bytes];
        InnerHeap {
            buffer: buffer,
-            buffer_len: num_bytes as u32,
-            next_heap: None,
            used: 0u32,
+            has_been_resized: false,
        }
    }

+    // Resets the allocation offset to zero; the buffer itself is kept.
+    // NOTE(review): `has_been_resized` is not reset here, so after a resize
+    // `num_free_bytes` keeps returning 0 even once cleared - confirm intended.
    pub fn clear(&mut self) {
        self.used = 0u32;
-        self.next_heap = None;
    }

+    // Returns the current length of the backing buffer, in bytes.
    pub fn capacity(&self,) -> u32 {
        self.buffer.len() as u32
    }
-
-    pub fn len(&self,) -> u32 {
-        self.used
-    }
    
    // Returns the number of free bytes. If the buffer
    // has reached it's capacity and overflowed to another buffer, return 0.
    pub fn num_free_bytes(&self,) -> u32 {
-        if self.next_heap.is_some() {
+        // Once the buffer had to be grown, the heap reports no free space.
+        if self.has_been_resized {
            0u32
        }
        else {
-            self.buffer_len - self.used
+            (self.buffer.len() as u32) - self.used
        } 
    }

    pub fn allocate_space(&mut self, num_bytes: usize) -> u32 {
        let addr = self.used;
        self.used += num_bytes as u32;
-        if self.used <= self.buffer_len {
-            addr
+        let buffer_len = self.buffer.len();
+        if self.used > buffer_len as u32 {
+            self.buffer.resize(buffer_len * 2, 0u8);
+            self.has_been_resized = true
        }
-        else {
-            if self.next_heap.is_none() {
-                warn!("Exceeded heap size. The margin was apparently unsufficient. The segment will be committed right after indexing this very last document.");
-                self.next_heap = Some(Box::new(InnerHeap::with_capacity(self.buffer_len as usize)));
-            }
-            self.next_heap.as_mut().unwrap().allocate_space(num_bytes) + self.buffer_len
-        }
-        
-        
+        addr
    }
    
+    // Returns the bytes in `[start, stop)`; panics if the range is out of bounds.
    fn get_slice(&self, start: u32, stop: u32) -> &[u8] {
-        if start >= self.buffer_len {
-            self.next_heap.as_ref().unwrap().get_slice(start - self.buffer_len, stop - self.buffer_len)
-        }
-        else {
-            &self.buffer[start as usize..stop as usize]
-        }
+        &self.buffer[start as usize..stop as usize]
    }
    
+    // Mutable counterpart of `get_slice` for the bytes in `[start, stop)`.
    fn get_mut_slice(&mut self, start: u32, stop: u32) -> &mut [u8] {
-        if start >= self.buffer_len {
-            self.next_heap.as_mut().unwrap().get_mut_slice(start - self.buffer_len, stop - self.buffer_len)
-        }
-        else {
-            &mut self.buffer[start as usize..stop as usize]
-        }
+        &mut self.buffer[start as usize..stop as usize]
    }

    fn allocate_and_set(&mut self, data: &[u8]) -> BytesRef {
@@ -195,40 +165,23 @@ impl InnerHeap {
    }

+    // Returns a raw pointer to the byte at offset `addr` of the buffer.
+    // No bounds check is performed here: the caller must pass an address
+    // that lies inside the buffer.
    fn get_mut(&mut self, addr: u32) -> *mut u8 {
-        if addr >= self.buffer_len {
-            self.next_heap.as_mut().unwrap().get_mut(addr - self.buffer_len)
-        }
-        else {
-            let addr_isize = addr as isize;
-            unsafe { self.buffer.as_mut_ptr().offset(addr_isize) }
-        }
+        let addr_isize = addr as isize;
+        unsafe { self.buffer.as_mut_ptr().offset(addr_isize) }
    }

-
-
+    // Reinterprets the bytes starting at `addr` as an `Item` and returns a
+    // mutable reference to it. Neither the bounds nor the alignment of
+    // `addr` are checked in this function.
    fn get_mut_ref<Item>(&mut self, addr: u32) -> &mut Item {
-        if addr >= self.buffer_len {
-            self.next_heap.as_mut().unwrap().get_mut_ref(addr - self.buffer_len)
-        }
-        else {
-            let v_ptr_u8 = self.get_mut(addr) as *mut u8;
-            let v_ptr = v_ptr_u8 as *mut Item;
-            unsafe { &mut *v_ptr }
-        }
+        let v_ptr_u8 = self.get_mut(addr) as *mut u8;
+        let v_ptr = v_ptr_u8 as *mut Item;
+        unsafe { &mut *v_ptr }
    }

+    // Copies the bytes of `val` into the buffer, starting at `addr`.
+    // In debug builds, asserts that the written range ends within the
+    // allocated part of the heap (`used`).
    fn set<Item>(&mut self, addr: u32, val: &Item) {
-        if addr >= self.buffer_len {
-            self.next_heap.as_mut().unwrap().set(addr - self.buffer_len, val);
-        }
-        else {
-            let v_ptr: *const Item = val as *const Item;
-            let v_ptr_u8: *const u8 = v_ptr as *const u8;
-            debug_assert!(addr + mem::size_of::<Item>() as u32 <= self.used);
-            unsafe {
-                let dest_ptr: *mut u8 = self.get_mut(addr);
-                ptr::copy(v_ptr_u8, dest_ptr, mem::size_of::<Item>());
-            }
+        let v_ptr: *const Item = val as *const Item;
+        let v_ptr_u8: *const u8 = v_ptr as *const u8;
+        debug_assert!(addr + mem::size_of::<Item>() as u32 <= self.used);
+        unsafe {
+            let dest_ptr: *mut u8 = self.get_mut(addr);
+            ptr::copy(v_ptr_u8, dest_ptr, mem::size_of::<Item>());
        }
    }
 }
--- a/src/datastruct/stacker/mod.rs
+++ b/src/datastruct/stacker/mod.rs
@@ -18,10 +18,10 @@ fn test_unrolled_linked_list() {
        ks.push(2);
        ks.push(3);
        for k in (1..5).map(|k| k * 100) {        
-            let mut hashmap: HashMap<ExpUnrolledLinkedList> = HashMap::new(10, &heap);
+            let mut hashmap: HashMap = HashMap::new(10, &heap);
            for j in 0..k {
                for i in 0..500 {
-                    let mut list = hashmap.get_or_create(i.to_string());
+                    let mut list: &mut ExpUnrolledLinkedList = hashmap.get_or_create(i.to_string());
                    list.push(i*j, &heap);
                }
            }
--- a/src/datastruct/stream_dictionary.rs
+++ b/src/datastruct/stream_dictionary.rs
@@ -0,0 +1,465 @@
+#![allow(should_implement_trait)]
+
+use std::cmp::max;
+use std::io;
+use std::io::Write;
+use std::io::Read;
+use fst;
+use fst::raw::Fst;
+use common::VInt;
+use directory::ReadOnlySource;
+use common::BinarySerializable;
+use std::marker::PhantomData;
+use common::CountingWriter;
+use std::cmp::Ordering;
+use fst::{IntoStreamer, Streamer};
+use std::str;
+use fst::raw::Node;
+use fst::raw::CompiledAddr;
+
+const BLOCK_SIZE: usize = 1024;
+
+/// Wraps an `fst::Error` into an `io::Error` of kind `Other`.
+fn convert_fst_error(e: fst::Error) -> io::Error {
+    let kind = io::ErrorKind::Other;
+    io::Error::new(kind, e)
+}
+
+/// Writes a sorted stream of prefix-compressed `(key, value)` records,
+/// followed by an fst index pointing into that stream.
+pub struct StreamDictionaryBuilder<W: Write, V: BinarySerializable + Clone + Default> {
+    // Output, wrapped to keep track of the number of bytes written.
+    write: CountingWriter<W>,
+    // Maps a key to a byte offset in the record stream
+    // (one entry every `BLOCK_SIZE` keys, plus one on `finish`).
+    block_index: fst::MapBuilder<Vec<u8>>,
+    // Last key inserted; the next key is encoded relative to it.
+    last_key: Vec<u8>,
+    // Number of keys inserted so far.
+    len: usize,
+    _phantom_: PhantomData<V>,
+}
+
+/// Returns the number of leading bytes shared by `left` and `right`.
+fn common_prefix_length(left: &[u8], right: &[u8]) -> usize {
+    let mut shared = 0;
+    for (l, r) in left.iter().zip(right.iter()) {
+        if l != r {
+            break;
+        }
+        shared += 1;
+    }
+    shared
+}
+
+
+
+/// Starting from `node`, repeatedly follows the last transition of the
+/// current node until a node without transitions is reached, appending
+/// the input byte of every transition taken to `buffer`.
+fn fill_last<'a>(fst: &'a Fst, mut node: Node<'a>, buffer: &mut Vec<u8>) {
+    while let Some(transition) = node.transitions().last() {
+        buffer.push(transition.inp);
+        node = fst.node(transition.addr);
+    }
+}
+
+
+/// Looks up, in `fst_map`, a key that is strictly smaller than the given
+/// key, and returns it together with its value.
+///
+/// The search walks the fst along the bytes of the target key (all but the
+/// last one), then backtracks from the deepest node reached: at each depth
+/// it takes the greatest transition smaller than the target's byte
+/// (completed with `fill_last`), or, failing that, the prefix itself if it
+/// is a key of the map.
+///
+/// Returns `(vec!(), 0)` when nothing is found. An empty target key has no
+/// strict predecessor and yields the same fallback.
+fn strictly_previous_key<B: AsRef<[u8]>>(fst_map: &fst::Map, key_as_ref: B) -> (Vec<u8>, u64) {
+    let key = key_as_ref.as_ref();
+    // Guard: `key.len() - 1` below would underflow on an empty key.
+    if key.is_empty() {
+        return (vec!(), 0);
+    }
+    let fst = fst_map.as_fst();
+    let mut node = fst.root();
+    let mut node_stack: Vec<Node> = vec!(node.clone());
+
+    // first check the longest prefix.
+    for &b in &key[..key.len() - 1] {
+        node = match node.find_input(b) {
+            None => {
+                break;
+            },
+            Some(i) => {
+                fst.node(node.transition_addr(i))
+            },
+        };
+        node_stack.push(node);
+    }
+
+    // `node_stack[i]` is the node reached after consuming `key[..i]`.
+    let len_node_stack = node_stack.len();
+    for i in (1..len_node_stack).rev() {
+        let cur_node = &node_stack[i];
+        let b: u8 = key[i];
+        let last_transition_opt = cur_node
+            .transitions()
+            .take_while(|transition| transition.inp < b)
+            .last();
+
+        if let Some(last_transition) = last_transition_opt {
+            // Fork to the greatest smaller sibling, then go as far right
+            // as possible below it.
+            let mut result = Vec::from(&key[..i]);
+            result.push(last_transition.inp);
+            let fork_node = fst.node(last_transition.addr);
+            fill_last(fst, fork_node, &mut result);
+            let val = fst_map.get(&result).unwrap();
+            return (result, val);
+        }
+        else if cur_node.is_final() {
+            // the previous key is a prefix
+            let result_buffer = Vec::from(&key[..i]);
+            let val = fst_map.get(&result_buffer).unwrap();
+            return (result_buffer, val);
+        }
+    }
+
+    (vec!(), 0)
+}
+
+
+impl<W: Write, V: BinarySerializable + Clone + Default> StreamDictionaryBuilder<W, V> {
+
+    /// Creates a builder writing the dictionary to `write`.
+    pub fn new(write: W) -> io::Result<StreamDictionaryBuilder<W, V>> {
+        let buffer: Vec<u8> = vec!();
+        Ok(StreamDictionaryBuilder {
+            write: CountingWriter::wrap(write),
+            block_index: fst::MapBuilder::new(buffer)
+                .expect("This cannot fail"),
+            last_key: Vec::with_capacity(128),
+            len: 0,
+            _phantom_: PhantomData,
+        })
+    }
+
+    /// Adds `last_key -> number of bytes written so far` to the block index.
+    ///
+    /// Panics if the fst builder rejects the insertion.
+    fn add_index_entry(&mut self) {
+        self.block_index.insert(&self.last_key, self.write.written_bytes() as u64).unwrap();
+    }
+
+    /// Appends a `(key, value)` record: `insert_key` followed by `insert_value`.
+    pub fn insert(&mut self, key: &[u8], value: &V) -> io::Result<()>{
+        self.insert_key(key)?;
+        self.insert_value(value)
+    }
+
+    /// Writes `key`, encoded relative to the previously inserted key:
+    /// length of the common prefix, length of the remaining suffix, then
+    /// the suffix bytes.
+    ///
+    /// Every `BLOCK_SIZE` keys, an index entry is added first.
+    pub fn insert_key(&mut self, key: &[u8]) -> io::Result<()>{
+        if self.len % BLOCK_SIZE == 0 {
+            self.add_index_entry();
+        }
+        self.len += 1;
+        let common_len = common_prefix_length(key, &self.last_key);
+        VInt(common_len as u64).serialize(&mut self.write)?;
+        self.last_key.truncate(common_len);
+        self.last_key.extend_from_slice(&key[common_len..]);
+        VInt((key.len() - common_len) as u64).serialize(&mut self.write)?;
+        self.write.write_all(&key[common_len..])?;
+        Ok(())
+    }
+
+    /// Serializes `value` right after the key written by `insert_key`.
+    pub fn insert_value(&mut self, value: &V) -> io::Result<()>{
+        value.serialize(&mut self.write)?;
+        Ok(())
+    }
+
+    /// Finalizes the dictionary and returns the underlying writer.
+    ///
+    /// The layout is: record stream, fst block index, then the length of the
+    /// record stream serialized as a `u64`.
+    pub fn finish(mut self) -> io::Result<W> {
+        self.add_index_entry();
+        let (mut w, split_len) = self.write.finish()?;
+        let fst_write = self.block_index
+            .into_inner()
+            .map_err(convert_fst_error)?;
+        // `write_all`, not `write`: a single `write` call is allowed to stop
+        // after a partial write, which would silently truncate the index.
+        w.write_all(&fst_write)?;
+        (split_len as u64).serialize(&mut w)?;
+        w.flush()?;
+        Ok(w)
+    }
+}
+
+
+
+/// Builds a streamer positioned at the offset that the block index
+/// associates with a key strictly smaller than `target_key` (or at the
+/// start of the stream when there is none).
+fn stream_before<'a, V: 'a + Clone + Default + BinarySerializable>(stream_dictionary: &'a StreamDictionary<V>, target_key: &[u8]) -> StreamDictionaryStreamer<'a, V> {
+    let (prev_key, offset) = strictly_previous_key(&stream_dictionary.fst_index, target_key);
+    let start = offset as usize;
+    let data: &'a [u8] = stream_dictionary.stream_data.as_slice();
+    StreamDictionaryStreamer {
+        cursor: &data[start..],
+        // The first record read is decoded relative to this key.
+        current_key: prev_key,
+        current_value: V::default(),
+    }
+}
+
+
+/// Read-only view over a dictionary produced by `StreamDictionaryBuilder`.
+pub struct StreamDictionary<V> where V:BinarySerializable + Default + Clone {
+    // Prefix-compressed `(key, value)` records.
+    stream_data: ReadOnlySource,
+    // Maps keys to byte offsets within `stream_data`.
+    fst_index: fst::Map,
+    _phantom_: PhantomData<V>,
+}
+
+/// Opens the fst stored in `source`, converting fst errors into `io::Error`.
+fn open_fst_index(source: ReadOnlySource) -> io::Result<fst::Map> {
+    let raw_fst = match source {
+        ReadOnlySource::Anonymous(data) => {
+            Fst::from_shared_bytes(data.data, data.start, data.len)
+                .map_err(convert_fst_error)?
+        }
+        ReadOnlySource::Mmap(mmap_readonly) => {
+            Fst::from_mmap(mmap_readonly)
+                .map_err(convert_fst_error)?
+        }
+    };
+    Ok(fst::Map::from(raw_fst))
+}
+
+
+impl<V: BinarySerializable + Clone + Default> StreamDictionary<V> {
+
+    /// Opens a dictionary from `source`.
+    ///
+    /// The last 8 bytes hold the length of the record stream; the fst index
+    /// sits between the record stream and that footer.
+    ///
+    /// Returns an `InvalidData` error when the source is too short to hold
+    /// the footer, or when the footer points past the end of the data.
+    pub fn from_source(source: ReadOnlySource)  -> io::Result<StreamDictionary<V>> {
+        let total_len = source.len();
+        if total_len < 8 {
+            return Err(io::Error::new(io::ErrorKind::InvalidData,
+                                      "stream dictionary is too short to contain its footer"));
+        }
+        let length_offset = total_len - 8;
+        let split_len: usize = {
+            let mut split_len_buffer: &[u8] = &source.as_slice()[length_offset..];
+            u64::deserialize(&mut split_len_buffer)? as  usize
+        };
+        if split_len > length_offset {
+            return Err(io::Error::new(io::ErrorKind::InvalidData,
+                                      "stream dictionary footer points past the end of the data"));
+        }
+        let stream_data = source.slice(0, split_len);
+        let fst_data = source.slice(split_len, length_offset);
+        let fst_index = open_fst_index(fst_data)?;
+
+        Ok(StreamDictionary {
+            stream_data: stream_data,
+            fst_index: fst_index,
+            _phantom_: PhantomData
+        })
+    }
+
+    /// Returns a clone of the value associated with `target_key`, if any.
+    ///
+    /// Records are scanned from the position given by `stream_before` until
+    /// a key greater than or equal to the target is met.
+    pub fn get<K: AsRef<[u8]>>(&self, target_key: K) -> Option<V> {
+        let target: &[u8] = target_key.as_ref();
+        let mut streamer = stream_before(self, target);
+        while let Some((iter_key, iter_val)) = streamer.next() {
+            match iter_key.cmp(target) {
+                Ordering::Less => {}
+                Ordering::Equal => {
+                    let val: V = (*iter_val).clone();
+                    return Some(val);
+                }
+                Ordering::Greater => {
+                    return None;
+                }
+            }
+        }
+        None
+    }
+
+    /// Returns a builder for a stream restricted to a range of keys.
+    ///
+    /// Without any bound, the range covers the whole dictionary.
+    pub fn range(&self) -> StreamDictionaryStreamerBuilder<V> {
+        let data: &[u8] = &self.stream_data;
+        let origin = data.as_ptr() as usize;
+        StreamDictionaryStreamerBuilder {
+            stream_dictionary: self,
+            // Both offsets are absolute addresses: `into_stream` subtracts
+            // the address of the data from them, so the lower bound has to
+            // start at that address (starting at 0 would underflow there).
+            offset_from: origin,
+            offset_to: origin + data.len(),
+            current_key: vec!(),
+        }
+    }
+
+    /// Returns a streamer over all of the records of the dictionary.
+    pub fn stream(&self) -> StreamDictionaryStreamer<V> {
+        StreamDictionaryStreamer {
+            cursor: &*self.stream_data,
+            current_key: Vec::with_capacity(128),
+            current_value: V::default(),
+        }
+    }
+}
+
+/// Builder for a stream over a range of keys of a `StreamDictionary`.
+pub struct StreamDictionaryStreamerBuilder<'a, V: 'a + BinarySerializable + Clone + Default> {
+    stream_dictionary: &'a StreamDictionary<V>,
+    // Start of the range. `into_stream` subtracts the address of the record
+    // data from it, i.e. it is treated as an absolute address.
+    offset_from: usize,
+    // End of the range (exclusive), expressed the same way as `offset_from`.
+    offset_to: usize,
+    // Key preceding the first record of the range; that record is decoded
+    // relative to it.
+    current_key: Vec<u8>,
+}
+
+
+/// Advances `streamer` until it meets a key that does not satisfy
+/// `predicate` (or until it is exhausted).
+///
+/// Returns the address at which the record of that key starts, together
+/// with the key that precedes it (needed to decode the record, as keys are
+/// encoded relative to their predecessor).
+fn get_offset<'a, V, P: Fn(&[u8])->bool>(predicate: P, mut streamer: StreamDictionaryStreamer<V>) -> (usize, Vec<u8>)
+    where V: 'a + BinarySerializable + Clone + Default {
+    let mut record_start: &[u8] = streamer.cursor;
+    let mut previous_key: Vec<u8> = streamer.current_key.clone();
+
+    loop {
+        match streamer.next() {
+            Some((iter_key, _)) => {
+                if !predicate(iter_key) {
+                    break;
+                }
+                // Copy the key first, so that the borrow of the streamer is
+                // over by the time its cursor is read.
+                previous_key.clear();
+                previous_key.extend_from_slice(iter_key);
+            }
+            None => {
+                break;
+            }
+        }
+        record_start = streamer.cursor;
+    }
+    (record_start.as_ptr() as usize, previous_key)
+}
+
+impl<'a, V: 'a + BinarySerializable + Clone + Default> StreamDictionaryStreamerBuilder<'a, V> {
+    /// Restricts the stream to keys greater than or equal to `bound`.
+    pub fn ge<T: AsRef<[u8]>>(mut self, bound: T) -> StreamDictionaryStreamerBuilder<'a, V> {
+        let target_key = bound.as_ref();
+        let streamer = stream_before(self.stream_dictionary, target_key);
+        let smaller_than = |k: &[u8]| { k.lt(target_key) };
+        let (offset_before, current_key) = get_offset(smaller_than, streamer);
+        self.current_key = current_key;
+        self.offset_from = offset_before;
+        self
+    }
+
+    /// Restricts the stream to keys strictly greater than `bound`.
+    pub fn gt<T: AsRef<[u8]>>(mut self, bound: T) -> StreamDictionaryStreamerBuilder<'a, V> {
+        let target_key = bound.as_ref();
+        let streamer = stream_before(self.stream_dictionary, target_key);
+        let smaller_than = |k: &[u8]| { k.le(target_key) };
+        let (offset_before, current_key) = get_offset(smaller_than, streamer);
+        self.current_key = current_key;
+        self.offset_from = offset_before;
+        self
+    }
+
+    /// Restricts the stream to keys strictly smaller than `bound`.
+    pub fn lt<T: AsRef<[u8]>>(mut self, bound: T) -> StreamDictionaryStreamerBuilder<'a, V> {
+        let target_key = bound.as_ref();
+        let streamer = stream_before(self.stream_dictionary, target_key);
+        // The stream stops right before the first key that is not strictly
+        // smaller than the bound, hence excluding the bound itself.
+        let smaller_than = |k: &[u8]| { k.lt(target_key) };
+        let (offset_before, _) = get_offset(smaller_than, streamer);
+        self.offset_to = offset_before;
+        self
+    }
+
+    /// Restricts the stream to keys smaller than or equal to `bound`.
+    pub fn le<T: AsRef<[u8]>>(mut self, bound: T) -> StreamDictionaryStreamerBuilder<'a, V> {
+        let target_key = bound.as_ref();
+        let streamer = stream_before(self.stream_dictionary, target_key);
+        // The stream stops right before the first key strictly greater than
+        // the bound, hence including the bound itself.
+        let smaller_than = |k: &[u8]| { k.le(target_key) };
+        let (offset_before, _) = get_offset(smaller_than, streamer);
+        self.offset_to = offset_before;
+        self
+    }
+
+    /// Builds the streamer over the selected range.
+    ///
+    /// If the upper bound lies before the lower bound, the stream is empty.
+    pub fn into_stream(self) -> StreamDictionaryStreamer<'a, V> {
+        let data: &[u8] = &self.stream_dictionary.stream_data.as_slice()[..];
+        let origin = data.as_ptr() as usize;
+        let start = self.offset_from - origin;
+        let stop = max(self.offset_to - origin, start);
+        StreamDictionaryStreamer {
+            cursor: &data[start..stop],
+            current_key: self.current_key,
+            current_value: V::default(),
+        }
+    }
+}
+
+/// Cursor over the prefix-compressed records of a `StreamDictionary`.
+pub struct StreamDictionaryStreamer<'a, V: BinarySerializable> {
+    // Remaining, not yet decoded, bytes.
+    cursor: &'a [u8],
+    // Key of the last decoded record.
+    current_key: Vec<u8>,
+    // Value of the last decoded record.
+    current_value: V,
+}
+
+impl<'a, V: BinarySerializable> StreamDictionaryStreamer<'a, V> {
+
+    /// Decodes the next record and returns its key and value, or `None`
+    /// when there is no byte left.
+    ///
+    /// Panics if the remaining bytes cannot be decoded.
+    pub fn next(&mut self) -> Option<(&[u8], &V)> {
+        if self.cursor.is_empty() {
+            return None;
+        }
+        let common_length: usize = VInt::deserialize(&mut self.cursor).unwrap().0 as usize;
+        let suffix_length: usize = VInt::deserialize(&mut self.cursor).unwrap().0 as usize;
+        let new_length: usize = common_length + suffix_length;
+        // Keeps the prefix shared with the previous key and makes room for
+        // the suffix, without ever exposing uninitialized memory.
+        self.current_key.resize(new_length, 0u8);
+        self.cursor.read_exact(&mut self.current_key[common_length..new_length]).unwrap();
+        self.current_value = V::deserialize(&mut self.cursor).unwrap();
+        Some((&self.current_key, &self.current_value))
+    }
+
+
+    /// Returns the key of the last decoded record.
+    pub fn key(&self) -> &[u8] {
+        &self.current_key
+    }
+
+    /// Returns the value of the last decoded record.
+    pub fn value(&self) -> &V {
+        &self.current_value
+    }
+}
+
+#[cfg(test)]
+mod test {
+    
+    use std::str;
+    use directory::ReadOnlySource;
+    use super::CountingWriter;
+    use std::io::Write;
+    use super::{BLOCK_SIZE, StreamDictionary, StreamDictionaryBuilder};
+
+    #[test]
+    fn test_stream_dictionary() {
+        let ids: Vec<_> = (0u32..10_000u32)
+            .map(|i| (format!("doc{:0>6}", i), i))
+            .collect();
+        let buffer: Vec<u8> = {
+            let mut stream_dictionary_builder = StreamDictionaryBuilder::new(vec!()).unwrap();
+            for &(ref id, ref i) in &ids {
+                stream_dictionary_builder.insert(id.as_bytes(), i).unwrap();
+            }
+            stream_dictionary_builder.finish().unwrap()
+        };
+        let source = ReadOnlySource::from(buffer);
+        let stream_dictionary: StreamDictionary<u32> = StreamDictionary::from_source(source).unwrap();
+        {
+            let mut streamer = stream_dictionary.stream();
+            let mut i = 0;
+            while let Some((streamer_k, streamer_v)) = streamer.next() {
+                let &(ref key, ref v) = &ids[i];
+                assert_eq!(streamer_k, key.as_bytes());
+                assert_eq!(streamer_v, v);
+                i += 1;
+            }
+            assert_eq!(i, ids.len());
+        }
+        
+        let &(ref key, ref v) = &ids[2047];
+        assert_eq!(stream_dictionary.get(key.as_bytes()), Some(*v));
+    }
+
+
+
+    #[test]
+    fn test_stream_range() {
+        let ids: Vec<_> = (0u32..10_000u32)
+            .map(|i| (format!("doc{:0>6}", i), i))
+            .collect();
+        let buffer: Vec<u8> = {
+            let mut stream_dictionary_builder = StreamDictionaryBuilder::new(vec!()).unwrap();
+            for &(ref id, ref i) in &ids {
+                stream_dictionary_builder.insert(id.as_bytes(), i).unwrap();
+            }
+            stream_dictionary_builder.finish().unwrap()
+        };
+        let source = ReadOnlySource::from(buffer);
+        
+        let stream_dictionary: StreamDictionary<u32> = StreamDictionary::from_source(source).unwrap();
+        {
+            for i in (0..20).chain(BLOCK_SIZE - 10..BLOCK_SIZE + 10) {
+                let &(ref target_key, _) = &ids[i];
+                let mut streamer = stream_dictionary
+                    .range()
+                    .ge(target_key.as_bytes())
+                    .into_stream();
+                for j in 0..3 {
+                    let (streamer_k, streamer_v) = streamer.next().unwrap();
+                    let &(ref key, ref v) = &ids[i + j];
+                    assert_eq!(str::from_utf8(streamer_k).unwrap(), key);
+                    assert_eq!(streamer_v, v);
+                }
+            }
+        }
+
+        {
+            for i in (0..20).chain(BLOCK_SIZE - 10..BLOCK_SIZE + 10) {
+                let &(ref target_key, _) = &ids[i];
+                let mut streamer = stream_dictionary
+                    .range()
+                    .gt(target_key.as_bytes())
+                    .into_stream();
+                for j in 0..3 {
+                    let (streamer_k, streamer_v) = streamer.next().unwrap();
+                    let &(ref key, ref v) = &ids[i + j + 1];
+                    assert_eq!(streamer_k, key.as_bytes());
+                    assert_eq!(streamer_v, v);
+                }
+            }
+        }
+
+        {
+            for i in (0..20).chain(BLOCK_SIZE - 10..BLOCK_SIZE + 10) {
+                let &(ref fst_key, _) = &ids[i];
+                let &(ref last_key, _) = &ids[i + 3];
+                {
+                    // [i, i + 3): exactly three keys, the bound is excluded.
+                    let mut streamer = stream_dictionary
+                        .range()
+                        .ge(fst_key.as_bytes())
+                        .lt(last_key.as_bytes())
+                        .into_stream();
+                    for j in 0..3 {
+                        let (streamer_k, streamer_v) = streamer.next().unwrap();
+                        let &(ref key, ref v) = &ids[i + j];
+                        assert_eq!(streamer_k, key.as_bytes());
+                        assert_eq!(streamer_v, v);
+                    }
+                    assert!(streamer.next().is_none());
+                }
+                {
+                    // [i, i + 3]: four keys, the bound is included.
+                    let mut streamer = stream_dictionary
+                        .range()
+                        .ge(fst_key.as_bytes())
+                        .le(last_key.as_bytes())
+                        .into_stream();
+                    for j in 0..4 {
+                        let (streamer_k, streamer_v) = streamer.next().unwrap();
+                        let &(ref key, ref v) = &ids[i + j];
+                        assert_eq!(streamer_k, key.as_bytes());
+                        assert_eq!(streamer_v, v);
+                    }
+                    assert!(streamer.next().is_none());
+                }
+            }
+        }
+        
+    }
+    
+}
--- a/src/directory/directory.rs
+++ b/src/directory/directory.rs
@@ -1,13 +1,14 @@
 use std::marker::Send;
 use std::fmt;
 use std::path::Path;
-use directory::error::{FileError, OpenWriteError};
+use directory::error::{OpenReadError, DeleteError, OpenWriteError};
 use directory::{ReadOnlySource, WritePtr};
 use std::result;
 use std::io;
 use std::marker::Sync;

-/// Write-once read many (WORM) abstraction for where tantivy's index should be stored. 
+/// Write-once read many (WORM) abstraction for where
+/// tantivy's data should be stored. 
 ///
 /// There are currently two implementations of `Directory`
 /// 
@@ -25,16 +26,16 @@ pub trait Directory: fmt::Debug + Send + Sync + 'static {
    ///
    /// Specifically, subsequent writes or flushes should
    /// have no effect on the returned `ReadOnlySource` object. 
-    fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, FileError>;
-    
+    fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenReadError>;
+
    /// Removes a file
    ///
    /// Removing a file will not affect an eventual
    /// existing ReadOnlySource pointing to it.
    /// 
    /// Removing a nonexistent file, yields a
-    /// `FileError::DoesNotExist`.
-    fn delete(&self, path: &Path) -> result::Result<(), FileError>;
+    /// `DeleteError::DoesNotExist`.
+    fn delete(&self, path: &Path) -> result::Result<(), DeleteError>;

    /// Returns true iff the file exists
    fn exists(&self, path: &Path) -> bool;
@@ -60,6 +61,12 @@ pub trait Directory: fmt::Debug + Send + Sync + 'static {
    /// The file may not previously exist.
    fn open_write(&mut self, path: &Path) -> Result<WritePtr, OpenWriteError>;
    
+    /// Reads the full content file that has been written using
+    /// atomic_write.
+    ///
+    /// This should only be used for small files.
+    fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError>;
+
    /// Atomically replace the content of a file with data.
    /// 
    /// This calls ensure that reads can never *observe*
@@ -70,6 +77,7 @@ pub trait Directory: fmt::Debug + Send + Sync + 'static {
        
    /// Clones the directory and boxes the clone 
    fn box_clone(&self) -> Box<Directory>;
+
 }


--- a/src/directory/error.rs
+++ b/src/directory/error.rs
@@ -27,9 +27,9 @@ impl From<io::Error> for OpenWriteError {
    }
 }

-/// Error that may occur when accessing a file (read, or delete)
+/// Error that may occur when opening a file for reading
 #[derive(Debug)]
-pub enum FileError {
+pub enum OpenReadError {
    /// The file does not exists.
    FileDoesNotExist(PathBuf),
    /// Any kind of IO error that happens when 
@@ -37,8 +37,16 @@ pub enum FileError {
    IOError(io::Error),
 }

-impl From<io::Error> for FileError {
-    fn from(err: io::Error) -> FileError {
-        FileError::IOError(err)
-    }
+
+/// Error that may occur when trying to delete a file
+#[derive(Debug)]
+pub enum DeleteError {
+    /// The file does not exist.
+    FileDoesNotExist(PathBuf),
+    /// Any kind of IO error that happens when
+    /// interacting with the underlying IO device.
+    IOError(io::Error),
+    /// The file may not be deleted because it is
+    /// protected.
+    FileProtected(PathBuf),
 }
--- a/src/directory/managed_directory.rs
+++ b/src/directory/managed_directory.rs
@@ -0,0 +1,405 @@
+use std::path::{Path, PathBuf};
+use serde_json;
+use directory::error::{OpenReadError, DeleteError, OpenWriteError};
+use directory::{ReadOnlySource, WritePtr};
+use std::result;
+use std::io;
+use Directory;
+use std::sync::{Arc, RwLock};
+use std::collections::HashSet;
+use std::io::Write;
+use core::MANAGED_FILEPATH;
+use std::collections::HashMap;
+use std::fmt;
+use Result;
+use Error;
+
+/// Wrapper of directories that keeps track of files created by Tantivy.
+///
+/// A managed directory is just a wrapper of a directory
+/// that keeps a (persisted) list of the files that 
+/// have been created (and not deleted) by tantivy so far.
+///
+/// Thanks to this list, it implements a `garbage_collect` method
+/// that removes the files that were created by tantivy and are not
+/// useful anymore.
+#[derive(Debug)]
+pub struct ManagedDirectory {
+    directory: Box<Directory>,
+    meta_informations: Arc<RwLock<MetaInformation>>,
+}
+
+#[derive(Debug, Default)]
+struct MetaInformation {
+    managed_paths: HashSet<PathBuf>,
+    protected_files: HashMap<PathBuf, usize>,
+}
+
+
+/// A `FileProtection` prevents the garbage collection of a file.
+///
+/// See `ManagedDirectory.protect_file_from_delete`.
+pub struct FileProtection {
+    directory: ManagedDirectory,
+    path: PathBuf,
+}
+
+fn unprotect_file_from_delete(directory: &ManagedDirectory, path: &Path) {
+    let mut meta_informations_wlock = directory.meta_informations
+        .write()
+        .expect("Managed file lock poisoned");
+    if let Some(counter_ref_mut) = meta_informations_wlock
+        .protected_files
+        .get_mut(path) {
+        (*counter_ref_mut) -= 1;
+    }
+}
+
+impl fmt::Debug for FileProtection {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> result::Result<(), fmt::Error> {
+        write!(formatter, "FileProtectionFor({:?})", self.path)    
+    }
+}
+
+impl Drop for FileProtection {
+    fn drop(&mut self) {
+        unprotect_file_from_delete(&self.directory, &*self.path);
+    }
+}
+
+impl ManagedDirectory {
+
+    /// Wraps a directory as managed directory.
+    pub fn new<Dir: Directory>(directory: Dir) -> Result<ManagedDirectory> {
+        match directory.atomic_read(&MANAGED_FILEPATH) {
+            Ok(data) => {
+                let managed_files_json = String::from_utf8_lossy(&data);
+                let managed_files: HashSet<PathBuf> = serde_json::from_str(&managed_files_json)
+                    .map_err(|e| Error::CorruptedFile(MANAGED_FILEPATH.clone(), Box::new(e)))?;
+                Ok(ManagedDirectory {
+                    directory: box directory,
+                    meta_informations: Arc::new(RwLock::new(
+                        MetaInformation {
+                            managed_paths: managed_files,
+                            protected_files: HashMap::default()
+                        })),
+                })
+            }
+            Err(OpenReadError::FileDoesNotExist(_)) => {
+                Ok(ManagedDirectory {
+                    directory: box directory,
+                    meta_informations: Arc::default(),
+                })
+            }
+            Err(OpenReadError::IOError(e)) => {
+                Err(From::from(e))
+            }
+        }
+    }
+
+    /// Garbage collect unused files.
+    ///
+    /// Removes the files that were created by `tantivy` and are not
+    /// used by any segment anymore.
+    ///
+    /// * `living_files` - List of files that are still used by the index.
+    ///
+    /// This method never returns an error (it only panics on a poisoned lock).
+    /// If a file cannot be deleted (for permission reasons for instance)
+    /// an error is simply logged, and the file remains in the list of managed
+    /// files.
+    pub fn garbage_collect(&mut self, living_files: HashSet<PathBuf>) {
+        let mut files_to_delete = vec!();
+        {   // releasing the lock as .delete() will use it too.
+            let meta_informations_rlock = self.meta_informations
+                .read()
+                .expect("Managed directory rlock poisoned in garbage collect.");
+            for managed_path in &meta_informations_rlock.managed_paths {
+                if !living_files.contains(managed_path) {
+                    files_to_delete.push(managed_path.clone());
+                }
+            }
+        }
+
+        let mut deleted_files = vec!();
+        {
+            for file_to_delete in files_to_delete {
+                match self.delete(&file_to_delete) {
+                    Ok(_) => {
+                        info!("Deleted {:?}", file_to_delete);
+                        deleted_files.push(file_to_delete);
+                    }
+                    Err(file_error) => {
+                        // Log only genuine IO failures: a missing or protected file is expected.
+                        match file_error {
+                            DeleteError::FileDoesNotExist(_) => {
+                                deleted_files.push(file_to_delete);
+                            }
+                            DeleteError::IOError(_) => {
+                                if !cfg!(target_os = "windows") {
+                                    error!("Failed to delete {:?}", file_to_delete);
+                                }
+                            }
+                            DeleteError::FileProtected(_) => {
+                                // this is expected.
+                            }
+                        }
+
+                    }
+                }
+            }
+        }
+
+
+        if !deleted_files.is_empty() {
+            // update the list of managed files by removing
+            // the files that were deleted.
+            {
+                let mut meta_informations_wlock = self.meta_informations
+                    .write()
+                    .expect("Managed directory wlock poisoned (2).");
+                let managed_paths_write = &mut meta_informations_wlock.managed_paths;
+                for delete_file in &deleted_files {
+                    managed_paths_write.remove(delete_file);
+                }
+            }
+            if self.save_managed_paths().is_err() {
+                error!("Failed to save the list of managed files.");
+            }
+        }
+
+    }
+
+
+    /// Protects a file from being garbage collected.
+    ///
+    /// The method returns a `FileProtection` object.
+    /// The file will not be garbage collected as long as the
+    /// `FileProtection` object is kept alive. 
+    pub fn protect_file_from_delete(&self, path: &Path) -> FileProtection {
+        let pathbuf = path.to_owned();
+        {
+            let mut meta_informations_wlock = self.meta_informations
+                .write()
+                .expect("Managed file lock poisoned on protect");
+            *meta_informations_wlock
+                .protected_files
+                .entry(pathbuf.clone())
+                .or_insert(0) += 1;
+        }
+        FileProtection {
+            directory: self.clone(),
+            path: pathbuf.clone(),
+        }
+    }
+
+
+    /// Saves the file containing the list of existing files
+    /// that were created by tantivy.
+    fn save_managed_paths(&mut self,) -> io::Result<()> {
+        let managed_paths;
+        {
+            let meta_informations_rlock = self.meta_informations
+                .read()
+                .expect("Managed file lock poisoned");
+            managed_paths = meta_informations_rlock.managed_paths.clone();
+        }
+        let mut w = try!(serde_json::to_vec(&managed_paths));
+        try!(write!(&mut w, "\n"));
+        self.directory.atomic_write(&MANAGED_FILEPATH, &w[..])?;
+        Ok(())
+    }
+
+    /// Registers a file as managed
+    /// 
+    /// This method must be called before the file is 
+    /// actually created to ensure that a failure between
+    /// registering the filepath and creating the file
+    /// will not lead to garbage files that will 
+    /// never get removed.
+    fn register_file_as_managed(&mut self, filepath: &Path) -> io::Result<()> {
+        let has_changed = {
+            let mut meta_wlock = self.meta_informations
+                .write()
+                .expect("Managed file lock poisoned");
+            meta_wlock.managed_paths.insert(filepath.to_owned())
+        };
+        if has_changed {
+            self.save_managed_paths()?;
+        }
+        Ok(())
+    }
+}
+
+impl Directory for ManagedDirectory {
+    
+    fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenReadError> {
+        self.directory.open_read(path)
+    }
+
+    fn open_write(&mut self, path: &Path) -> result::Result<WritePtr, OpenWriteError> {
+        self.register_file_as_managed(path)?;
+        self.directory.open_write(path)
+    }
+
+    fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> {
+        self.register_file_as_managed(path)?;
+        self.directory.atomic_write(path, data)
+    }
+
+    fn atomic_read(&self, path: &Path) -> result::Result<Vec<u8>, OpenReadError> {
+        self.directory.atomic_read(path)
+    }
+
+    fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
+        {
+            let metas_rlock = self.meta_informations
+                .read()
+                .expect("poisoned lock in managed directory meta");
+            if let Some(counter) = metas_rlock.protected_files.get(path) {
+                if *counter > 0 {
+                    return Err(DeleteError::FileProtected(path.to_owned()))
+                }
+            }
+        }
+        self.directory.delete(path)
+    }
+
+    fn exists(&self, path: &Path) -> bool {
+        self.directory.exists(path)
+    }
+    
+    fn box_clone(&self) -> Box<Directory> {
+        box self.clone()
+    }
+
+}
+
+impl Clone for ManagedDirectory {
+    fn clone(&self) -> ManagedDirectory {
+        ManagedDirectory {
+            directory: self.directory.box_clone(),
+            meta_informations: self.meta_informations.clone(),
+        }
+    }
+}
+
+
+
+
+#[cfg(test)]
+mod tests {
+
+    use super::*;
+    use directory::MmapDirectory;
+    use std::path::Path;   
+    use std::io::Write;
+    use tempdir::TempDir;
+    
+    lazy_static! {
+        static ref TEST_PATH1: &'static Path = Path::new("some_path_for_test");
+        static ref TEST_PATH2: &'static Path = Path::new("some_path_for_test2");
+    }
+
+    #[test]
+    fn test_managed_directory() {
+        let tempdir = TempDir::new("index").unwrap();
+        let tempdir_path = PathBuf::from(tempdir.path());
+        {
+            let mmap_directory = MmapDirectory::open(&tempdir_path).unwrap();
+            let mut managed_directory = ManagedDirectory::new(mmap_directory).unwrap();
+            {
+                let mut write_file = managed_directory.open_write(*TEST_PATH1).unwrap();
+                write_file.flush().unwrap();
+            }
+            {
+                managed_directory.atomic_write(*TEST_PATH2, &vec!(0u8,1u8)).unwrap();
+            }
+            {
+                assert!(managed_directory.exists(*TEST_PATH1));
+                assert!(managed_directory.exists(*TEST_PATH2));
+            }
+            {
+                let living_files: HashSet<PathBuf> = [TEST_PATH1.to_owned()]
+                    .into_iter()
+                    .cloned()
+                    .collect();
+                managed_directory.garbage_collect(living_files);
+            }
+            {
+                assert!(managed_directory.exists(*TEST_PATH1));
+                assert!(!managed_directory.exists(*TEST_PATH2));
+            }
+        }
+        {
+            let mmap_directory = MmapDirectory::open(&tempdir_path).unwrap();
+            let mut managed_directory = ManagedDirectory::new(mmap_directory).unwrap();
+            {
+                assert!(managed_directory.exists(*TEST_PATH1));
+                assert!(!managed_directory.exists(*TEST_PATH2));
+            }
+            {
+                let living_files: HashSet<PathBuf> = HashSet::new();
+                managed_directory.garbage_collect(living_files);
+            }
+            {
+                assert!(!managed_directory.exists(*TEST_PATH1));
+                assert!(!managed_directory.exists(*TEST_PATH2));
+            }
+        }   
+    }
+
+    #[test]
+    fn test_managed_directory_gc_while_mmapped() {
+        let tempdir = TempDir::new("index").unwrap();
+        let tempdir_path = PathBuf::from(tempdir.path());
+        let living_files = HashSet::new();
+
+        let mmap_directory = MmapDirectory::open(&tempdir_path).unwrap();
+        let mut managed_directory = ManagedDirectory::new(mmap_directory).unwrap();
+        managed_directory.atomic_write(*TEST_PATH1, &vec!(0u8,1u8)).unwrap();
+        assert!(managed_directory.exists(*TEST_PATH1));
+
+        let _mmap_read = managed_directory.open_read(*TEST_PATH1).unwrap();            
+        managed_directory.garbage_collect(living_files.clone());
+        if cfg!(target_os = "windows") {
+            // On Windows, gc should try and fail to delete the file as it is mmapped.
+            assert!(managed_directory.exists(*TEST_PATH1));
+            // unmap should happen here.
+            drop(_mmap_read);
+            // The file should still be in the list of managed files and
+            // eventually be deleted once mmap is released.
+            managed_directory.garbage_collect(living_files);
+            assert!(!managed_directory.exists(*TEST_PATH1));
+        }
+        else {
+            assert!(!managed_directory.exists(*TEST_PATH1));
+        }
+
+    }
+
+
+    #[test]
+    fn test_managed_directory_protect() {
+        let tempdir = TempDir::new("index").unwrap();
+        let tempdir_path = PathBuf::from(tempdir.path());
+        let living_files = HashSet::new();
+
+        let mmap_directory = MmapDirectory::open(&tempdir_path).unwrap();
+        let mut managed_directory = ManagedDirectory::new(mmap_directory).unwrap();
+        managed_directory.atomic_write(*TEST_PATH1, &vec!(0u8,1u8)).unwrap();
+        assert!(managed_directory.exists(*TEST_PATH1));
+
+        {
+            let _file_protection = managed_directory.protect_file_from_delete(*TEST_PATH1);
+            managed_directory.garbage_collect(living_files.clone());
+            assert!(managed_directory.exists(*TEST_PATH1));
+        }
+
+        managed_directory.garbage_collect(living_files.clone());
+        assert!(!managed_directory.exists(*TEST_PATH1));
+        
+
+    }
+
+}
--- a/src/directory/mmap_directory.rs
+++ b/src/directory/mmap_directory.rs
@@ -1,27 +1,158 @@
-use std::path::{Path, PathBuf};
-use tempdir::TempDir;
-use std::collections::HashMap;
-use std::collections::hash_map::Entry as HashMapEntry;
-use fst::raw::MmapReadOnly;
-use std::fs::File;
 use atomicwrites;
-use std::sync::RwLock;
-use std::fmt;
-use std::io::Write;
-use std::io;
-use std::io::{Seek, SeekFrom};
-use directory::Directory;
-use directory::ReadOnlySource;
-use directory::WritePtr;
-use std::io::BufWriter;
-use std::fs::OpenOptions;
-use directory::error::{OpenWriteError, FileError, OpenDirectoryError};
-use std::result;
 use common::make_io_err;
-use std::sync::Arc;
-use std::fs;
+use directory::Directory;
+use directory::error::{OpenWriteError, OpenReadError, DeleteError, OpenDirectoryError};
+use directory::ReadOnlySource;
 use directory::shared_vec_slice::SharedVecSlice;
+use directory::WritePtr;
+use fst::raw::MmapReadOnly;
+use memmap::{Mmap, Protection};
+use std::collections::hash_map::Entry as HashMapEntry;
+use std::collections::HashMap;
+use std::convert::From;
+use std::fmt;
+use std::fs::{self, File};
+use std::fs::OpenOptions;
+use std::io::{self, Seek, SeekFrom};
+use std::io::{BufWriter, Read, Write};
+use std::mem;
+use std::path::{Path, PathBuf};
+use std::result;
+use std::sync::Arc;
+use std::sync::RwLock;
+use std::sync::Weak;
+use tempdir::TempDir;

+fn open_mmap(full_path: &PathBuf) -> result::Result<Option<Arc<Mmap>>, OpenReadError> {
+    let convert_file_error = |err: io::Error| {
+        if err.kind() == io::ErrorKind::NotFound {
+            OpenReadError::FileDoesNotExist(full_path.clone())
+        }
+        else {
+            OpenReadError::IOError(err)
+        }
+    };
+    let file = File::open(&full_path).map_err(convert_file_error)?;
+    let meta_data = file
+        .metadata()
+        .map_err(|e| OpenReadError::IOError(e))?;
+    if meta_data.len() == 0 {
+        // if the file size is 0, it will not be possible
+        // to mmap the file, so we return `None` and leave it
+        // to the caller to substitute an empty source instead.
+        return Ok(None)
+    }
+    match Mmap::open(&file, Protection::Read) {
+        Ok(mmap) => {
+            Ok(Some(Arc::new(mmap)))
+        }
+        Err(e) => {
+            Err(OpenReadError::IOError(e))
+        }
+    }
+    
+}
+
+#[derive(Default,Clone,Debug,Serialize,Deserialize)]
+pub struct CacheCounters {
+    // Number of times the cache avoided a call to `mmap`
+    pub hit: usize,
+    // Number of times tantivy had to call `mmap`
+    // as no entry was in the cache.
+    pub miss_empty: usize,
+    // Number of times tantivy had to call `mmap`
+    // as the entry in the cache was evicted.
+    pub miss_weak: usize,
+}
+
+#[derive(Clone,Debug,Serialize,Deserialize)]
+pub struct CacheInfo {
+    pub counters: CacheCounters,
+    pub mmapped: Vec<PathBuf>,
+}
+
+struct MmapCache {
+    counters: CacheCounters,
+    cache: HashMap<PathBuf, Weak<Mmap>>,
+    purge_weak_limit: usize,
+}
+
+const STARTING_PURGE_WEAK_LIMIT: usize = 1_000;
+
+impl Default for MmapCache {
+    fn default() -> MmapCache {
+        MmapCache {
+            counters: CacheCounters::default(),
+            cache: HashMap::new(),
+            purge_weak_limit: STARTING_PURGE_WEAK_LIMIT,
+        }
+    }
+}
+
+
+impl MmapCache {
+
+   fn cleanup(&mut self) {
+        let previous_cache_size = self.cache.len();
+        let mut new_cache = HashMap::new();
+        mem::swap(&mut new_cache, &mut self.cache);
+        self.cache = new_cache
+            .into_iter()
+            .filter(|&(_, ref weak_ref)| weak_ref.upgrade().is_some())
+            .collect();
+        if self.cache.len() == previous_cache_size {
+            self.purge_weak_limit *= 2;
+        }
+    }
+
+    fn get_info(&mut self) -> CacheInfo {
+        self.cleanup();
+        let paths: Vec<PathBuf> = self.cache.keys()
+            .cloned()
+            .collect();
+        CacheInfo {
+            counters: self.counters.clone(),
+            mmapped: paths,
+        }
+    }
+
+    fn get_mmap(&mut self, full_path: PathBuf) -> Result<Option<Arc<Mmap>>, OpenReadError> {
+        // if we exceed this limit, then we go through the weak
+        // references and remove those that are obsolete.
+        if self.cache.len() > self.purge_weak_limit {
+            self.cleanup();
+        }
+        Ok(match self.cache.entry(full_path.clone()) {
+            HashMapEntry::Occupied(mut occupied_entry) => {
+                if let Some(mmap_arc) = occupied_entry.get().upgrade() {
+                    self.counters.hit += 1;
+                    Some(mmap_arc.clone())
+                }
+                else {
+                    // The entry exists but the weak ref has been destroyed.
+                    self.counters.miss_weak += 1;
+                    if let Some(mmap_arc) = open_mmap(&full_path)? {
+                        occupied_entry.insert(Arc::downgrade(&mmap_arc));
+                        Some(mmap_arc)
+                    }
+                    else {
+                        None
+                    }
+                }
+            }
+            HashMapEntry::Vacant(vacant_entry) => {
+                self.counters.miss_empty += 1;
+                if let Some(mmap_arc) = open_mmap(&full_path)? {
+                    vacant_entry.insert(Arc::downgrade(&mmap_arc));
+                    Some(mmap_arc)
+                }
+                else {
+                    None
+                }
+            }
+        })
+    }
+}

 /// Directory storing data in files, read via mmap.
 ///
@@ -30,8 +161,9 @@ use directory::shared_vec_slice::SharedVecSlice;
 #[derive(Clone)]
 pub struct MmapDirectory {
    root_path: PathBuf,
-    mmap_cache: Arc<RwLock<HashMap<PathBuf, MmapReadOnly>>>,
+    mmap_cache: Arc<RwLock<MmapCache>>,
    _temp_directory: Arc<Option<TempDir>>,
+    
 }

 impl fmt::Debug for MmapDirectory {
@@ -40,8 +172,6 @@ impl fmt::Debug for MmapDirectory {
   }
 }

-
-
 impl MmapDirectory {

    /// Creates a new MmapDirectory in a temporary directory.
@@ -53,13 +183,12 @@ impl MmapDirectory {
        let tempdir_path = PathBuf::from(tempdir.path());
        let directory = MmapDirectory {
            root_path: PathBuf::from(tempdir_path),
-            mmap_cache: Arc::new(RwLock::new(HashMap::new())),
+            mmap_cache: Arc::new(RwLock::new(MmapCache::default())),
            _temp_directory: Arc::new(Some(tempdir))
        };
        Ok(directory)
    }

-
    /// Opens a MmapDirectory in a directory.
    ///
    /// Returns an error if the `directory_path` does not
@@ -74,7 +203,7 @@ impl MmapDirectory {
        else {
            Ok(MmapDirectory {
                root_path: PathBuf::from(directory_path),
-                mmap_cache: Arc::new(RwLock::new(HashMap::new())),
+                mmap_cache: Arc::new(RwLock::new(MmapCache::default())),
                _temp_directory: Arc::new(None)
            })
        }
@@ -89,11 +218,40 @@ impl MmapDirectory {
    /// Sync the root directory.
    /// In certain FS, this is required to persistently create
    /// a file.
-    fn sync_directory(&self,) -> Result<(), io::Error> {
-        let fd = try!(File::open(&self.root_path));
+    fn sync_directory(&self) -> Result<(), io::Error> {
+        let mut open_opts = OpenOptions::new();
+
+        // Linux needs read to be set, otherwise returns EINVAL
+        // write must not be set, or it fails with EISDIR
+        open_opts.read(true);
+
+        // On Windows, opening a directory requires FILE_FLAG_BACKUP_SEMANTICS
+        // and calling sync_all() only works if write access is requested.
+        #[cfg(windows)]
+        {
+            use std::os::windows::fs::OpenOptionsExt;
+            use winapi::winbase;
+
+            open_opts.write(true)
+                .custom_flags(winbase::FILE_FLAG_BACKUP_SEMANTICS);
+        }
+
+        let fd = try!(open_opts.open(&self.root_path));
        try!(fd.sync_all());
        Ok(())
    }
+    /// Returns some statistical information
+    /// about the Mmap cache.
+    /// 
+    /// The `MmapDirectory` embeds a `MmapCache`
+    /// to avoid multiplying the `mmap` system calls.
+    pub fn get_cache_info(&mut self) -> CacheInfo {
+        self.mmap_cache
+            .write()
+            .expect("Mmap cache lock is poisoned.")
+            .get_info()
+    }
+        

 }

@@ -128,47 +286,21 @@ impl Seek for SafeFileWriter {

 impl Directory for MmapDirectory {
    
-    
-
-    fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, FileError> {
+    fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenReadError> {
        debug!("Open Read {:?}", path);
        let full_path = self.resolve_path(path);
        
-        let mut mmap_cache = try!(
-            self.mmap_cache
-                .write()
-                .map_err(|_| {
-                    make_io_err(format!("Failed to acquired write lock on mmap cache while reading {:?}", path))
-                })
-        );
-
-        let mmap = match mmap_cache.entry(full_path.clone()) {
-            HashMapEntry::Occupied(e) => {
-                e.get().clone()
-            }
-            HashMapEntry::Vacant(vacant_entry) => {
-                let file = try!(
-                    File::open(&full_path).map_err(|err| {
-                        if err.kind() == io::ErrorKind::NotFound {
-                            FileError::FileDoesNotExist(full_path.clone())
-                        }
-                        else {
-                            FileError::IOError(err)
-                        }
-                    })
-                );
-                if try!(file.metadata()).len() == 0 {
-                    // if the file size is 0, it will not be possible 
-                    // to mmap the file, so we return an anonymous mmap_cache
-                    // instead.
-                    return Ok(ReadOnlySource::Anonymous(SharedVecSlice::empty()))
-                }
-                let new_mmap = try!(MmapReadOnly::open(&file));
-                vacant_entry.insert(new_mmap.clone());
-                new_mmap
-            }
-        };        
-        Ok(ReadOnlySource::Mmap(mmap))
+        let mut mmap_cache = self.mmap_cache
+            .write()
+            .map_err(|_| OpenReadError::IOError(
+                make_io_err(format!("Failed to acquired write lock on mmap cache while reading {:?}", path))
+            ))?;
+        
+        Ok(mmap_cache.get_mmap(full_path)?
+            .map(MmapReadOnly::from)
+            .map(ReadOnlySource::Mmap)
+            .unwrap_or(ReadOnlySource::Anonymous(SharedVecSlice::empty()))
+        )
    }
    
    fn open_write(&mut self, path: &Path) -> Result<WritePtr, OpenWriteError> {
@@ -202,22 +334,32 @@ impl Directory for MmapDirectory {
        Ok(BufWriter::new(Box::new(writer)))
    }

-    fn delete(&self, path: &Path) -> result::Result<(), FileError> {
-        debug!("Delete {:?}", path);
+    fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
+        debug!("Deleting file {:?}", path);
        let full_path = self.resolve_path(path);
        let mut mmap_cache = try!(self.mmap_cache
            .write()
-            .map_err(|_| {
-                make_io_err(format!("Failed to acquired write lock on mmap cache while deleting {:?}", path))
-            })
+            .map_err(|_| 
+                 DeleteError::IOError(make_io_err(format!("Failed to acquired write lock on mmap cache while deleting {:?}", path))))
        );
        // Removing the entry in the MMap cache.
        // The munmap will appear on Drop,
        // when the last reference is gone.
-        mmap_cache.remove(&full_path);
-        try!(fs::remove_file(&full_path));
-        try!(self.sync_directory());
-        Ok(())
+        mmap_cache.cache.remove(&full_path);
+        match fs::remove_file(&full_path) {
+            Ok(_) => {
+                self.sync_directory()
+                    .map_err(|e| DeleteError::IOError(e))
+            }
+            Err(e) => {
+                if e.kind() == io::ErrorKind::NotFound {
+                    Err(DeleteError::FileDoesNotExist(path.to_owned()))
+                }
+                else {
+                    Err(DeleteError::IOError(e))
+                }
+            }
+        }
    }

    fn exists(&self, path: &Path) -> bool {
@@ -225,6 +367,27 @@ impl Directory for MmapDirectory {
        full_path.exists()
    }

+    fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError> {
+        let full_path = self.resolve_path(path);
+        let mut buffer = Vec::new();
+        match File::open(&full_path) {
+            Ok(mut file) => {
+                file.read_to_end(&mut buffer)
+                    .map_err(|e| OpenReadError::IOError(e))?;
+                Ok(buffer)
+            }
+            Err(e) => {
+                if e.kind() == io::ErrorKind::NotFound {
+                    Err(OpenReadError::FileDoesNotExist(path.to_owned()))
+                }
+                else {
+                    Err(OpenReadError::IOError(e))
+                }
+            }
+        }
+        
+    }
+
    fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> {
        debug!("Atomic Write {:?}", path);
        let full_path = self.resolve_path(path);
@@ -238,5 +401,100 @@ impl Directory for MmapDirectory {
    fn box_clone(&self,) -> Box<Directory> {
        Box::new(self.clone())
    }
-    
+}
+
+
+
+
+#[cfg(test)]
+mod tests {
+
+    // There are more tests in directory/mod.rs
+    // The following tests are specific to the MmapDirectory
+
+    use super::*;
+
+    #[test]
+    fn test_open_empty() {
+        // empty file is actually an edge case because those
+        // cannot be mmapped.
+        //
+        // In that case the directory returns a SharedVecSlice.
+        let mut mmap_directory = MmapDirectory::create_from_tempdir().unwrap();
+        let path = PathBuf::from("test");
+        {
+            let mut w = mmap_directory.open_write(&path).unwrap();
+            w.flush().unwrap();
+        }
+        let readonlymap = mmap_directory.open_read(&path).unwrap();
+        assert_eq!(readonlymap.len(), 0);
+    }
+
+    #[test]
+    fn test_cache() {
+        let content = "abc".as_bytes();
+
+        // here we test if the cache releases
+        // mmaps correctly.
+        let mut mmap_directory = MmapDirectory::create_from_tempdir().unwrap();
+        let paths: Vec<PathBuf> = (0..10)
+            .map(|i| PathBuf::from(&*format!("file_{}", i)))
+            .collect();
+        {
+            for path in &paths {
+                let mut w = mmap_directory.open_write(path).unwrap();
+                w.write(content).unwrap();
+                w.flush().unwrap();
+            }
+        }
+        {
+            for path in &paths {
+                {
+                    let _r = mmap_directory.open_read(path).unwrap();
+                    assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 1);
+                }
+                assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 0);
+            }
+        }
+        assert_eq!(mmap_directory.get_cache_info().counters.miss_empty, 10);
+        
+        
+        {        
+            // test weak miss
+            // the first pass creates the weak refs.
+            for path in &paths {
+                let _r = mmap_directory.open_read(path).unwrap();
+            }
+            // ... the second hits the weak refs.
+            for path in &paths {
+                let _r = mmap_directory.open_read(path).unwrap();
+            }
+            let cache_info = mmap_directory.get_cache_info();
+            assert_eq!(cache_info.counters.miss_empty, 20);
+            assert_eq!(cache_info.counters.miss_weak, 10);
+        }
+
+        {
+            let mut saved_readmmaps = vec!();
+            // Keeps reference alive
+            for (i, path) in paths.iter().enumerate() {
+                let r = mmap_directory.open_read(path).unwrap();
+                saved_readmmaps.push(r);
+                assert_eq!(mmap_directory.get_cache_info().mmapped.len(), i + 1);
+            }
+            let cache_info = mmap_directory.get_cache_info();
+            println!("{:?}", cache_info);
+            assert_eq!(cache_info.counters.miss_empty, 30);
+            assert_eq!(cache_info.counters.miss_weak, 10);
+            assert_eq!(cache_info.mmapped.len(), 10);
+
+            for saved_readmmap in saved_readmmaps {
+                assert_eq!(saved_readmmap.as_slice(), content);
+            }
+        }
+
+        assert_eq!(mmap_directory.get_cache_info().mmapped.len(), 0);
+        
+    }
+
 }
--- a/src/directory/mod.rs
+++ b/src/directory/mod.rs
@@ -3,6 +3,7 @@ mod ram_directory;
 mod directory;
 mod read_only_source;
 mod shared_vec_slice;
+mod managed_directory;

 /// Errors specific to the directory module.
 pub mod error;
@@ -14,6 +15,7 @@ pub use self::read_only_source::ReadOnlySource;
 pub use self::directory::Directory;
 pub use self::ram_directory::RAMDirectory;
 pub use self::mmap_directory::MmapDirectory;
+pub use self::managed_directory::{ManagedDirectory, FileProtection};

 /// Synonym of Seek + Write
 pub trait SeekableWrite: Seek + Write {}
@@ -58,31 +60,37 @@ mod tests {

     fn test_simple(directory: &mut Directory) {
         {
-            let mut write_file = directory.open_write(*TEST_PATH).unwrap();
-            assert!(directory.exists(*TEST_PATH));
-            write_file.write_all(&[4]).unwrap();
-            write_file.write_all(&[3]).unwrap();
-            write_file.write_all(&[7,3,5]).unwrap();
-            write_file.flush().unwrap();
+            { // inner scope: `write_file` is dropped before the file is read back
+                let mut write_file = directory.open_write(*TEST_PATH).unwrap();
+                assert!(directory.exists(*TEST_PATH));
+                write_file.write_all(&[4]).unwrap();
+                write_file.write_all(&[3]).unwrap();
+                write_file.write_all(&[7,3,5]).unwrap();
+                write_file.flush().unwrap();
+            }
+            let read_file = directory.open_read(*TEST_PATH).unwrap();
+            let data: &[u8] = &*read_file;
+            assert_eq!(data, &[4u8, 3u8, 7u8, 3u8, 5u8]);
         }
-        let read_file = directory.open_read(*TEST_PATH).unwrap();
-        let data: &[u8] = &*read_file;
-        assert_eq!(data, &[4u8, 3u8, 7u8, 3u8, 5u8]);
+        // `read_file` was scoped to the block above, so no read handle outlives this point.
         assert!(directory.delete(*TEST_PATH).is_ok());
         assert!(!directory.exists(*TEST_PATH));
     }

     fn test_seek(directory: &mut Directory) {
         {
-            let mut write_file = directory.open_write(*TEST_PATH).unwrap();
-            write_file.write_all(&[4, 3, 7,3,5]).unwrap();
-            write_file.seek(SeekFrom::Start(0)).unwrap();
-            write_file.write_all(&[3,1]).unwrap();
-            write_file.flush().unwrap();
+            { // inner scope: `write_file` is dropped before the file is read back
+                let mut write_file = directory.open_write(*TEST_PATH).unwrap();
+                write_file.write_all(&[4, 3, 7,3,5]).unwrap();
+                write_file.seek(SeekFrom::Start(0)).unwrap(); // rewind: the next write lands on bytes 0 and 1
+                write_file.write_all(&[3,1]).unwrap();
+                write_file.flush().unwrap();
+            }
+            let read_file = directory.open_read(*TEST_PATH).unwrap();
+            let data: &[u8] = &*read_file;
+            assert_eq!(data, &[3u8, 1u8, 7u8, 3u8, 5u8]);
         }
-        let read_file = directory.open_read(*TEST_PATH).unwrap();
-        let data: &[u8] = &*read_file;
-        assert_eq!(data, &[3u8, 1u8, 7u8, 3u8, 5u8]);
+        // `read_file` was scoped to the block above, so no read handle outlives this point.
         assert!(directory.delete(*TEST_PATH).is_ok());
     }

@@ -111,19 +119,32 @@ mod tests {
        }
    }

-    fn test_delete(directory: &mut Directory) {
+    fn test_directory_delete(directory: &mut Directory) {
         assert!(directory.open_read(*TEST_PATH).is_err());
         let mut write_file = directory.open_write(*TEST_PATH).unwrap();
         write_file.write_all(&[1, 2, 3, 4]).unwrap();
         write_file.flush().unwrap();
-        let read_handle = directory.open_read(*TEST_PATH).unwrap();  
         {
-            assert_eq!(&*read_handle, &[1u8, 2u8, 3u8, 4u8]);
-            assert!(directory.delete(*TEST_PATH).is_ok());
-            assert!(directory.delete(Path::new("SomeOtherPath")).is_err());
-            assert_eq!(&*read_handle, &[1u8, 2u8, 3u8, 4u8]);
+            let read_handle = directory.open_read(*TEST_PATH).unwrap();
+            {
+                assert_eq!(&*read_handle, &[1u8, 2u8, 3u8, 4u8]);
+                // Mapped files can't be deleted on Windows, so deleting while `read_handle`
+                // is alive (and re-reading through it afterwards) is only checked elsewhere.
+                if !cfg!(windows) {
+                    assert!(directory.delete(*TEST_PATH).is_ok());
+                    assert_eq!(&*read_handle, &[1u8, 2u8, 3u8, 4u8]);
+                }
+                // A path that was never written by this test cannot be deleted.
+                assert!(directory.delete(Path::new("SomeOtherPath")).is_err());
+            }
         }
+        // Windows skipped the deletion above; `read_handle` is out of scope now, so do it here.
+        if cfg!(windows) {
+            assert!(directory.delete(*TEST_PATH).is_ok());
+        }
+        // Once deleted, the file can be neither opened nor deleted a second time.
         assert!(directory.open_read(*TEST_PATH).is_err());
+        assert!(directory.delete(*TEST_PATH).is_err());
     }

    fn test_directory(directory: &mut Directory) {
@@ -131,7 +152,7 @@ mod tests {
        test_seek(directory);
        test_rewrite_forbidden(directory);
        test_write_create_the_file(directory);
-        test_delete(directory);
+        test_directory_delete(directory);
    }

 }
--- a/src/directory/ram_directory.rs
+++ b/src/directory/ram_directory.rs
@@ -6,7 +6,7 @@ use std::result;
 use std::sync::{Arc, RwLock};
 use common::make_io_err;
 use directory::{Directory, ReadOnlySource};
-use directory::error::{OpenWriteError, FileError};
+use directory::error::{OpenWriteError, OpenReadError, DeleteError};
 use directory::WritePtr;
 use super::shared_vec_slice::SharedVecSlice;

@@ -55,7 +55,7 @@ impl Seek for VecWriter {
 impl Write for VecWriter {
     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
         self.is_flushed = false;
-        try!(self.data.write(buf));
+        try!(self.data.write_all(buf)); // plain `write` may stop short, yet `buf.len()` is reported below
         Ok(buf.len())
     }

@@ -87,29 +87,29 @@ impl InnerDirectory {
        Ok(prev_value.is_some())
    }

-    fn open_read(&self, path: &Path) -> Result<ReadOnlySource, FileError> { 
+    fn open_read(&self, path: &Path) -> Result<ReadOnlySource, OpenReadError> { 
        self.0
            .read()
            .map_err(|_| {
                let io_err = make_io_err(format!("Failed to acquire read lock for the directory, when trying to read {:?}", path));
-                FileError::IOError(io_err)
+                OpenReadError::IOError(io_err)
            })
            .and_then(|readable_map| {
                readable_map
                .get(path)
-                .ok_or_else(|| FileError::FileDoesNotExist(PathBuf::from(path)))
+                .ok_or_else(|| OpenReadError::FileDoesNotExist(PathBuf::from(path)))
                .map(|data| {
                    ReadOnlySource::Anonymous(SharedVecSlice::new(data.clone()))
                })
            })
    }

-    fn delete(&self, path: &Path) -> result::Result<(), FileError> {
+    fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
        self.0
            .write()
            .map_err(|_| {
                let io_err = make_io_err(format!("Failed to acquire write lock for the directory, when trying to delete {:?}", path));
-                FileError::IOError(io_err)
+                DeleteError::IOError(io_err)
            })
            .and_then(|mut writable_map| {
                match writable_map.remove(path) {
@@ -117,7 +117,7 @@ impl InnerDirectory {
                        Ok(())
                    },
                    None => {
-                        Err(FileError::FileDoesNotExist(PathBuf::from(path)))
+                        Err(DeleteError::FileDoesNotExist(PathBuf::from(path)))
                    }
                }
            })
@@ -160,7 +160,7 @@ impl RAMDirectory {
 }

 impl Directory for RAMDirectory {
-    fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, FileError> {
+    fn open_read(&self, path: &Path) -> result::Result<ReadOnlySource, OpenReadError> {
        self.fs.open_read(path)
    }
    
@@ -176,7 +176,7 @@ impl Directory for RAMDirectory {
        }
    }

-    fn delete(&self, path: &Path) -> result::Result<(), FileError> {
+    fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
        self.fs.delete(path)
    }

@@ -185,6 +185,12 @@ impl Directory for RAMDirectory {
        self.fs.exists(path)
    }

+    /// Returns a private copy of the bytes stored at `path`; errors are those of `open_read`.
+    fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError> {
+        let source = self.open_read(path)?;
+        Ok(source.as_slice().to_vec())
+    }
+
    fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> {
        let path_buf = PathBuf::from(path);
        let mut vec_writer = VecWriter::new(path_buf.clone(), self.fs.clone());
--- a/src/directory/read_only_source.rs
+++ b/src/directory/read_only_source.rs
@@ -79,3 +79,10 @@ impl Clone for ReadOnlySource {
        self.slice(0, self.len())
    }
 }
+
+/// Builds the `Anonymous` variant from an owned byte buffer, going through `SharedVecSlice`.
+impl From<Vec<u8>> for ReadOnlySource {
+    fn from(data: Vec<u8>) -> ReadOnlySource {
+        ReadOnlySource::Anonymous(SharedVecSlice::from(data))
+    }
+}
--- a/src/directory/shared_vec_slice.rs
+++ b/src/directory/shared_vec_slice.rs
@@ -35,3 +35,9 @@ impl SharedVecSlice {
        }
    }
 }
+
+/// Takes ownership of `data`, moves it behind an `Arc`, and hands that `Arc` to
+/// `SharedVecSlice::new`.
+impl From<Vec<u8>> for SharedVecSlice {
+    fn from(data: Vec<u8>) -> Self { SharedVecSlice::new(Arc::new(data)) }
+}
--- a/src/error.rs
+++ b/src/error.rs
@@ -3,17 +3,15 @@
 /// Definition of Tantivy's error and result.

 use std::io;
-use std::result;
+
 use std::path::PathBuf;
 use std::error;
 use std::sync::PoisonError;
-use directory::error::{FileError, OpenWriteError, OpenDirectoryError};
+use directory::error::{OpenReadError, OpenWriteError, OpenDirectoryError};
 use query;
 use schema;
-
-
-/// Tantivy result.
-pub type Result<T> = result::Result<T, Error>;
+use fastfield::FastFieldNotAvailableError;
+use serde_json;


 /// Generic tantivy error.
@@ -32,11 +30,21 @@ pub enum Error {
    /// The data within is corrupted.
    ///
    /// For instance, it contains invalid JSON.
-    CorruptedFile(PathBuf, Box<error::Error + Send>),
+    CorruptedFile(PathBuf, Box<error::Error + Send + Sync>),
    /// Invalid argument was passed by the user.
    InvalidArgument(String),
    /// An Error happened in one of the thread
-    ErrorInThread(String), // TODO investigate better solution
+    ErrorInThread(String),
+    /// An Error appeared related to the lack of a field.
+    SchemaError(String),
+    /// Tried to access a fastfield reader for a field not configured accordingly.
+    FastFieldError(FastFieldNotAvailableError)
+}
+
+/// Lets `?` turn a `FastFieldNotAvailableError` into this crate's `Error` by wrapping
+/// it in the `FastFieldError` variant.
+impl From<FastFieldNotAvailableError> for Error {
+    fn from(err: FastFieldNotAvailableError) -> Error { Error::FastFieldError(err) }
 }

 impl From<io::Error> for Error {
@@ -57,11 +65,11 @@ impl<Guard> From<PoisonError<Guard>> for Error {
    }
 }

-impl From<FileError> for Error {
-    fn from(error: FileError) -> Error {
+impl From<OpenReadError> for Error {
+    fn from(error: OpenReadError) -> Error {
        match error {
-            FileError::FileDoesNotExist(filepath) => Error::PathDoesNotExist(filepath),
-            FileError::IOError(io_error) => Error::IOError(io_error),
+            OpenReadError::FileDoesNotExist(filepath) => Error::PathDoesNotExist(filepath),
+            OpenReadError::IOError(io_error) => Error::IOError(io_error),
        }
    }
 }
@@ -93,3 +101,9 @@ impl From<OpenDirectoryError> for Error {
        }
    }
 }
+
+/// Folds a `serde_json` failure into `Error::IOError`, after converting it to an
+/// `io::Error` through that type's `From` impl.
+impl From<serde_json::Error> for Error {
+    fn from(error: serde_json::Error) -> Error { Error::IOError(io::Error::from(error)) }
+}
--- a/src/fastfield/delete.rs
+++ b/src/fastfield/delete.rs
@@ -0,0 +1,135 @@
+use bit_set::BitSet;
+use directory::WritePtr;
+use std::io::Write;
+use std::io;
+use directory::ReadOnlySource;
+use DocId;
+use common::HasLen;
+
+/// Serializes `delete_bitset`, the set of deleted `DocId`s, into `writer`.
+///
+/// One bit is emitted for every doc in `0..delete_bitset.capacity()`: doc `d` lands in
+/// the `d / 8`-th byte written, at bit `d % 8` counting from the least significant bit.
+/// A trailing partial byte is padded with zeros. The writer is flushed at the end.
+pub fn write_delete_bitset(delete_bitset: &BitSet, writer: &mut WritePtr) -> io::Result<()> {
+    let num_docs = delete_bitset.capacity();
+    let mut packed = 0u8;
+    for doc in 0..num_docs {
+        let bit = doc % 8;
+        if delete_bitset.contains(doc) {
+            packed |= 1u8 << bit;
+        }
+        if bit == 7 {
+            // The byte is complete: emit it and start over with a blank one.
+            writer.write_all(&[packed])?;
+            packed = 0u8;
+        }
+    }
+    if num_docs % 8 != 0 {
+        // `num_docs` is not a multiple of 8: the last, partially filled byte is still pending.
+        writer.write_all(&[packed])?;
+    }
+    writer.flush()
+}
+
+/// Set of deleted `DocId`s, read back from the bytes emitted by `write_delete_bitset`.
+///
+/// Doc `d` is deleted iff bit `d % 8` (least significant bit first) of byte `d / 8` is set.
+#[derive(Clone)]
+pub struct DeleteBitSet {
+    /// Packed bits, one per doc.
+    data: ReadOnlySource,
+    /// Number of bits set in `data`.
+    len: usize,
+}
+
+impl DeleteBitSet {
+    /// Opens a delete bitset given its data source.
+    ///
+    /// All of `data` is scanned once to count the deleted docs.
+    pub fn open(data: ReadOnlySource) -> DeleteBitSet {
+        let num_deleted: usize = data.as_slice()
+            .iter()
+            .map(|byte| byte.count_ones() as usize)
+            .sum();
+        DeleteBitSet {
+            data: data,
+            len: num_deleted,
+        }
+    }
+
+    /// Returns an empty delete bit set.
+    pub fn empty() -> DeleteBitSet {
+        DeleteBitSet {
+            data: ReadOnlySource::empty(),
+            len: 0,
+        }
+    }
+
+    /// Returns true iff the segment has some deleted documents.
+    pub fn has_deletes(&self) -> bool {
+        self.len() > 0
+    }
+
+    /// Returns true iff the document is deleted.
+    ///
+    /// A `doc` lying beyond the stored bits is reported as not deleted rather than
+    /// panicking, which is also what an empty set answers for every `doc`.
+    pub fn is_deleted(&self, doc: DocId) -> bool {
+        let byte_offset = (doc / 8u32) as usize;
+        let mask = 1u8 << (doc & 7u32);
+        self.data.as_slice().get(byte_offset).map_or(false, |&byte| byte & mask != 0)
+    }
+}
+
+impl HasLen for DeleteBitSet {
+    /// Number of deleted docs.
+    fn len(&self) -> usize {
+        self.len
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::path::PathBuf;
+    use bit_set::BitSet;
+    use directory::*;
+    use super::*;
+
+    /// Writes `bitset` with `write_delete_bitset` into a `RAMDirectory`, reopens the
+    /// file as a `DeleteBitSet`, and checks that both agree on every doc below
+    /// `bitset.capacity()` as well as on the number of deleted docs.
+    fn test_delete_bitset_helper(bitset: &BitSet) {
+        let test_path = PathBuf::from("test");
+        let mut directory = RAMDirectory::create();
+        {
+            let mut writer = directory.open_write(&*test_path).unwrap();
+            write_delete_bitset(bitset, &mut writer).unwrap();
+        }
+        {
+            let source = directory.open_read(&test_path).unwrap();
+            let reopened = DeleteBitSet::open(source);
+            for doc in 0..bitset.capacity() {
+                assert_eq!(bitset.contains(doc), reopened.is_deleted(doc as DocId));
+            }
+            assert_eq!(reopened.len(), bitset.len());
+        }
+    }
+
+    /// Builds a `BitSet` with the requested capacity and inserts each of `docs` into it.
+    fn bitset_of(capacity: usize, docs: &[usize]) -> BitSet {
+        let mut bitset = BitSet::with_capacity(capacity);
+        for &doc in docs {
+            bitset.insert(doc);
+        }
+        bitset
+    }
+
+    #[test]
+    fn test_delete_bitset() {
+        // Sparse set, requested capacity of 10.
+        test_delete_bitset_helper(&bitset_of(10, &[1, 9]));
+        // Denser set, requested capacity of 8.
+        test_delete_bitset_helper(&bitset_of(8, &[1, 2, 3, 5, 7]));
+    }
+}
--- a/Show More
+++ b/Show More