diff --git a/.gitmodules b/.gitmodules
index 90a028084..169b9dd7b 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,3 @@
-[submodule "cpp/SIMDCompressionAndIntersection"]
-	path = cpp/SIMDCompressionAndIntersection
-	url = git@github.com:lemire/SIMDCompressionAndIntersection.git
 [submodule "cpp/simdcomp"]
 	path = cpp/simdcomp
 	url = git@github.com:lemire/simdcomp.git
diff --git a/build.rs b/build.rs
index c7a692574..aa7b39004 100644
--- a/build.rs
+++ b/build.rs
@@ -4,37 +4,17 @@ extern crate gcc;
 use std::process::Command;
 
 fn main() {
-    
-    Command::new("make")
-        .current_dir("cpp/SIMDCompressionAndIntersection")
-        .output()
-        .unwrap_or_else(|e| { panic!("Failed to make SIMDCompressionAndIntersection: {}", e) });
-
     Command::new("make")
         .current_dir("cpp/simdcomp")
         .output()
         .unwrap_or_else(|e| { panic!("Failed to make simdcomp: {}", e) });
     
-    
     gcc::Config::new()
                 .cpp(true)
                 .flag("-std=c++11")
                 .flag("-O3")
                 .flag("-mssse3")
-                .include("./cpp/SIMDCompressionAndIntersection/include")
                 .include("./cpp/simdcomp/include")
-                .object("cpp/SIMDCompressionAndIntersection/bitpacking.o")
-                .object("cpp/SIMDCompressionAndIntersection/integratedbitpacking.o")
-                .object("cpp/SIMDCompressionAndIntersection/simdbitpacking.o")
-                .object("cpp/SIMDCompressionAndIntersection/usimdbitpacking.o")
-                .object("cpp/SIMDCompressionAndIntersection/simdintegratedbitpacking.o")
-                .object("cpp/SIMDCompressionAndIntersection/intersection.o")
-                .object("cpp/SIMDCompressionAndIntersection/varintdecode.o")
-                .object("cpp/SIMDCompressionAndIntersection/streamvbyte.o")
-                .object("cpp/SIMDCompressionAndIntersection/simdpackedsearch.o")
-                .object("cpp/SIMDCompressionAndIntersection/simdpackedselect.o")
-                .object("cpp/SIMDCompressionAndIntersection/frameofreference.o")
-                .object("cpp/SIMDCompressionAndIntersection/for.o")
                 .object("cpp/simdcomp/avxbitpacking.o")
                 .object("cpp/simdcomp/simdintegratedbitpacking.o")
                 .object("cpp/simdcomp/simdbitpacking.o")
@@ -42,8 +22,7 @@ fn main() {
                 .object("cpp/simdcomp/simdcomputil.o")
                 .object("cpp/simdcomp/simdpackedselect.o")
                 .object("cpp/simdcomp/simdfor.o")
-                .file("cpp/encode.cpp")
                 .file("cpp/simdcomp_wrapper.cpp")
-                .compile("libsimdcompression.a");
+                .compile("libsimdcomp.a");
     println!("cargo:rustc-flags=-l dylib=stdc++");
 }
diff --git a/cpp/SIMDCompressionAndIntersection b/cpp/SIMDCompressionAndIntersection
deleted file mode 160000
index 1f8e12aeb..000000000
--- a/cpp/SIMDCompressionAndIntersection
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 1f8e12aebd0a845b49fc8b1765d69c6fde118d4d
diff --git a/src/compression/block128.rs b/src/compression/block128.rs
deleted file mode 100644
index 57321a792..000000000
--- a/src/compression/block128.rs
+++ /dev/null
@@ -1,177 +0,0 @@
-
-use libc::size_t;
-use std::ptr;
-
-extern {
-    fn encode_sorted_block128_native(data: *mut u32, output: *mut u8, output_capacity: size_t) -> size_t;
-    fn decode_sorted_block128_native(compressed_data: *const u8, compressed_size: size_t, uncompressed: *mut u32) -> usize;
-
-    fn encode_block128_native(data: *mut u32, output: *mut u8, output_capacity: size_t) -> size_t;
-    fn decode_block128_native(compressed_data: *const u8, compressed_size: size_t, uncompressed: *mut u32) -> usize;
-
-    fn encode_sorted_vint_native(data: *mut u32, num_els: size_t, output: *mut u8, output_capacity: size_t) -> size_t;
-    fn decode_sorted_vint_native(compressed_data: *const u8, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
-
-}
-
-//-------------------------
-// Block128
-
-pub struct Block128Encoder {
-    input_buffer: [u32; 128],
-    output_buffer: [u8; 256 * 4],
-}
-
-impl Block128Encoder {
-
-    pub fn new() -> Block128Encoder {
-        Block128Encoder {
-            input_buffer: [0u32; 128],
-            output_buffer: [0u8; 256 * 4],
-        }
-    }
-
-    pub fn encode(&mut self, input: &[u32]) -> &[u8] {
-        assert_eq!(input.len(), 128);
-        // TODO use clone_from when available
-        let written_size: usize;
-        unsafe {
-            ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), 128);
-            written_size = encode_block128_native(
-                self.input_buffer.as_mut_ptr(),
-                self.output_buffer.as_mut_ptr(),
-                256 * 4,
-            );
-        }
-        return &self.output_buffer[0..written_size];
-    }
-
-    pub fn encode_sorted(&mut self, input: &[u32]) -> &[u8] {
-        assert_eq!(input.len(), 128);
-        // TODO use clone_from when available
-        let written_size: usize;
-        unsafe {
-            ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), 128);
-            written_size = encode_sorted_block128_native(
-                self.input_buffer.as_mut_ptr(),
-                self.output_buffer.as_mut_ptr(),
-                256 * 4,
-            );
-        }
-        return &self.output_buffer[0..written_size];
-    }
-}
-
-pub struct Block128Decoder {
-    output: [u32; 128],
-}
-
-impl Block128Decoder {
-
-    pub fn new() -> Block128Decoder {
-        Block128Decoder {
-            output: [0u32; 128]
-        }
-    }
-    
-    pub fn decode<'a, 'b>(
-          &'b mut self,
-          compressed_data: &'a [u8]) -> &'a[u8] {
-        unsafe {
-            let consumed_num_bytes: usize = decode_block128_native(
-                        compressed_data.as_ptr(),
-                        compressed_data.len() as size_t,
-                        self.output.as_mut_ptr());
-            &compressed_data[consumed_num_bytes..]
-        }
-    }
-
-    pub fn decode_sorted<'a, 'b>(
-          &'b mut self,
-          compressed_data: &'a [u8]) -> &'a [u8] {
-        unsafe {
-            let consumed_num_bytes: usize = decode_sorted_block128_native(
-                        compressed_data.as_ptr(),
-                        compressed_data.len() as size_t,
-                        self.output.as_mut_ptr());
-            &compressed_data[consumed_num_bytes..]
-        }
-    }
-    
-    pub fn decode_sorted_remaining(&mut self,
-        compressed_data: &[u8]) -> &[u32] {
-        unsafe {
-            let num_uncompressed = decode_sorted_vint_native(
-                compressed_data.as_ptr(),
-                compressed_data.len() as size_t,
-                self.output.as_mut_ptr(),
-                128);
-            &self.output[..num_uncompressed]
-        }
-    }
-    
-    pub fn output(&self,) -> &[u32; 128] {
-        &self.output
-    }
-}
-
-
-#[cfg(test)]
-mod tests {
-
-    use super::*;
-
-    #[test]
-    fn test_encode_sorted_block() {
-        for num_extra_values in [0, 2, 11].into_iter() {
-            let mut encoder = Block128Encoder::new();
-            let mut input = [0u32; 128];
-            for i in 0u32..128u32 {
-                input[i as usize] = i * 7 / 2;
-            }
-            let mut encoded_vec: Vec<u8> = encoder.encode_sorted(&input).to_vec();
-            assert_eq!(encoded_vec.len(), 84);
-            for i in 0u8..*num_extra_values as u8 {
-                encoded_vec.push(i);
-            }
-            let mut decoder = Block128Decoder::new();
-            let remaining_input = decoder.decode_sorted(&encoded_vec[..]);
-            let uncompressed_values = decoder.output();
-            assert_eq!(remaining_input.len(), *num_extra_values);
-            for i in 0..128 {
-                assert_eq!(uncompressed_values[i], input[i]);
-            }
-            for i in 0..*num_extra_values {
-                assert_eq!(remaining_input[i], i as u8);
-            }
-        }
-    }
-
-    #[test]
-    fn test_encode_block() {
-        for num_extra_values in [0, 2, 11].into_iter() {
-            let mut encoder = Block128Encoder::new();
-            let mut input = [0u32; 128];
-            for i in 0u32..128u32 {
-                input[i as usize] = i * 7 % 31;
-            }
-            let mut encoded_vec: Vec<u8> = encoder.encode(&input).to_vec();
-            assert_eq!(encoded_vec.len(), 100);
-            for i in 0u8..*num_extra_values as u8 {
-                encoded_vec.push(i);
-            }
-            let mut decoder = Block128Decoder::new();
-            let remaining_input: &[u8] = decoder.decode(&encoded_vec[..]);
-            let uncompressed_values = decoder.output();
-            assert_eq!(remaining_input.len(), *num_extra_values);
-            for i in 0..128 {
-                assert_eq!(uncompressed_values[i], input[i]);
-            }
-            for i in 0..*num_extra_values {
-                assert_eq!(remaining_input[i], i as u8);
-            }
-        }
-    }
-
-
-}
diff --git a/src/compression/composite.rs b/src/compression/composite.rs
new file mode 100644
index 000000000..731db2bee
--- /dev/null
+++ b/src/compression/composite.rs
@@ -0,0 +1,88 @@
+use compression::SIMDBlockEncoder;
+use compression::SIMDBlockDecoder;
+use super::NUM_DOCS_PER_BLOCK;
+
+pub struct CompositeEncoder {
+    block_encoder: SIMDBlockEncoder,
+    output: Vec<u8>,
+}
+
+impl CompositeEncoder {
+    
+    pub fn new() -> CompositeEncoder {
+        CompositeEncoder {
+            block_encoder: SIMDBlockEncoder::new(),
+            output: Vec::new(),
+        }
+    }
+    
+    pub fn compress_sorted(&mut self, vals: &[u32]) -> &[u8] {
+        self.output.clear();
+        let num_blocks = vals.len() / NUM_DOCS_PER_BLOCK;
+        let mut offset = 0u32;
+        for i in 0..num_blocks {
+            let vals_slice = &vals[i * NUM_DOCS_PER_BLOCK .. (i + 1) * NUM_DOCS_PER_BLOCK];
+            let block_compressed = self.block_encoder.compress_block_sorted(&vals_slice, offset);
+            offset = vals_slice[NUM_DOCS_PER_BLOCK - 1];
+            self.output.extend_from_slice(block_compressed);
+        }
+        let vint_compressed = self.block_encoder.compress_vint_sorted(&vals[num_blocks * NUM_DOCS_PER_BLOCK..], offset);
+        self.output.extend_from_slice(vint_compressed);
+        &self.output
+    }
+    
+    pub fn compress_unsorted(&mut self, vals: &[u32]) -> &[u8] {
+        self.output.clear();
+        let num_blocks = vals.len() / NUM_DOCS_PER_BLOCK;
+        for i in 0..num_blocks {
+            let vals_slice = &vals[i * NUM_DOCS_PER_BLOCK .. (i + 1) * NUM_DOCS_PER_BLOCK];
+            let block_compressed = self.block_encoder.compress_block_unsorted(&vals_slice);
+            self.output.extend_from_slice(block_compressed);
+        }
+        let vint_compressed = self.block_encoder.compress_vint_unsorted(&vals[num_blocks * NUM_DOCS_PER_BLOCK..]);
+        self.output.extend_from_slice(vint_compressed);
+        &self.output
+    }
+}
+
+
+pub struct CompositeDecoder {
+    block_decoder: SIMDBlockDecoder,
+    vals: Vec<u32>,
+}
+
+
+impl CompositeDecoder {
+    pub fn new() -> CompositeDecoder {
+        CompositeDecoder {
+            block_decoder: SIMDBlockDecoder::new(),
+            vals: Vec::new(),
+        }    
+    }
+    
+    pub fn uncompress_sorted(&mut self, mut compressed_data: &[u8], doc_freq: usize) -> &[u32] {
+        let mut offset = 0u32;
+        self.vals.clear();
+        let num_blocks = doc_freq / NUM_DOCS_PER_BLOCK;
+        for _ in 0..num_blocks {
+            compressed_data = self.block_decoder.uncompress_block_sorted(compressed_data, offset);
+            offset = self.block_decoder.output()[NUM_DOCS_PER_BLOCK - 1];
+            self.vals.extend_from_slice(self.block_decoder.output());
+        }
+        self.block_decoder.uncompress_vint_sorted(compressed_data, offset, doc_freq % NUM_DOCS_PER_BLOCK);
+        self.vals.extend_from_slice(self.block_decoder.output());
+        &self.vals
+    }
+    
+    pub fn uncompress_unsorted(&mut self, mut compressed_data: &[u8], doc_freq: usize) -> &[u32] {
+        self.vals.clear();
+        let num_blocks = doc_freq / NUM_DOCS_PER_BLOCK;
+        for _ in 0..num_blocks {
+            compressed_data = self.block_decoder.uncompress_block_unsorted(compressed_data);
+            self.vals.extend_from_slice(self.block_decoder.output());
+        }
+        self.block_decoder.uncompress_vint_unsorted(compressed_data, doc_freq % NUM_DOCS_PER_BLOCK);
+        self.vals.extend_from_slice(self.block_decoder.output());
+        &self.vals
+    }
+}
diff --git a/src/compression/intersection.rs b/src/compression/intersection.rs
deleted file mode 100644
index ddc24df42..000000000
--- a/src/compression/intersection.rs
+++ /dev/null
@@ -1,14 +0,0 @@
-use libc::size_t;
-
-extern {
-    fn intersection_native(left_data: *const u32, left_size: size_t, right_data: *const u32, right_size: size_t, output: *mut u32) -> size_t;
-}
-
-pub fn intersection(left: &[u32], right: &[u32], output: &mut [u32]) -> usize {
-    unsafe {
-        intersection_native(
-            left.as_ptr(), left.len(),
-            right.as_ptr(), right.len(),
-            output.as_mut_ptr())
-    }
-}
diff --git a/src/compression/mod.rs b/src/compression/mod.rs
index 4ed8c9512..c5daf0688 100644
--- a/src/compression/mod.rs
+++ b/src/compression/mod.rs
@@ -1,18 +1,10 @@
 #![allow(dead_code)]
 
-mod intersection;
-pub use self::intersection::intersection;
-
-mod s4bp128;
-pub use self::s4bp128::{S4BP128Encoder, S4BP128Decoder};
-
-mod block128;
-pub use self::block128::{Block128Encoder, Block128Decoder};
-
-mod vints;
-pub use self::vints::{VIntsEncoder, VIntsDecoder};
-
 mod simdcomp;
+pub use self::simdcomp::{SIMDBlockEncoder, SIMDBlockDecoder};
+
+mod composite;
+pub use self::composite::CompositeEncoder;
 
 pub const NUM_DOCS_PER_BLOCK: usize = 128;
 
diff --git a/src/compression/s4bp128.rs b/src/compression/s4bp128.rs
deleted file mode 100644
index 4a9a19f97..000000000
--- a/src/compression/s4bp128.rs
+++ /dev/null
@@ -1,164 +0,0 @@
-
-use libc::size_t;
-use std::ptr;
-
-
-
-extern {
-    // complete s4-bp128-dm
-    fn encode_s4_bp128_dm_native(data: *mut u32, num_els: size_t, output: *mut u8, output_capacity: size_t) -> size_t;
-    fn decode_s4_bp128_dm_native(compressed_data: *const u8, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
-
-    fn encode_composite_native(data: *mut u32, num_els: size_t, output: *mut u8, output_capacity: size_t) -> size_t;
-    fn decode_composite_native(compressed_data: *const u8, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
-
-}
-
-//-------------------------
-// s4-bp128-dm
-
-pub struct S4BP128Encoder {
-    input_buffer: Vec<u32>,
-    output_buffer: Vec<u8>,
-}
-
-impl S4BP128Encoder {
-    pub fn new() -> S4BP128Encoder {
-        S4BP128Encoder {
-            input_buffer: Vec::new(),
-            output_buffer: Vec::new(),
-        }
-    }
-    
-    pub fn encode(&mut self, input: &[u32]) -> &[u8] {
-        self.input_buffer.clear();
-        let input_len = input.len();
-        if input_len + 10000 >= self.input_buffer.len() {
-            let target_length = input_len + 1024;
-            self.input_buffer.resize(target_length, 0);
-            self.output_buffer.resize(target_length * 4, 0);
-        }
-        // TODO use clone_from when available
-        let written_size;
-        unsafe {
-            ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), input_len);
-            written_size = encode_composite_native(
-                self.input_buffer.as_mut_ptr(),
-                input_len as size_t,
-                self.output_buffer.as_mut_ptr(),
-                self.output_buffer.len() as size_t,
-            );
-        }
-        &self.output_buffer[0..written_size]
-    }    
-     
-    pub fn encode_sorted(&mut self, input: &[u32]) -> &[u8] {
-        self.input_buffer.clear();
-        let input_len = input.len();
-        if input_len + 10000 >= self.input_buffer.len() {
-            let target_length = input_len + 1024;
-            self.input_buffer.resize(target_length, 0);
-            self.output_buffer.resize(target_length * 4, 0);
-        }
-        // TODO use clone_from when available
-        let written_size;
-        unsafe {
-            ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), input_len);
-            written_size = encode_s4_bp128_dm_native(
-                self.input_buffer.as_mut_ptr(),
-                input_len as size_t,
-                self.output_buffer.as_mut_ptr(),
-                self.output_buffer.len() as size_t,
-            );
-        }
-        return &self.output_buffer[0..written_size];
-    }
-}
-
-pub struct S4BP128Decoder;
-
-impl S4BP128Decoder {
-
-    pub fn new() -> S4BP128Decoder {
-        S4BP128Decoder
-    }
-
-    pub fn decode_sorted(&self,
-                  compressed_data: &[u8],
-                  uncompressed_values: &mut [u32]) -> size_t {
-        unsafe {
-            return decode_s4_bp128_dm_native(
-                        compressed_data.as_ptr(),
-                        compressed_data.len() as size_t,
-                        uncompressed_values.as_mut_ptr(),
-                        uncompressed_values.len() as size_t);
-        }
-    }
-    
-    pub fn decode(&self,
-            compressed_data: &[u8],
-            uncompressed_values: &mut [u32]) -> size_t {
-        unsafe {
-            return decode_composite_native(
-                        compressed_data.as_ptr(),
-                        compressed_data.len() as size_t,
-                        uncompressed_values.as_mut_ptr(),
-                        uncompressed_values.len() as size_t);
-        }
-    }
-}
-
-
-
-#[cfg(test)]
-mod tests {
-
-    use super::*;
-    use test::Bencher;
-    use compression::tests::generate_array;
-
-    #[test]
-    fn test_encode_sorted_big() {
-        let mut encoder = S4BP128Encoder::new();
-        let num_ints = 10_000 as usize;
-        let expected_length = 5_096;
-        let input: Vec<u32> = (0..num_ints as u32)
-            .map(|i| i * 7 / 2)
-            .into_iter().collect();
-        let encoded_data = encoder.encode_sorted(&input);
-        assert_eq!(encoded_data.len(), expected_length);
-        let decoder = S4BP128Decoder::new();
-        let mut decoded_data: Vec<u32> = (0..num_ints as u32).collect();
-        assert_eq!(num_ints, decoder.decode_sorted(&encoded_data[..], &mut decoded_data));
-        assert_eq!(decoded_data, input);
-    }
-    
-    #[test]
-    fn test_encode_unsorted_big() {
-        let mut encoder = S4BP128Encoder::new();
-        let num_ints = 10_000 as usize;
-        let expected_length = 7_588;
-        let input: Vec<u32> = (0..num_ints as u32)
-            .map(|i| i * 7 % 37)
-            .into_iter().collect();
-        let encoded_data = encoder.encode(&input);
-        assert_eq!(encoded_data.len(), expected_length);
-        let decoder = S4BP128Decoder::new();
-        let mut decoded_data: Vec<u32> = (0..num_ints as u32).collect();
-        assert_eq!(num_ints, decoder.decode(&encoded_data[..], &mut decoded_data));
-        assert_eq!(decoded_data, input);
-    }
-
-    #[bench]
-    fn bench_decode(b: &mut Bencher) {
-        const TEST_SIZE: usize = 1_000_000;
-        let arr = generate_array(TEST_SIZE, 0.1);
-        let mut encoder = S4BP128Encoder::new();
-        let encoded = encoder.encode_sorted(&arr);
-        let mut uncompressed: Vec<u32> = (0..TEST_SIZE as u32).collect();
-        let decoder = S4BP128Decoder;
-        b.iter(|| {
-            decoder.decode_sorted(&encoded, &mut uncompressed);
-        });
-    }
-}
diff --git a/src/compression/simdcomp.rs b/src/compression/simdcomp.rs
index d2f55aa47..477a00667 100644
--- a/src/compression/simdcomp.rs
+++ b/src/compression/simdcomp.rs
@@ -1,4 +1,7 @@
 use libc::size_t;
+use super::NUM_DOCS_PER_BLOCK;
+
+const COMPRESSED_BLOCK_MAX_SIZE: usize = NUM_DOCS_PER_BLOCK * 4 + 1; 
 
 extern {
     // complete s4-bp128-dm
@@ -21,56 +24,151 @@ extern {
         output: *mut u32) -> size_t;
 }
 
-const BLOCK_SIZE: usize = 128;
-const COMPRESSED_BLOCK_MAX_SIZE: usize = BLOCK_SIZE * 4 + 1; 
 
 pub struct SIMDBlockEncoder {
-    output_buffer: [u8; COMPRESSED_BLOCK_MAX_SIZE],
+    output: [u8; COMPRESSED_BLOCK_MAX_SIZE],
+    output_len: usize,
 }
 
 impl SIMDBlockEncoder {
     
     pub fn new() -> SIMDBlockEncoder {
         SIMDBlockEncoder {
-            output_buffer: [0u8; COMPRESSED_BLOCK_MAX_SIZE]
+            output: [0u8; COMPRESSED_BLOCK_MAX_SIZE],
+            output_len: 0,
         }    
     }
     
-    pub fn compress_sorted(&mut self, vals: &[u32], offset: u32) -> &[u8] {
-        let compressed_size = unsafe { compress_sorted_cpp(vals.as_ptr(), self.output_buffer.as_mut_ptr(), offset) };
-        &self.output_buffer[..compressed_size]
+    pub fn compress_block_sorted(&mut self, vals: &[u32], offset: u32) -> &[u8] {
+        let compressed_size = unsafe { compress_sorted_cpp(vals.as_ptr(), self.output.as_mut_ptr(), offset) };
+        &self.output[..compressed_size]
     }
     
-    pub fn compress_unsorted(&mut self, vals: &[u32]) -> &[u8] {
-        let compressed_size = unsafe { compress_unsorted_cpp(vals.as_ptr(), self.output_buffer.as_mut_ptr()) };
-        &self.output_buffer[..compressed_size]
+    pub fn compress_block_unsorted(&mut self, vals: &[u32]) -> &[u8] {
+        let compressed_size = unsafe { compress_unsorted_cpp(vals.as_ptr(), self.output.as_mut_ptr()) };
+        &self.output[..compressed_size]
     }
+    
+    pub fn compress_vint_sorted(&mut self, input: &[u32], mut offset: u32) -> &[u8] {
+        let mut byte_written = 0;
+        for v in input.iter() {
+            let mut to_encode: u32 = *v - offset;
+            offset = *v;
+            loop {
+                let next_byte: u8 = (to_encode % 128u32) as u8;
+                to_encode /= 128u32;
+                if to_encode == 0u32 {
+                    self.output[byte_written] = next_byte | 128u8;
+                    byte_written += 1;
+                    break;
+                }
+                else {
+                    self.output[byte_written] = next_byte;
+                    byte_written += 1;
+                }
+            }
+        }
+        return &self.output[..byte_written];
+    }
+    
+    pub fn compress_vint_unsorted(&mut self, input: &[u32]) -> &[u8] {
+        let mut byte_written = 0;
+        for &i in input.iter() {
+            let mut to_encode: u32 = i;
+            loop {
+                let next_byte: u8 = (to_encode % 128u32) as u8;
+                to_encode /= 128u32;
+                if to_encode == 0u32 {
+                    self.output[byte_written] = next_byte | 128u8;
+                    byte_written += 1;
+                    break;
+                }
+                else {
+                    self.output[byte_written] = next_byte;
+                    byte_written += 1;
+                }
+            }
+        }
+        return &self.output[..byte_written];
+    }
+    
 }
 
 pub struct SIMDBlockDecoder {
-    output_buffer: [u32; COMPRESSED_BLOCK_MAX_SIZE],
+    output: [u32; COMPRESSED_BLOCK_MAX_SIZE],
+    output_len: usize,
 }
 
 
 impl SIMDBlockDecoder {
     pub fn new() -> SIMDBlockDecoder {
         SIMDBlockDecoder {
-            output_buffer: [0u32; COMPRESSED_BLOCK_MAX_SIZE]
+            output: [0u32; COMPRESSED_BLOCK_MAX_SIZE],
+            output_len: 0,
         }    
     }
     
-    pub fn uncompress_sorted<'a>(&mut self, compressed_data: &'a [u8], offset: u32) -> &'a[u8] {
-        let consumed_size = unsafe { uncompress_sorted_cpp(compressed_data.as_ptr(), self.output_buffer.as_mut_ptr(), offset) };
+    pub fn uncompress_block_sorted<'a>(&mut self, compressed_data: &'a [u8], offset: u32) -> &'a[u8] {
+        let consumed_size = unsafe { uncompress_sorted_cpp(compressed_data.as_ptr(), self.output.as_mut_ptr(), offset) };
+        self.output_len = NUM_DOCS_PER_BLOCK;
         &compressed_data[consumed_size..]
     }
     
-    pub fn uncompress_unsorted<'a>(&mut self, compressed_data: &'a [u8]) -> &'a[u8] {
-        let consumed_size = unsafe { uncompress_unsorted_cpp(compressed_data.as_ptr(), self.output_buffer.as_mut_ptr()) };
+    pub fn uncompress_block_unsorted<'a>(&mut self, compressed_data: &'a [u8]) -> &'a[u8] {
+        let consumed_size = unsafe { uncompress_unsorted_cpp(compressed_data.as_ptr(), self.output.as_mut_ptr()) };
+        self.output_len = NUM_DOCS_PER_BLOCK;
         &compressed_data[consumed_size..]
     }
     
+    pub fn uncompress_vint_sorted<'a>(
+        &mut self,
+        compressed_data: &'a [u8],
+        offset: u32,
+        num_els: usize) -> &'a [u8] {
+        let mut read_byte = 0;
+        let mut result = offset;
+        for i in 0..num_els {
+            let mut shift = 0u32;
+            loop {
+                let cur_byte = compressed_data[read_byte];
+                read_byte += 1;
+                result += ((cur_byte % 128u8) as u32) << shift;
+                if cur_byte & 128u8 != 0u8 {
+                    break;
+                }
+                shift += 7;
+            }
+            self.output[i] = result;
+        }
+        self.output_len = num_els;
+        &compressed_data[read_byte..]
+    }
+    
+    pub fn uncompress_vint_unsorted<'a>(
+        &mut self,
+        compressed_data: &'a [u8],
+        num_els: usize) -> &'a [u8] {
+        let mut read_byte = 0;
+        for i in 0..num_els {
+            let mut result = 0u32;
+            let mut shift = 0u32;
+            loop {
+                let cur_byte = compressed_data[read_byte];
+                read_byte += 1;
+                result += ((cur_byte % 128u8) as u32) << shift;
+                if cur_byte & 128u8 != 0u8 {
+                    break;
+                }
+                shift += 7;
+            }
+            self.output[i] = result;
+        }
+        self.output_len = num_els;
+        &compressed_data[read_byte..]
+    }
+    
     pub fn output(&self,) -> &[u32] {
-        &self.output_buffer
+        &self.output[..self.output_len]
     }
 }
 
@@ -84,10 +182,10 @@ mod tests {
     fn test_encode_sorted_block() {
         let vals: Vec<u32> = (0u32..128u32).map(|i| i*7).collect();
         let mut encoder = SIMDBlockEncoder::new();
-        let compressed_data = encoder.compress_sorted(&vals, 0);
+        let compressed_data = encoder.compress_block_sorted(&vals, 0);
         let mut decoder = SIMDBlockDecoder::new();
         {
-            let remaining_data = decoder.uncompress_sorted(compressed_data, 0);    
+            let remaining_data = decoder.uncompress_block_sorted(compressed_data, 0);    
             assert_eq!(remaining_data.len(), 0);
         }
         for i in 0..128 {
@@ -99,10 +197,10 @@ mod tests {
     fn test_encode_sorted_block_with_offset() {
         let vals: Vec<u32> = (0u32..128u32).map(|i| 11 + i*7).collect();
         let mut encoder = SIMDBlockEncoder::new();
-        let compressed_data = encoder.compress_sorted(&vals, 10);
+        let compressed_data = encoder.compress_block_sorted(&vals, 10);
         let mut decoder = SIMDBlockDecoder::new();
         {
-            let remaining_data = decoder.uncompress_sorted(compressed_data, 10);    
+            let remaining_data = decoder.uncompress_block_sorted(compressed_data, 10);    
             assert_eq!(remaining_data.len(), 0);
         }
         for i in 0..128 {
@@ -116,12 +214,12 @@ mod tests {
         let n = 128;
         let vals: Vec<u32> = (0..n).map(|i| 11u32 + (i as u32)*7u32).collect();
         let mut encoder = SIMDBlockEncoder::new();
-        let compressed_data = encoder.compress_sorted(&vals, 10);
+        let compressed_data = encoder.compress_block_sorted(&vals, 10);
         compressed.extend_from_slice(compressed_data);
         compressed.push(173u8);
         let mut decoder = SIMDBlockDecoder::new();
         {
-            let remaining_data = decoder.uncompress_sorted(&compressed, 10);    
+            let remaining_data = decoder.uncompress_block_sorted(&compressed, 10);    
             assert_eq!(remaining_data.len(), 1);
             assert_eq!(remaining_data[0], 173u8);
         }
@@ -136,12 +234,12 @@ mod tests {
         let n = 128;
         let vals: Vec<u32> = (0..n).map(|i| 11u32 + (i as u32)*7u32 % 12).collect();
         let mut encoder = SIMDBlockEncoder::new();
-        let compressed_data = encoder.compress_sorted(&vals, 10);
+        let compressed_data = encoder.compress_block_sorted(&vals, 10);
         compressed.extend_from_slice(compressed_data);
         compressed.push(173u8);
         let mut decoder = SIMDBlockDecoder::new();
         {
-            let remaining_data = decoder.uncompress_sorted(&compressed, 10);    
+            let remaining_data = decoder.uncompress_block_sorted(&compressed, 10);    
             assert_eq!(remaining_data.len(), 1);
             assert_eq!(remaining_data[0], 173u8);
         }
@@ -149,4 +247,38 @@ mod tests {
             assert_eq!(vals[i], decoder.output()[i]);
         }
     }
+    
+    
+    #[test]
+    fn test_encode_vint() {
+        {
+            let expected_length = 123;
+            let mut encoder = SIMDBlockEncoder::new();
+            let input: Vec<u32> = (0u32..123u32)
+                .map(|i| 4 + i * 7 / 2)
+                .into_iter()
+                .collect();
+            for offset in [0u32, 1u32, 2u32].iter() {
+                let encoded_data = encoder.compress_vint_sorted(&input, *offset);
+                assert_eq!(encoded_data.len(), expected_length);
+                let mut decoder = SIMDBlockDecoder::new();
+                let remaining_data = decoder.uncompress_vint_sorted(&encoded_data, *offset, input.len());
+                assert_eq!(0, remaining_data.len());
+                for (&decoded, &expected) in decoder.output().iter().zip(input.iter()) {
+                    assert_eq!(decoded, expected);
+                }
+            }
+        }
+        {
+            let mut encoder = SIMDBlockEncoder::new();
+            let input = vec!(3u32, 17u32, 187u32);
+            let encoded_data = encoder.compress_vint_sorted(&input, 0);
+            assert_eq!(encoded_data.len(), 4);
+            assert_eq!(encoded_data[0], 3u8 + 128u8);
+            assert_eq!(encoded_data[1], (17u8 - 3u8) + 128u8);
+            assert_eq!(encoded_data[2], (187u8 - 17u8 - 128u8));
+            assert_eq!(encoded_data[3], (1u8 + 128u8));
+        }
+    }
+
 }
diff --git a/src/compression/vints.rs b/src/compression/vints.rs
deleted file mode 100644
index c3a95965c..000000000
--- a/src/compression/vints.rs
+++ /dev/null
@@ -1,112 +0,0 @@
-use libc::size_t;
-use std::ptr;
-use std::iter;
-
-extern {
-    fn encode_sorted_vint_native(data: *mut u32, num_els: size_t, output: *mut u8, output_capacity: size_t) -> size_t;
-    fn decode_sorted_vint_native(compressed_data: *const u8, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t;
-}
-
-pub struct VIntsEncoder {
-    input_buffer: Vec<u32>,
-    output_buffer: Vec<u8>,
-}
-
-impl VIntsEncoder {
-
-    pub fn new() -> VIntsEncoder {
-        VIntsEncoder {
-            input_buffer: Vec::with_capacity(128),
-            output_buffer: iter::repeat(0u8).take(256 * 4).collect(),
-        }
-    }
-
-    pub fn encode_sorted(&mut self, input: &[u32]) -> &[u8] {
-        assert!(input.len() < 128);
-        let input_len = input.len();
-        let written_size: usize;
-        // TODO use clone_from when available
-        unsafe {
-            ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), input_len);
-            written_size = encode_sorted_vint_native(
-                self.input_buffer.as_mut_ptr(),
-                input_len as size_t,
-                self.output_buffer.as_mut_ptr(),
-                256 * 4,
-            );
-        }
-        return &self.output_buffer[0..written_size];
-    }
-}
-
-
-
-pub struct VIntsDecoder {
-    output: [u32; 128],
-}
-
-impl VIntsDecoder {
-
-    pub fn new() -> VIntsDecoder {
-        VIntsDecoder {
-            output: [0u32; 128]
-        }
-    }
-
-    pub fn decode_sorted(&mut self,
-                  compressed_data: &[u8]) -> &[u32] {
-        unsafe {
-            let num_uncompressed = decode_sorted_vint_native(
-                compressed_data.as_ptr(),
-                compressed_data.len() as size_t,
-                self.output.as_mut_ptr(),
-                128);
-            &self.output[..num_uncompressed]
-        }
-    }
-}
-
-
-#[cfg(test)]
-mod tests {
-    
-    use super::*;
-
-    #[test]
-    fn test_encode_vint() {
-        {
-            let mut encoder = VIntsEncoder::new();
-            let expected_length = 124;
-            let input: Vec<u32> = (0u32..123u32)
-                .map(|i| i * 7 / 2)
-                .into_iter()
-                .collect();
-            let encoded_data = encoder.encode_sorted(&input);
-            assert_eq!(encoded_data.len(), expected_length);
-            let mut decoder = VIntsDecoder::new();
-            let decoded_data = decoder.decode_sorted(&encoded_data[..]);
-            assert_eq!(123, decoded_data.len());
-            assert_eq!(&decoded_data[0..123], &input[..]);
-        }
-        {
-            let mut encoder = VIntsEncoder::new();
-            let input = vec!(3u32, 17u32, 187u32);
-            let encoded_data = encoder.encode_sorted(&input);
-            assert_eq!(encoded_data.len(), 4);
-            assert_eq!(encoded_data[0], 3u8 + 128u8);
-            assert_eq!(encoded_data[1], (17u8 - 3u8) + 128u8);
-            assert_eq!(encoded_data[2], (187u8 - 17u8 - 128u8));
-            assert_eq!(encoded_data[3], (1u8 + 128u8));
-        }
-        {
-            let mut encoder = VIntsEncoder::new();
-            let input = vec!(0u32, 1u32, 2u32);
-            let encoded_data = encoder.encode_sorted(&input);
-            let mut decoder = VIntsDecoder::new();
-            let decoded_data = decoder.decode_sorted(&encoded_data[..]);
-            assert_eq!(3, decoded_data.len());
-            assert_eq!(&decoded_data[..], &input[..]);
-        }
-    }
-
-}
diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs
index e351d2c17..9d409ac77 100644
--- a/src/postings/segment_postings.rs
+++ b/src/postings/segment_postings.rs
@@ -1,17 +1,15 @@
 use postings::Postings;
-use compression::{NUM_DOCS_PER_BLOCK, Block128Decoder};
+use compression::{NUM_DOCS_PER_BLOCK, SIMDBlockDecoder};
 use DocId;
 use std::cmp::Ordering;
 use postings::SkipResult;
-use std::io::Cursor;
-use common::VInt;
 use std::num::Wrapping;
-use common::BinarySerializable;
 
 // No Term Frequency, no postings.
 pub struct SegmentPostings<'a> {
     doc_freq: usize,
-    block_decoder: Block128Decoder,
+    doc_offset: u32,
+    block_decoder: SIMDBlockDecoder,
     remaining_data: &'a [u8],
     cur: Wrapping<usize>,
 }
@@ -23,29 +21,29 @@ impl<'a> SegmentPostings<'a> {
     pub fn empty() -> SegmentPostings<'a> {
         SegmentPostings {
             doc_freq: 0,
-            block_decoder: Block128Decoder::new(),
+            doc_offset: 0,
+            block_decoder: SIMDBlockDecoder::new(),
             remaining_data: &EMPTY_ARRAY,
             cur: Wrapping(usize::max_value()),
         }
     }
     
     pub fn load_next_block(&mut self,) {
-        if self.doc_freq - self.cur.0 >= NUM_DOCS_PER_BLOCK {
-            self.remaining_data = self.block_decoder.decode_sorted(self.remaining_data);
+        let num_remaining_docs = self.doc_freq - self.cur.0; 
+        if num_remaining_docs >= NUM_DOCS_PER_BLOCK {
+            self.remaining_data = self.block_decoder.uncompress_block_sorted(self.remaining_data, self.doc_offset);
+            self.doc_offset = self.block_decoder.output()[NUM_DOCS_PER_BLOCK - 1];
         }
         else {
-            let mut cursor = Cursor::new(self.remaining_data);
-            let remaining_len: usize = VInt::deserialize(&mut cursor).unwrap().0 as usize;
-            let position = cursor.position() as usize;
-            self.remaining_data = &self.remaining_data[position..position+remaining_len];
-            self.block_decoder.decode_sorted_remaining(self.remaining_data);
+            self.remaining_data = self.block_decoder.uncompress_vint_sorted(self.remaining_data, self.doc_offset, num_remaining_docs);
         }
     }
     
     pub fn from_data(doc_freq: u32, data: &'a [u8]) -> SegmentPostings<'a> {
         SegmentPostings {
             doc_freq: doc_freq as usize,
-            block_decoder: Block128Decoder::new(),
+            doc_offset: 0,
+            block_decoder: SIMDBlockDecoder::new(),
             remaining_data: data,
             cur: Wrapping(usize::max_value()),
         }
diff --git a/src/postings/serializer.rs b/src/postings/serializer.rs
index 630d72fed..7542b2efc 100644
--- a/src/postings/serializer.rs
+++ b/src/postings/serializer.rs
@@ -2,7 +2,7 @@ use datastruct::FstMapBuilder;
 use super::TermInfo;
 use schema::Term;
 use directory::WritePtr;
-use compression::{NUM_DOCS_PER_BLOCK, Block128Encoder, VIntsEncoder, S4BP128Encoder};
+use compression::{NUM_DOCS_PER_BLOCK, SIMDBlockEncoder, CompositeEncoder};
 use DocId;
 use core::index::Segment;
 use std::io;
@@ -17,9 +17,9 @@ pub struct PostingsSerializer {
     positions_write: WritePtr,
     written_bytes_postings: usize,
     written_bytes_positions: usize,
-    positions_encoder: S4BP128Encoder,
-    block_encoder: Block128Encoder,
-    vints_encoder: VIntsEncoder,
+    last_doc_id_encoded: u32,
+    positions_encoder: CompositeEncoder,
+    block_encoder: SIMDBlockEncoder,
     doc_ids: Vec<DocId>,
     term_freqs: Vec<u32>,
     position_deltas: Vec<u32>,
@@ -40,9 +40,9 @@ impl PostingsSerializer {
             positions_write: positions_write,
             written_bytes_postings: 0,
             written_bytes_positions: 0,
-            positions_encoder: S4BP128Encoder::new(),
-            block_encoder: Block128Encoder::new(),
-            vints_encoder: VIntsEncoder::new(),
+            last_doc_id_encoded: 0u32,
+            positions_encoder: CompositeEncoder::new(),
+            block_encoder: SIMDBlockEncoder::new(),
             doc_ids: Vec::new(),
             term_freqs: Vec::new(),
             position_deltas: Vec::new(),
@@ -54,6 +54,7 @@ impl PostingsSerializer {
     pub fn new_term(&mut self, term: &Term, doc_freq: DocId) -> io::Result<()> {
         try!(self.close_term());
         self.doc_ids.clear();
+        self.last_doc_id_encoded = 0;
         self.term_freqs.clear();
         self.position_deltas.clear();
         let term_info = TermInfo {
@@ -67,16 +68,13 @@ impl PostingsSerializer {
     pub fn close_term(&mut self,) -> io::Result<()> {
         if !self.doc_ids.is_empty() {
             {
-                let block_encoded = self.vints_encoder.encode_sorted(&self.doc_ids[..]);
-                self.written_bytes_postings += try!(VInt(block_encoded.len() as u64).serialize(&mut self.postings_write));
-                
-                for num in block_encoded {
-                    self.written_bytes_postings += try!(num.serialize(&mut self.postings_write));
-                }
+                let block_encoded = self.block_encoder.compress_vint_sorted(&self.doc_ids, self.last_doc_id_encoded);
+                self.written_bytes_postings += block_encoded.len();
+                try!(self.postings_write.write_all(block_encoded));
             }
             if self.is_termfreq_enabled {
                 {
-                    let block_encoded = self.vints_encoder.encode_sorted(&self.term_freqs[..]);
+                    let block_encoded = self.block_encoder.compress_vint_unsorted(&self.term_freqs[..]);
                     self.written_bytes_postings += try!(VInt(block_encoded.len() as u64).serialize(&mut self.postings_write));
                     for num in block_encoded {
                         self.written_bytes_postings += try!(num.serialize(&mut self.postings_write));
@@ -84,8 +82,7 @@ impl PostingsSerializer {
                     self.term_freqs.clear();
                 }
                 if self.is_positions_enabled {
-                    let positions_encoded: &[u8] = self.positions_encoder.encode(&self.position_deltas[..]);
-                    self.written_bytes_positions += try!(VInt(positions_encoded.len() as u64).serialize(&mut self.positions_write));
+                    let positions_encoded: &[u8] = self.positions_encoder.compress_unsorted(&self.position_deltas[..]);
                     try!(self.positions_write.write_all(positions_encoded));
                     self.written_bytes_positions += positions_encoded.len();
                     self.position_deltas.clear();
@@ -107,21 +104,16 @@ impl PostingsSerializer {
         if self.doc_ids.len() == NUM_DOCS_PER_BLOCK { 
             {
                 // encode the positions
-                let block_encoded: &[u8] = self.block_encoder.encode_sorted(&self.doc_ids);
+                let block_encoded: &[u8] = self.block_encoder.compress_block_sorted(&self.doc_ids, self.last_doc_id_encoded);
+                self.last_doc_id_encoded = self.doc_ids[self.doc_ids.len() - 1];
                 try!(self.postings_write.write_all(block_encoded));
                 self.written_bytes_postings += block_encoded.len();
             }
             if self.is_termfreq_enabled {
                 // encode the term_freqs
-                let block_encoded: &[u8] = self.block_encoder.encode_sorted(&self.term_freqs);
+                let block_encoded: &[u8] = self.block_encoder.compress_block_unsorted(&self.term_freqs);
                 try!(self.postings_write.write_all(block_encoded));
                 self.written_bytes_postings += block_encoded.len();
-                if self.is_positions_enabled {
-                    let positions_encoded: &[u8] = self.positions_encoder.encode(&self.position_deltas[..]);
-                    try!(self.positions_write.write_all(positions_encoded));
-                    self.written_bytes_positions += positions_encoded.len();
-                    self.position_deltas.clear();
-                }
                 self.term_freqs.clear();
             }
             self.doc_ids.clear();