Added a broken unit test

Plastic surgery
Closes 466. Removing mentions of the chain collector. (#467 )
2026-01-12 12:02:54 +00:00 · 2019-01-22 08:18:32 +09:00 · 2019-01-19 23:13:27 +09:00 · 2019-01-16 10:28:19 +09:00 · 2019-01-13 14:51:18 +09:00 · 2019-01-13 14:41:56 +09:00
38 changed files with 913 additions and 639 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-Tantivy 0.8.1
+Tantivy 0.8.0
 =====================
 *No change in the index format*
 - API Breaking change in the collector API. (@jwolfe, @fulmicoton)
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.8.0-dev"
+version = "0.8.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -29,7 +29,7 @@ serde = "1.0"
 serde_derive = "1.0"
 serde_json = "1.0"
 num_cpus = "1.2"
-itertools = "0.7"
+itertools = "0.8"
 levenshtein_automata = {version="0.1", features=["fst_automaton"]}
 bit-set = "0.5"
 uuid = { version = "0.7", features = ["v4", "serde"] }
@@ -49,6 +49,7 @@ failure = "0.1"
 htmlescape = "0.3.1"
 fail = "0.2"
 scoped-pool = "1.0"
+murmurhash32 = "0.2"

 [target.'cfg(windows)'.dependencies]
 winapi = "0.2"
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@

 **Tantivy** is a **full text search engine library** written in rust.

-It is closer to [Apache Lucene](https://lucene.apache.org/) than to [Elastic Search](https://www.elastic.co/products/elasticsearch) and [Apache Solr](https://lucene.apache.org/solr/) in the sense it is not
+It is closer to [Apache Lucene](https://lucene.apache.org/) than to [Elasticsearch](https://www.elastic.co/products/elasticsearch) and [Apache Solr](https://lucene.apache.org/solr/) in the sense it is not
 an off-the-shelf search engine server, but rather a crate that can be used
 to build such a search engine.

@@ -76,7 +76,7 @@ It will walk you through getting a wikipedia search engine up and running in a f
 Tantivy compiles on stable rust but requires `Rust >= 1.27`.
 To check out and run tests, you can simply run :

-    git clone git@github.com:tantivy-search/tantivy.git
+    git clone https://github.com/tantivy-search/tantivy.git
    cd tantivy
    cargo build

--- a/examples/custom_collector.rs
+++ b/examples/custom_collector.rs
@@ -70,8 +70,6 @@ impl Collector for StatsCollector {
    // Our standard deviation will be a float.
    type Fruit = Option<Stats>;

-    type SegmentFruit = Self::Fruit;
-
    type Child = StatsSegmentCollector;

    fn for_segment(
--- a/src/collector/count_collector.rs
+++ b/src/collector/count_collector.rs
@@ -58,7 +58,6 @@ pub struct Count;

 impl Collector for Count {
    type Fruit = usize;
-    type SegmentFruit = usize;

    type Child = SegmentCountCollector;

--- a/src/collector/facet_collector.rs
+++ b/src/collector/facet_collector.rs
@@ -197,7 +197,7 @@ fn skip<'a, I: Iterator<Item = &'a Facet>>(
 ) -> SkipResult {
    loop {
        match collapse_it.peek() {
-            Some(facet_bytes) => match facet_bytes.encoded_bytes().cmp(target) {
+            Some(facet_bytes) => match facet_bytes.encoded_str().as_bytes().cmp(target) {
                Ordering::Less => {}
                Ordering::Greater => {
                    return SkipResult::OverStep;
@@ -258,8 +258,6 @@ impl FacetCollector {
 impl Collector for FacetCollector {
    type Fruit = FacetCounts;

-    type SegmentFruit = FacetCounts;
-
    type Child = FacetSegmentCollector;

    fn for_segment(
@@ -371,7 +369,8 @@ impl SegmentCollector for FacetSegmentCollector {
            let mut facet = vec![];
            let facet_ord = self.collapse_facet_ords[collapsed_facet_ord];
            facet_dict.ord_to_term(facet_ord as u64, &mut facet);
-            facet_counts.insert(unsafe { Facet::from_encoded(facet) }, count);
+            // TODO
+            facet_counts.insert(Facet::from_encoded(facet).unwrap(), count);
        }
        FacetCounts { facet_counts }
    }
@@ -405,9 +404,9 @@ impl FacetCounts {
        let right_bound = if facet.is_root() {
            Bound::Unbounded
        } else {
-            let mut facet_after_bytes: Vec<u8> = facet.encoded_bytes().to_owned();
-            facet_after_bytes.push(1u8);
-            let facet_after = unsafe { Facet::from_encoded(facet_after_bytes) }; // ok logic
+            let mut facet_after_bytes: String = facet.encoded_str().to_owned();
+            facet_after_bytes.push('\u{1}');
+            let facet_after = Facet::from_encoded_string(facet_after_bytes);
            Bound::Excluded(facet_after)
        };
        let underlying: btree_map::Range<_, _> = self.facet_counts.range((left_bound, right_bound));
--- a/src/collector/mod.rs
+++ b/src/collector/mod.rs
@@ -136,10 +136,8 @@ pub trait Collector: Sync {
    /// e.g. `usize` for the `Count` collector.
    type Fruit: Fruit;

-    type SegmentFruit: Fruit;
-
    /// Type of the `SegmentCollector` associated to this collector.
-    type Child: SegmentCollector<Fruit = Self::SegmentFruit>;
+    type Child: SegmentCollector<Fruit = Self::Fruit>;

    /// `set_segment` is called before beginning to enumerate
    /// on this segment.
@@ -154,7 +152,7 @@ pub trait Collector: Sync {

    /// Combines the fruit associated to the collection of each segments
    /// into one fruit.
-    fn merge_fruits(&self, segment_fruits: Vec<Self::SegmentFruit>) -> Result<Self::Fruit>;
+    fn merge_fruits(&self, segment_fruits: Vec<Self::Fruit>) -> Result<Self::Fruit>;
 }

 /// The `SegmentCollector` is the trait in charge of defining the
@@ -183,9 +181,6 @@ where
    Right: Collector,
 {
    type Fruit = (Left::Fruit, Right::Fruit);
-
-    type SegmentFruit = (Left::SegmentFruit, Right::SegmentFruit);
-
    type Child = (Left::Child, Right::Child);

    fn for_segment(&self, segment_local_id: u32, segment: &SegmentReader) -> Result<Self::Child> {
@@ -200,7 +195,7 @@ where

    fn merge_fruits(
        &self,
-        children: Vec<(Left::SegmentFruit, Right::SegmentFruit)>,
+        children: Vec<(Left::Fruit, Right::Fruit)>,
    ) -> Result<(Left::Fruit, Right::Fruit)> {
        let mut left_fruits = vec![];
        let mut right_fruits = vec![];
@@ -241,7 +236,6 @@ where
    Three: Collector,
 {
    type Fruit = (One::Fruit, Two::Fruit, Three::Fruit);
-    type SegmentFruit = (One::SegmentFruit, Two::SegmentFruit, Three::SegmentFruit);
    type Child = (One::Child, Two::Child, Three::Child);

    fn for_segment(&self, segment_local_id: u32, segment: &SegmentReader) -> Result<Self::Child> {
@@ -255,7 +249,7 @@ where
        self.0.requires_scoring() || self.1.requires_scoring() || self.2.requires_scoring()
    }

-    fn merge_fruits(&self, children: Vec<Self::SegmentFruit>) -> Result<Self::Fruit> {
+    fn merge_fruits(&self, children: Vec<Self::Fruit>) -> Result<Self::Fruit> {
        let mut one_fruits = vec![];
        let mut two_fruits = vec![];
        let mut three_fruits = vec![];
@@ -301,7 +295,6 @@ where
    Four: Collector,
 {
    type Fruit = (One::Fruit, Two::Fruit, Three::Fruit, Four::Fruit);
-    type SegmentFruit = (One::SegmentFruit, Two::SegmentFruit, Three::SegmentFruit, Four::SegmentFruit);
    type Child = (One::Child, Two::Child, Three::Child, Four::Child);

    fn for_segment(&self, segment_local_id: u32, segment: &SegmentReader) -> Result<Self::Child> {
@@ -319,7 +312,7 @@ where
            || self.3.requires_scoring()
    }

-    fn merge_fruits(&self, children: Vec<Self::SegmentFruit>) -> Result<Self::Fruit> {
+    fn merge_fruits(&self, children: Vec<Self::Fruit>) -> Result<Self::Fruit> {
        let mut one_fruits = vec![];
        let mut two_fruits = vec![];
        let mut three_fruits = vec![];
--- a/src/collector/multi_collector.rs
+++ b/src/collector/multi_collector.rs
@@ -18,7 +18,6 @@ pub struct CollectorWrapper<TCollector: Collector>(TCollector);

 impl<TCollector: Collector> Collector for CollectorWrapper<TCollector> {
    type Fruit = Box<Fruit>;
-    type SegmentFruit = Box<Fruit>;
    type Child = Box<BoxableSegmentCollector>;

    fn for_segment(
@@ -35,10 +34,10 @@ impl<TCollector: Collector> Collector for CollectorWrapper<TCollector> {
    }

    fn merge_fruits(&self, children: Vec<<Self as Collector>::Fruit>) -> Result<Box<Fruit>> {
-        let typed_fruit: Vec<TCollector::SegmentFruit> = children
+        let typed_fruit: Vec<TCollector::Fruit> = children
            .into_iter()
            .map(|untyped_fruit| {
-                Downcast::<TCollector::SegmentFruit>::downcast(untyped_fruit)
+                Downcast::<TCollector::Fruit>::downcast(untyped_fruit)
                    .map(|boxed_but_typed| *boxed_but_typed)
                    .map_err(|e| {
                        let err_msg = format!("Failed to cast child collector fruit. {:?}", e);
@@ -97,7 +96,10 @@ impl<TFruit: Fruit> FruitHandle<TFruit> {
 /// Multicollector makes it possible to collect on more than one collector.
 /// It should only be used for use cases where the Collector types is unknown
 /// at compile time.
-/// If the type of the collectors is known, you should prefer to use `ChainedCollector`.
+///
+/// If the type of the collectors is known, you can just group yours collectors
+/// in a tuple. See the
+/// [Combining several collectors section of the collector documentation](./index.html#combining-several-collectors).
 ///
 /// ```rust
 /// #[macro_use]
@@ -153,7 +155,7 @@ impl<TFruit: Fruit> FruitHandle<TFruit> {
 #[derive(Default)]
 pub struct MultiCollector<'a> {
    collector_wrappers:
-        Vec<Box<Collector<Child = Box<BoxableSegmentCollector>, Fruit = Box<Fruit>, SegmentFruit = Box<Fruit>> + 'a>>,
+        Vec<Box<Collector<Child = Box<BoxableSegmentCollector>, Fruit = Box<Fruit>> + 'a>>,
 }

 impl<'a> MultiCollector<'a> {
@@ -178,9 +180,7 @@ impl<'a> MultiCollector<'a> {
 }

 impl<'a> Collector for MultiCollector<'a> {
-
    type Fruit = MultiFruit;
-    type SegmentFruit = MultiFruit;
    type Child = MultiCollectorChild;

    fn for_segment(
--- a/src/collector/tests.rs
+++ b/src/collector/tests.rs
@@ -40,7 +40,6 @@ impl TestFruit {

 impl Collector for TestCollector {
    type Fruit = TestFruit;
-    type SegmentFruit = Self::Fruit;
    type Child = TestSegmentCollector;

    fn for_segment(
@@ -110,8 +109,6 @@ impl FastFieldTestCollector {

 impl Collector for FastFieldTestCollector {
    type Fruit = Vec<u64>;
-    type SegmentFruit = Self::Fruit;
-
    type Child = FastFieldSegmentCollector;

    fn for_segment(
@@ -168,7 +165,6 @@ impl BytesFastFieldTestCollector {

 impl Collector for BytesFastFieldTestCollector {
    type Fruit = Vec<u8>;
-    type SegmentFruit = Self::Fruit;
    type Child = BytesFastFieldSegmentCollector;

    fn for_segment(
--- a/src/collector/top_field_collector.rs
+++ b/src/collector/top_field_collector.rs
@@ -88,7 +88,6 @@ impl<T: FastValue + PartialOrd + Clone> TopDocsByField<T> {

 impl<T: FastValue + PartialOrd + Send + Sync + 'static> Collector for TopDocsByField<T> {
    type Fruit = Vec<(T, DocAddress)>;
-    type SegmentFruit = Vec<(T, DocAddress)>;

    type Child = TopFieldSegmentCollector<T>;

--- a/src/collector/top_score_collector.rs
+++ b/src/collector/top_score_collector.rs
@@ -89,7 +89,6 @@ impl TopDocs {

 impl Collector for TopDocs {
    type Fruit = Vec<(Score, DocAddress)>;
-    type SegmentFruit = Vec<(Score, DocAddress)>;

    type Child = TopScoreSegmentCollector;

--- a/src/common/bitpacker.rs
+++ b/src/common/bitpacker.rs
@@ -1,9 +1,6 @@
-use common::serialize::BinarySerializable;
+use byteorder::{ByteOrder, LittleEndian, WriteBytesExt};
 use std::io;
-use std::io::Write;
-use std::mem;
 use std::ops::Deref;
-use std::ptr;

 pub(crate) struct BitPacker {
    mini_buffer: u64,
@@ -18,7 +15,7 @@ impl BitPacker {
        }
    }

-    pub fn write<TWrite: Write>(
+    pub fn write<TWrite: io::Write>(
        &mut self,
        val: u64,
        num_bits: u8,
@@ -28,14 +25,14 @@ impl BitPacker {
        let num_bits = num_bits as usize;
        if self.mini_buffer_written + num_bits > 64 {
            self.mini_buffer |= val_u64.wrapping_shl(self.mini_buffer_written as u32);
-            self.mini_buffer.serialize(output)?;
+            output.write_u64::<LittleEndian>(self.mini_buffer)?;
            self.mini_buffer = val_u64.wrapping_shr((64 - self.mini_buffer_written) as u32);
            self.mini_buffer_written = self.mini_buffer_written + num_bits - 64;
        } else {
            self.mini_buffer |= val_u64 << self.mini_buffer_written;
            self.mini_buffer_written += num_bits;
            if self.mini_buffer_written == 64 {
-                self.mini_buffer.serialize(output)?;
+                output.write_u64::<LittleEndian>(self.mini_buffer)?;
                self.mini_buffer_written = 0;
                self.mini_buffer = 0u64;
            }
@@ -43,17 +40,18 @@ impl BitPacker {
        Ok(())
    }

-    pub fn flush<TWrite: Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
+    pub fn flush<TWrite: io::Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
        if self.mini_buffer_written > 0 {
            let num_bytes = (self.mini_buffer_written + 7) / 8;
-            let arr: [u8; 8] = unsafe { mem::transmute::<u64, [u8; 8]>(self.mini_buffer.to_le()) };
+            let mut arr: [u8; 8] = [0u8; 8];
+            LittleEndian::write_u64(&mut arr, self.mini_buffer);
            output.write_all(&arr[..num_bytes])?;
            self.mini_buffer_written = 0;
        }
        Ok(())
    }

-    pub fn close<TWrite: Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
+    pub fn close<TWrite: io::Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
        self.flush(output)?;
        // Padding the write file to simplify reads.
        output.write_all(&[0u8; 7])?;
@@ -102,39 +100,10 @@ where
            addr + 8 <= data.len(),
            "The fast field field should have been padded with 7 bytes."
        );
-        #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
-        let val_unshifted_unmasked: u64 =
-            u64::from_le(unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) });
+        let val_unshifted_unmasked: u64 = LittleEndian::read_u64(&data[addr..]);
        let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
        val_shifted & mask
    }
-
-    /// Reads a range of values from the fast field.
-    ///
-    /// The range of values read is from
-    /// `[start..start + output.len()[`
-    pub fn get_range(&self, start: u32, output: &mut [u64]) {
-        if self.num_bits == 0 {
-            for val in output.iter_mut() {
-                *val = 0u64;
-            }
-        } else {
-            let data: &[u8] = &*self.data;
-            let num_bits = self.num_bits;
-            let mask = self.mask;
-            let mut addr_in_bits = (start as usize) * num_bits;
-            for output_val in output.iter_mut() {
-                let addr = addr_in_bits >> 3;
-                let bit_shift = addr_in_bits & 7;
-                #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
-                let val_unshifted_unmasked: u64 =
-                    unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) };
-                let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
-                *output_val = val_shifted & mask;
-                addr_in_bits += num_bits;
-            }
-        }
-    }
 }

 #[cfg(test)]
@@ -172,17 +141,4 @@ mod test {
        test_bitpacker_util(6, 14);
        test_bitpacker_util(1000, 14);
    }
-
-    #[test]
-    fn test_bitpacker_range() {
-        let (bitunpacker, vals) = create_fastfield_bitpacker(100_000, 12);
-        let buffer_len = 100;
-        let mut buffer = vec![0u64; buffer_len];
-        for start in vec![0, 10, 20, 100, 1_000] {
-            bitunpacker.get_range(start as u32, &mut buffer[..]);
-            for i in 0..buffer_len {
-                assert_eq!(buffer[i], vals[start + i]);
-            }
-        }
-    }
 }
--- a/src/common/mod.rs
+++ b/src/common/mod.rs
@@ -10,7 +10,7 @@ pub(crate) use self::bitset::TinySet;
 pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
 pub use self::counting_writer::CountingWriter;
 pub use self::serialize::{BinarySerializable, FixedSize};
-pub use self::vint::VInt;
+pub use self::vint::{read_u32_vint, serialize_vint_u32, write_u32_vint, VInt};
 pub use byteorder::LittleEndian as Endianness;

 use std::io;
--- a/src/common/vint.rs
+++ b/src/common/vint.rs
@@ -1,4 +1,5 @@
 use super::BinarySerializable;
+use byteorder::{ByteOrder, LittleEndian};
 use std::io;
 use std::io::Read;
 use std::io::Write;
@@ -9,6 +10,100 @@ pub struct VInt(pub u64);

 const STOP_BIT: u8 = 128;

+pub fn serialize_vint_u32(val: u32) -> (u64, usize) {
+    const START_2: u64 = 1 << 7;
+    const START_3: u64 = 1 << 14;
+    const START_4: u64 = 1 << 21;
+    const START_5: u64 = 1 << 28;
+
+    const STOP_1: u64 = START_2 - 1;
+    const STOP_2: u64 = START_3 - 1;
+    const STOP_3: u64 = START_4 - 1;
+    const STOP_4: u64 = START_5 - 1;
+
+    const MASK_1: u64 = 127;
+    const MASK_2: u64 = MASK_1 << 7;
+    const MASK_3: u64 = MASK_2 << 7;
+    const MASK_4: u64 = MASK_3 << 7;
+    const MASK_5: u64 = MASK_4 << 7;
+
+    let val = u64::from(val);
+    const STOP_BIT: u64 = 128u64;
+    match val {
+        0...STOP_1 => (val | STOP_BIT, 1),
+        START_2...STOP_2 => (
+            (val & MASK_1) | ((val & MASK_2) << 1) | (STOP_BIT << (8)),
+            2,
+        ),
+        START_3...STOP_3 => (
+            (val & MASK_1) | ((val & MASK_2) << 1) | ((val & MASK_3) << 2) | (STOP_BIT << (8 * 2)),
+            3,
+        ),
+        START_4...STOP_4 => (
+            (val & MASK_1)
+                | ((val & MASK_2) << 1)
+                | ((val & MASK_3) << 2)
+                | ((val & MASK_4) << 3)
+                | (STOP_BIT << (8 * 3)),
+            4,
+        ),
+        _ => (
+            (val & MASK_1)
+                | ((val & MASK_2) << 1)
+                | ((val & MASK_3) << 2)
+                | ((val & MASK_4) << 3)
+                | ((val & MASK_5) << 4)
+                | (STOP_BIT << (8 * 4)),
+            5,
+        ),
+    }
+}
+
+/// Returns the number of bytes covered by a
+/// serialized vint `u32`.
+///
+/// Expects a buffer data that starts
+/// by the serialized `vint`, scans at most 5 bytes ahead until
+/// it finds the vint final byte.
+///
+/// # May Panic
+/// If the payload does not start by a valid `vint`
+fn vint_len(data: &[u8]) -> usize {
+    for i in 0..5.min(data.len()) {
+        if data[i] >= STOP_BIT {
+            return i + 1;
+        }
+    }
+    panic!("Corrupted data. Invalid VInt 32");
+}
+
+/// Reads a vint `u32` from a buffer, and
+/// consumes its payload data.
+///
+/// # Panics
+///
+/// If the buffer does not start by a valid
+/// vint payload
+pub fn read_u32_vint(data: &mut &[u8]) -> u32 {
+    let vlen = vint_len(*data);
+    let mut result = 0u32;
+    let mut shift = 0u64;
+    for &b in &data[..vlen] {
+        result |= u32::from(b & 127u8) << shift;
+        shift += 7;
+    }
+    *data = &data[vlen..];
+    result
+}
+
+/// Write a `u32` as a vint payload.
+pub fn write_u32_vint<W: io::Write>(val: u32, writer: &mut W) -> io::Result<()> {
+    let (val, num_bytes) = serialize_vint_u32(val);
+    let mut buffer = [0u8; 8];
+    LittleEndian::write_u64(&mut buffer, val);
+    writer.write_all(&buffer[..num_bytes])
+}
+
 impl VInt {
    pub fn val(&self) -> u64 {
        self.0
@@ -24,7 +119,7 @@ impl VInt {
        output.extend(&buffer[0..num_bytes]);
    }

-    fn serialize_into(&self, buffer: &mut [u8; 10]) -> usize {
+    pub fn serialize_into(&self, buffer: &mut [u8; 10]) -> usize {
        let mut remaining = self.0;
        for (i, b) in buffer.iter_mut().enumerate() {
            let next_byte: u8 = (remaining % 128u64) as u8;
@@ -64,7 +159,7 @@ impl BinarySerializable for VInt {
                    return Err(io::Error::new(
                        io::ErrorKind::InvalidData,
                        "Reach end of buffer while reading VInt",
-                    ))
+                    ));
                }
            }
        }
@@ -74,7 +169,9 @@ impl BinarySerializable for VInt {
 #[cfg(test)]
 mod tests {

+    use super::serialize_vint_u32;
    use super::VInt;
+    use byteorder::{ByteOrder, LittleEndian};
    use common::BinarySerializable;

    fn aux_test_vint(val: u64) {
@@ -108,4 +205,28 @@ mod tests {
        }
        aux_test_vint(10);
    }
+
+    fn aux_test_serialize_vint_u32(val: u32) {
+        let mut buffer = [0u8; 10];
+        let mut buffer2 = [0u8; 10];
+        let len_vint = VInt(val as u64).serialize_into(&mut buffer);
+        let (vint, len) = serialize_vint_u32(val);
+        assert_eq!(len, len_vint, "len wrong for val {}", val);
+        LittleEndian::write_u64(&mut buffer2, vint);
+        assert_eq!(&buffer[..len], &buffer2[..len], "array wrong for {}", val);
+    }
+
+    #[test]
+    fn test_vint_u32() {
+        aux_test_serialize_vint_u32(0);
+        aux_test_serialize_vint_u32(1);
+        aux_test_serialize_vint_u32(5);
+        for i in 1..3 {
+            let power_of_128 = 1u32 << (7 * i);
+            aux_test_serialize_vint_u32(power_of_128 - 1u32);
+            aux_test_serialize_vint_u32(power_of_128);
+            aux_test_serialize_vint_u32(power_of_128 + 1u32);
+        }
+        aux_test_serialize_vint_u32(u32::max_value());
+    }
 }
--- a/src/core/executor.rs
+++ b/src/core/executor.rs
@@ -64,17 +64,18 @@ impl Executor {
                    // This is important as it makes it possible for the fruit_receiver iteration to
                    // terminate.
                };
-                let mut results = Vec::with_capacity(num_fruits);
-                unsafe { results.set_len(num_fruits) };
-                let mut num_items = 0;
+                // This is lame, but safe.
+                let mut results_with_position = Vec::with_capacity(num_fruits);
                for (pos, fruit_res) in fruit_receiver {
-                    results[pos] = fruit_res?;
-                    num_items += 1;
+                    let fruit = fruit_res?;
+                    results_with_position.push((pos, fruit));
                }
-                // this checks ensures that we filled of this
-                // uninitialized memory.
-                assert_eq!(num_items, results.len());
-                Ok(results)
+                results_with_position.sort_by_key(|(pos, _)| *pos);
+                assert_eq!(results_with_position.len(), num_fruits);
+                Ok(results_with_position
+                    .into_iter()
+                    .map(|(_, fruit)| fruit)
+                    .collect::<Vec<_>>())
            }
        }
    }
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -13,6 +13,7 @@ use directory::ManagedDirectory;
 #[cfg(feature = "mmap")]
 use directory::MmapDirectory;
 use directory::{Directory, RAMDirectory};
+use error::DataCorruption;
 use error::TantivyError;
 use indexer::index_writer::open_index_writer;
 use indexer::index_writer::HEAP_SIZE_MIN;
@@ -37,7 +38,13 @@ fn load_metas(directory: &Directory) -> Result<IndexMeta> {
    let meta_data = directory.atomic_read(&META_FILEPATH)?;
    let meta_string = String::from_utf8_lossy(&meta_data);
    serde_json::from_str(&meta_string)
-        .map_err(|_| TantivyError::CorruptedFile(META_FILEPATH.clone()))
+        .map_err(|e| {
+            DataCorruption::new(
+                META_FILEPATH.clone(),
+                format!("Meta file cannot be deserialized. {:?}.", e),
+            )
+        })
+        .map_err(From::from)
 }

 /// Search Index
--- a/src/core/searcher.rs
+++ b/src/core/searcher.rs
@@ -23,7 +23,7 @@ fn collect_segment<C: Collector>(
    weight: &Weight,
    segment_ord: u32,
    segment_reader: &SegmentReader,
-) -> Result<C::SegmentFruit> {
+) -> Result<C::Fruit> {
    let mut scorer = weight.scorer(segment_reader)?;
    let mut segment_collector = collector.for_segment(segment_ord as u32, segment_reader)?;
    if let Some(delete_bitset) = segment_reader.delete_bitset() {
--- a/src/directory/managed_directory.rs
+++ b/src/directory/managed_directory.rs
@@ -1,7 +1,7 @@
 use core::MANAGED_FILEPATH;
 use directory::error::{DeleteError, IOError, OpenReadError, OpenWriteError};
 use directory::{ReadOnlySource, WritePtr};
-use error::TantivyError;
+use error::DataCorruption;
 use indexer::LockType;
 use serde_json;
 use std::collections::HashSet;
@@ -64,7 +64,12 @@ impl ManagedDirectory {
            Ok(data) => {
                let managed_files_json = String::from_utf8_lossy(&data);
                let managed_files: HashSet<PathBuf> = serde_json::from_str(&managed_files_json)
-                    .map_err(|_| TantivyError::CorruptedFile(MANAGED_FILEPATH.clone()))?;
+                    .map_err(|e| {
+                        DataCorruption::new(
+                            MANAGED_FILEPATH.clone(),
+                            format!("Managed file cannot be deserialized: {:?}. ", e),
+                        )
+                    })?;
                Ok(ManagedDirectory {
                    directory: Box::new(directory),
                    meta_informations: Arc::new(RwLock::new(MetaInformation {
--- a/src/error.rs
+++ b/src/error.rs
@@ -8,9 +8,42 @@ use indexer::LockType;
 use query;
 use schema;
 use serde_json;
+use std::fmt;
 use std::path::PathBuf;
 use std::sync::PoisonError;

+pub struct DataCorruption {
+    filepath: Option<PathBuf>,
+    comment: String,
+}
+
+impl DataCorruption {
+    pub fn new(filepath: PathBuf, comment: String) -> DataCorruption {
+        DataCorruption {
+            filepath: Some(filepath),
+            comment,
+        }
+    }
+
+    pub fn comment_only(comment: String) -> DataCorruption {
+        DataCorruption {
+            filepath: None,
+            comment,
+        }
+    }
+}
+
+impl fmt::Debug for DataCorruption {
+    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        write!(f, "Data corruption: ")?;
+        if let Some(ref filepath) = &self.filepath {
+            write!(f, "(in file `{:?}`)", filepath)?;
+        }
+        write!(f, ": {}.", self.comment)?;
+        Ok(())
+    }
+}
+
 /// The library's failure based error enum
 #[derive(Debug, Fail)]
 pub enum TantivyError {
@@ -33,8 +66,8 @@ pub enum TantivyError {
    #[fail(display = "An IO error occurred: '{}'", _0)]
    IOError(#[cause] IOError),
    /// Data corruption.
-    #[fail(display = "File contains corrupted data: '{:?}'", _0)]
-    CorruptedFile(PathBuf),
+    #[fail(display = "{:?}", _0)]
+    DataCorruption(DataCorruption),
    /// A thread holding the locked panicked and poisoned the lock.
    #[fail(display = "A thread holding the locked panicked and poisoned the lock")]
    Poisoned,
@@ -55,6 +88,12 @@ pub enum TantivyError {
    SystemError(String),
 }

+impl From<DataCorruption> for TantivyError {
+    fn from(data_corruption: DataCorruption) -> TantivyError {
+        TantivyError::DataCorruption(data_corruption)
+    }
+}
+
 impl From<FastFieldNotAvailableError> for TantivyError {
    fn from(fastfield_error: FastFieldNotAvailableError) -> TantivyError {
        TantivyError::FastFieldError(fastfield_error)
--- a/src/fastfield/facet_reader.rs
+++ b/src/fastfield/facet_reader.rs
@@ -1,5 +1,6 @@
 use super::MultiValueIntFastFieldReader;
 use schema::Facet;
+use std::str;
 use termdict::TermDictionary;
 use termdict::TermOrdinal;
 use DocId;
@@ -20,6 +21,7 @@ use DocId;
 pub struct FacetReader {
    term_ords: MultiValueIntFastFieldReader<u64>,
    term_dict: TermDictionary,
+    buffer: Vec<u8>,
 }

 impl FacetReader {
@@ -37,6 +39,7 @@ impl FacetReader {
        FacetReader {
            term_ords,
            term_dict,
+            buffer: vec![],
        }
    }

@@ -55,11 +58,18 @@ impl FacetReader {
    }

    /// Given a term ordinal returns the term associated to it.
-    pub fn facet_from_ord(&self, facet_ord: TermOrdinal, output: &mut Facet) {
+    pub fn facet_from_ord(
+        &mut self,
+        facet_ord: TermOrdinal,
+        output: &mut Facet,
+    ) -> Result<(), str::Utf8Error> {
        let found_term = self
            .term_dict
-            .ord_to_term(facet_ord as u64, output.inner_buffer_mut());
+            .ord_to_term(facet_ord as u64, &mut self.buffer);
        assert!(found_term, "Term ordinal {} no found.", facet_ord);
+        let facet_str = str::from_utf8(&self.buffer[..])?;
+        output.set_facet_str(facet_str);
+        Ok(())
    }

    /// Return the list of facet ordinals associated to a document.
--- a/src/fastfield/multivalued/reader.rs
+++ b/src/fastfield/multivalued/reader.rs
@@ -82,20 +82,20 @@ mod tests {

        let mut facet = Facet::root();
        {
-            facet_reader.facet_from_ord(1, &mut facet);
+            facet_reader.facet_from_ord(1, &mut facet).unwrap();
            assert_eq!(facet, Facet::from("/category"));
        }
        {
-            facet_reader.facet_from_ord(2, &mut facet);
+            facet_reader.facet_from_ord(2, &mut facet).unwrap();
            assert_eq!(facet, Facet::from("/category/cat1"));
        }
        {
-            facet_reader.facet_from_ord(3, &mut facet);
+            facet_reader.facet_from_ord(3, &mut facet).unwrap();
            assert_eq!(format!("{}", facet), "/category/cat2");
            assert_eq!(facet, Facet::from("/category/cat2"));
        }
        {
-            facet_reader.facet_from_ord(4, &mut facet);
+            facet_reader.facet_from_ord(4, &mut facet).unwrap();
            assert_eq!(facet, Facet::from("/category/cat3"));
        }

--- a/src/fastfield/reader.rs
+++ b/src/fastfield/reader.rs
@@ -79,11 +79,8 @@ impl<Item: FastValue> FastFieldReader<Item> {
    // TODO change start to `u64`.
    // For multifastfield, start is an index in a second fastfield, not a `DocId`
    pub fn get_range(&self, start: u32, output: &mut [Item]) {
-        // ok: Item is either `u64` or `i64`
-        let output_u64: &mut [u64] = unsafe { &mut *(output as *mut [Item] as *mut [u64]) };
-        self.bit_unpacker.get_range(start, output_u64);
-        for out in output_u64.iter_mut() {
-            *out = Item::from_u64(*out + self.min_value_u64).as_u64();
+        for (i, out) in output.iter_mut().enumerate() {
+            *out = self.get(start + i as u32);
        }
    }

--- a/src/indexer/index_writer.rs
+++ b/src/indexer/index_writer.rs
@@ -26,7 +26,6 @@ use schema::Document;
 use schema::IndexRecordOption;
 use schema::Term;
 use std::mem;
-use std::mem::swap;
 use std::thread;
 use std::thread::JoinHandle;
 use Result;
@@ -52,17 +51,19 @@ type DocumentReceiver = channel::Receiver<AddOperation>;
 ///
 /// Returns (the heap size in bytes, the hash table size in number of bits)
 fn initial_table_size(per_thread_memory_budget: usize) -> usize {
+    assert!(per_thread_memory_budget > 1_000);
    let table_size_limit: usize = per_thread_memory_budget / 3;
-    (1..)
+    if let Some(limit) = (1..)
        .take_while(|num_bits: &usize| compute_table_size(*num_bits) < table_size_limit)
        .last()
-        .unwrap_or_else(|| {
-            panic!(
-                "Per thread memory is too small: {}",
-                per_thread_memory_budget
-            )
-        })
-        .min(19) // we cap it at 512K
+    {
+        limit.min(19) // we cap it at 2^19 = 512K.
+    } else {
+        unreachable!(
+            "Per thread memory is too small: {}",
+            per_thread_memory_budget
+        );
+    }
 }

 /// `IndexWriter` is the user entry-point to add document to an index.
@@ -302,7 +303,7 @@ fn index_documents(

    let last_docstamp: u64 = *(doc_opstamps.last().unwrap());

-    let segment_entry: SegmentEntry = if delete_cursor.get().is_some() {
+    let delete_bitset_opt = if delete_cursor.get().is_some() {
        let doc_to_opstamps = DocToOpstampMapping::from(doc_opstamps);
        let segment_reader = SegmentReader::open(segment)?;
        let mut deleted_bitset = BitSet::with_capacity(num_docs as usize);
@@ -313,18 +314,17 @@ fn index_documents(
            &doc_to_opstamps,
            last_docstamp,
        )?;
-        SegmentEntry::new(segment_meta, delete_cursor, {
-            if may_have_deletes {
-                Some(deleted_bitset)
-            } else {
-                None
-            }
-        })
+        if may_have_deletes {
+            Some(deleted_bitset)
+        } else {
+            None
+        }
    } else {
        // if there are no delete operation in the queue, no need
        // to even open the segment.
-        SegmentEntry::new(segment_meta, delete_cursor, None)
+        None
    };
+    let segment_entry = SegmentEntry::new(segment_meta, delete_cursor, delete_bitset_opt);
    Ok(segment_updater.add_segment(generation, segment_entry))
 }

@@ -467,11 +467,10 @@ impl IndexWriter {
    ///
    /// Returns the former segment_ready channel.
    fn recreate_document_channel(&mut self) -> DocumentReceiver {
-        let (mut document_sender, mut document_receiver): (DocumentSender, DocumentReceiver) =
+        let (document_sender, document_receiver): (DocumentSender, DocumentReceiver) =
            channel::bounded(PIPELINE_MAX_SIZE_IN_DOCS);
-        swap(&mut self.document_sender, &mut document_sender);
-        swap(&mut self.document_receiver, &mut document_receiver);
-        document_receiver
+        mem::replace(&mut self.document_sender, document_sender);
+        mem::replace(&mut self.document_receiver, document_receiver)
    }

    /// Rollback to the last commit
@@ -558,17 +557,13 @@ impl IndexWriter {
        // and recreate a new one channels.
        self.recreate_document_channel();

-        let mut former_workers_join_handle = Vec::new();
-        swap(
-            &mut former_workers_join_handle,
-            &mut self.workers_join_handle,
-        );
+        let former_workers_join_handle =
+            mem::replace(&mut self.workers_join_handle, Vec::new());

        for worker_handle in former_workers_join_handle {
            let indexing_worker_result = worker_handle
                .join()
                .map_err(|e| TantivyError::ErrorInThread(format!("{:?}", e)))?;
-
            indexing_worker_result?;
            // add a new worker for the next generation.
            self.add_indexing_worker()?;
--- a/src/indexer/merger.rs
+++ b/src/indexer/merger.rs
@@ -1255,6 +1255,36 @@ mod tests {
        }
    }

+
+    use schema::INT_INDEXED;
+    #[test]
+    fn test_bug_merge() {
+        let mut schema_builder = schema::Schema::builder();
+        let int_field = schema_builder.add_u64_field("intvals", INT_INDEXED);
+        let index = Index::create_in_ram(schema_builder.build());
+        let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
+        index_writer.add_document(doc!(int_field => 1u64));
+        index_writer.commit().expect("commit failed");
+        index_writer.add_document(doc!(int_field => 1u64));
+        index_writer.commit().expect("commit failed");
+        index.load_searchers().unwrap();
+        let searcher = index.searcher();
+        assert_eq!(searcher.num_docs(), 2);
+        index_writer.delete_term(Term::from_field_u64(int_field, 1));
+        let segment_ids = index
+            .searchable_segment_ids()
+            .expect("Searchable segments failed.");
+        index_writer
+            .merge(&segment_ids)
+            .expect("Failed to initiate merge")
+            .wait()
+            .expect("Merging failed");
+        index.load_searchers().unwrap();
+        // commit has not been called yet. The document should still be
+        // there.
+        assert_eq!(index.searcher().num_docs(), 2);
+    }
+
    #[test]
    fn test_merge_multivalued_int_fields_all_deleted() {
        let mut schema_builder = schema::Schema::builder();
--- a/src/indexer/segment_writer.rs
+++ b/src/indexer/segment_writer.rs
@@ -111,19 +111,18 @@ impl SegmentWriter {
            }
            match *field_options.field_type() {
                FieldType::HierarchicalFacet => {
-                    let facets: Vec<&[u8]> = field_values
+                    let facets: Vec<&str> = field_values
                        .iter()
                        .flat_map(|field_value| match *field_value.value() {
-                            Value::Facet(ref facet) => Some(facet.encoded_bytes()),
+                            Value::Facet(ref facet) => Some(facet.encoded_str()),
                            _ => {
                                panic!("Expected hierarchical facet");
                            }
                        })
                        .collect();
                    let mut term = Term::for_field(field); // we set the Term
-                    for facet_bytes in facets {
+                    for fake_str in facets {
                        let mut unordered_term_id_opt = None;
-                        let fake_str = unsafe { str::from_utf8_unchecked(facet_bytes) };
                        FacetTokenizer.token_stream(fake_str).process(&mut |token| {
                            term.set_text(&token.text);
                            let unordered_term_id =
--- a/src/postings/postings_writer.rs
+++ b/src/postings/postings_writer.rs
@@ -1,6 +1,8 @@
 use super::stacker::{Addr, MemoryArena, TermHashMap};

-use postings::recorder::{NothingRecorder, Recorder, TFAndPositionRecorder, TermFrequencyRecorder};
+use postings::recorder::{
+    BufferLender, NothingRecorder, Recorder, TFAndPositionRecorder, TermFrequencyRecorder,
+};
 use postings::UnorderedTermId;
 use postings::{FieldSerializer, InvertedIndexSerializer};
 use schema::IndexRecordOption;
@@ -213,7 +215,7 @@ pub trait PostingsWriter {

 /// The `SpecializedPostingsWriter` is just here to remove dynamic
 /// dispatch to the recorder information.
-pub struct SpecializedPostingsWriter<Rec: Recorder + 'static> {
+pub(crate) struct SpecializedPostingsWriter<Rec: Recorder + 'static> {
    total_num_tokens: u64,
    _recorder_type: PhantomData<Rec>,
 }
@@ -245,8 +247,7 @@ impl<Rec: Recorder + 'static> PostingsWriter for SpecializedPostingsWriter<Rec>
        debug_assert!(term.as_slice().len() >= 4);
        self.total_num_tokens += 1;
        term_index.mutate_or_create(term, |opt_recorder: Option<Rec>| {
-            if opt_recorder.is_some() {
-                let mut recorder = opt_recorder.unwrap();
+            if let Some(mut recorder) = opt_recorder {
                let current_doc = recorder.current_doc();
                if current_doc != doc {
                    recorder.close_doc(heap);
@@ -255,7 +256,7 @@ impl<Rec: Recorder + 'static> PostingsWriter for SpecializedPostingsWriter<Rec>
                recorder.record_position(position, heap);
                recorder
            } else {
-                let mut recorder = Rec::new(heap);
+                let mut recorder = Rec::new();
                recorder.new_doc(doc, heap);
                recorder.record_position(position, heap);
                recorder
@@ -270,10 +271,11 @@ impl<Rec: Recorder + 'static> PostingsWriter for SpecializedPostingsWriter<Rec>
        termdict_heap: &MemoryArena,
        heap: &MemoryArena,
    ) -> io::Result<()> {
+        let mut buffer_lender = BufferLender::default();
        for &(term_bytes, addr, _) in term_addrs {
-            let recorder: Rec = unsafe { termdict_heap.read(addr) };
+            let recorder: Rec = termdict_heap.read(addr);
            serializer.new_term(&term_bytes[4..])?;
-            recorder.serialize(serializer, heap)?;
+            recorder.serialize(&mut buffer_lender, serializer, heap)?;
            serializer.close_term()?;
        }
        Ok(())
--- a/src/postings/recorder.rs
+++ b/src/postings/recorder.rs
@@ -1,10 +1,51 @@
 use super::stacker::{ExpUnrolledLinkedList, MemoryArena};
+use common::{read_u32_vint, write_u32_vint};
 use postings::FieldSerializer;
-use std::{self, io};
+use std::io;
 use DocId;

 const EMPTY_ARRAY: [u32; 0] = [0u32; 0];
-const POSITION_END: u32 = std::u32::MAX;
+const POSITION_END: u32 = 0;
+
+#[derive(Default)]
+pub(crate) struct BufferLender {
+    buffer_u8: Vec<u8>,
+    buffer_u32: Vec<u32>,
+}
+
+impl BufferLender {
+    pub fn lend_u8(&mut self) -> &mut Vec<u8> {
+        self.buffer_u8.clear();
+        &mut self.buffer_u8
+    }
+    pub fn lend_all(&mut self) -> (&mut Vec<u8>, &mut Vec<u32>) {
+        self.buffer_u8.clear();
+        self.buffer_u32.clear();
+        (&mut self.buffer_u8, &mut self.buffer_u32)
+    }
+}
+
+pub struct VInt32Reader<'a> {
+    data: &'a [u8],
+}
+
+impl<'a> VInt32Reader<'a> {
+    fn new(data: &'a [u8]) -> VInt32Reader<'a> {
+        VInt32Reader { data }
+    }
+}
+
+impl<'a> Iterator for VInt32Reader<'a> {
+    type Item = u32;
+
+    fn next(&mut self) -> Option<u32> {
+        if self.data.is_empty() {
+            None
+        } else {
+            Some(read_u32_vint(&mut self.data))
+        }
+    }
+}

 /// Recorder is in charge of recording relevant information about
 /// the presence of a term in a document.
@@ -15,9 +56,9 @@ const POSITION_END: u32 = std::u32::MAX;
 ///   * the document id
 ///   * the term frequency
 ///   * the term positions
-pub trait Recorder: Copy {
+pub(crate) trait Recorder: Copy + 'static {
    ///
-    fn new(heap: &mut MemoryArena) -> Self;
+    fn new() -> Self;
    /// Returns the current document
    fn current_doc(&self) -> u32;
    /// Starts recording information about a new document
@@ -29,7 +70,12 @@ pub trait Recorder: Copy {
    /// Close the document. It will help record the term frequency.
    fn close_doc(&mut self, heap: &mut MemoryArena);
    /// Pushes the postings information to the serializer.
-    fn serialize(&self, serializer: &mut FieldSerializer, heap: &MemoryArena) -> io::Result<()>;
+    fn serialize(
+        &self,
+        buffer_lender: &mut BufferLender,
+        serializer: &mut FieldSerializer,
+        heap: &MemoryArena,
+    ) -> io::Result<()>;
 }

 /// Only records the doc ids
@@ -40,9 +86,9 @@ pub struct NothingRecorder {
 }

 impl Recorder for NothingRecorder {
-    fn new(heap: &mut MemoryArena) -> Self {
+    fn new() -> Self {
        NothingRecorder {
-            stack: ExpUnrolledLinkedList::new(heap),
+            stack: ExpUnrolledLinkedList::new(),
            current_doc: u32::max_value(),
        }
    }
@@ -53,16 +99,23 @@ impl Recorder for NothingRecorder {

    fn new_doc(&mut self, doc: DocId, heap: &mut MemoryArena) {
        self.current_doc = doc;
-        self.stack.push(doc, heap);
+        let _ = write_u32_vint(doc, &mut self.stack.writer(heap));
    }

    fn record_position(&mut self, _position: u32, _heap: &mut MemoryArena) {}

    fn close_doc(&mut self, _heap: &mut MemoryArena) {}

-    fn serialize(&self, serializer: &mut FieldSerializer, heap: &MemoryArena) -> io::Result<()> {
-        for doc in self.stack.iter(heap) {
-            serializer.write_doc(doc, 0u32, &EMPTY_ARRAY)?;
+    fn serialize(
+        &self,
+        buffer_lender: &mut BufferLender,
+        serializer: &mut FieldSerializer,
+        heap: &MemoryArena,
+    ) -> io::Result<()> {
+        let buffer = buffer_lender.lend_u8();
+        self.stack.read_to_end(heap, buffer);
+        for doc in VInt32Reader::new(&buffer[..]) {
+            serializer.write_doc(doc as u32, 0u32, &EMPTY_ARRAY)?;
        }
        Ok(())
    }
@@ -77,9 +130,9 @@ pub struct TermFrequencyRecorder {
 }

 impl Recorder for TermFrequencyRecorder {
-    fn new(heap: &mut MemoryArena) -> Self {
+    fn new() -> Self {
        TermFrequencyRecorder {
-            stack: ExpUnrolledLinkedList::new(heap),
+            stack: ExpUnrolledLinkedList::new(),
            current_doc: u32::max_value(),
            current_tf: 0u32,
        }
@@ -91,7 +144,7 @@ impl Recorder for TermFrequencyRecorder {

    fn new_doc(&mut self, doc: DocId, heap: &mut MemoryArena) {
        self.current_doc = doc;
-        self.stack.push(doc, heap);
+        let _ = write_u32_vint(doc, &mut self.stack.writer(heap));
    }

    fn record_position(&mut self, _position: u32, _heap: &mut MemoryArena) {
@@ -100,24 +153,24 @@ impl Recorder for TermFrequencyRecorder {

    fn close_doc(&mut self, heap: &mut MemoryArena) {
        debug_assert!(self.current_tf > 0);
-        self.stack.push(self.current_tf, heap);
+        let _ = write_u32_vint(self.current_tf, &mut self.stack.writer(heap));
        self.current_tf = 0;
    }

-    fn serialize(&self, serializer: &mut FieldSerializer, heap: &MemoryArena) -> io::Result<()> {
-        // the last document has not been closed...
-        // its term freq is self.current_tf.
-        let mut doc_iter = self
-            .stack
-            .iter(heap)
-            .chain(Some(self.current_tf).into_iter());
-
-        while let Some(doc) = doc_iter.next() {
-            let term_freq = doc_iter
-                .next()
-                .expect("The IndexWriter recorded a doc without a term freq.");
-            serializer.write_doc(doc, term_freq, &EMPTY_ARRAY)?;
+    fn serialize(
+        &self,
+        buffer_lender: &mut BufferLender,
+        serializer: &mut FieldSerializer,
+        heap: &MemoryArena,
+    ) -> io::Result<()> {
+        let buffer = buffer_lender.lend_u8();
+        self.stack.read_to_end(heap, buffer);
+        let mut u32_it = VInt32Reader::new(&buffer[..]);
+        while let Some(doc) = u32_it.next() {
+            let term_freq = u32_it.next().unwrap_or(self.current_tf);
+            serializer.write_doc(doc as u32, term_freq, &EMPTY_ARRAY)?;
        }
+
        Ok(())
    }
 }
@@ -128,11 +181,10 @@ pub struct TFAndPositionRecorder {
    stack: ExpUnrolledLinkedList,
    current_doc: DocId,
 }
-
 impl Recorder for TFAndPositionRecorder {
-    fn new(heap: &mut MemoryArena) -> Self {
+    fn new() -> Self {
        TFAndPositionRecorder {
-            stack: ExpUnrolledLinkedList::new(heap),
+            stack: ExpUnrolledLinkedList::new(),
            current_doc: u32::max_value(),
        }
    }
@@ -143,33 +195,88 @@ impl Recorder for TFAndPositionRecorder {

    fn new_doc(&mut self, doc: DocId, heap: &mut MemoryArena) {
        self.current_doc = doc;
-        self.stack.push(doc, heap);
+        let _ = write_u32_vint(doc, &mut self.stack.writer(heap));
    }

    fn record_position(&mut self, position: u32, heap: &mut MemoryArena) {
-        self.stack.push(position, heap);
+        let _ = write_u32_vint(position + 1u32, &mut self.stack.writer(heap));
    }

    fn close_doc(&mut self, heap: &mut MemoryArena) {
-        self.stack.push(POSITION_END, heap);
+        let _ = write_u32_vint(POSITION_END, &mut self.stack.writer(heap));
    }

-    fn serialize(&self, serializer: &mut FieldSerializer, heap: &MemoryArena) -> io::Result<()> {
-        let mut doc_positions = Vec::with_capacity(100);
-        let mut positions_iter = self.stack.iter(heap);
-        while let Some(doc) = positions_iter.next() {
-            let mut prev_position = 0;
-            doc_positions.clear();
-            for position in &mut positions_iter {
-                if position == POSITION_END {
-                    break;
-                } else {
-                    doc_positions.push(position - prev_position);
-                    prev_position = position;
+    fn serialize(
+        &self,
+        buffer_lender: &mut BufferLender,
+        serializer: &mut FieldSerializer,
+        heap: &MemoryArena,
+    ) -> io::Result<()> {
+        let (buffer_u8, buffer_positions) = buffer_lender.lend_all();
+        self.stack.read_to_end(heap, buffer_u8);
+        let mut u32_it = VInt32Reader::new(&buffer_u8[..]);
+        while let Some(doc) = u32_it.next() {
+            let mut prev_position_plus_one = 1u32;
+            buffer_positions.clear();
+            loop {
+                match u32_it.next() {
+                    Some(POSITION_END) | None => {
+                        break;
+                    }
+                    Some(position_plus_one) => {
+                        let delta_position = position_plus_one - prev_position_plus_one;
+                        buffer_positions.push(delta_position);
+                        prev_position_plus_one = position_plus_one;
+                    }
                }
            }
-            serializer.write_doc(doc, doc_positions.len() as u32, &doc_positions)?;
+            serializer.write_doc(doc, buffer_positions.len() as u32, &buffer_positions)?;
        }
        Ok(())
    }
 }
+
+#[cfg(test)]
+mod tests {
+
+    use super::write_u32_vint;
+    use super::BufferLender;
+    use super::VInt32Reader;
+
+    #[test]
+    fn test_buffer_lender() {
+        let mut buffer_lender = BufferLender::default();
+        {
+            let buf = buffer_lender.lend_u8();
+            assert!(buf.is_empty());
+            buf.push(1u8);
+        }
+        {
+            let buf = buffer_lender.lend_u8();
+            assert!(buf.is_empty());
+            buf.push(1u8);
+        }
+        {
+            let (_, buf) = buffer_lender.lend_all();
+            assert!(buf.is_empty());
+            buf.push(1u32);
+        }
+        {
+            let (_, buf) = buffer_lender.lend_all();
+            assert!(buf.is_empty());
+            buf.push(1u32);
+        }
+    }
+
+    #[test]
+    fn test_vint_u32() {
+        let mut buffer = vec![];
+        let vals = [0, 1, 324_234_234, u32::max_value()];
+        for &i in &vals {
+            assert!(write_u32_vint(i, &mut buffer).is_ok());
+        }
+        assert_eq!(buffer.len(), 1 + 1 + 5 + 5);
+        let res: Vec<u32> = VInt32Reader::new(&buffer[..]).collect();
+        assert_eq!(&res[..], &vals[..]);
+    }
+}
--- a/src/postings/segment_postings.rs
+++ b/src/postings/segment_postings.rs
@@ -126,7 +126,6 @@ impl SegmentPostings {
 fn exponential_search(target: u32, arr: &[u32]) -> (usize, usize) {
    let mut start = 0;
    let end = arr.len();
-    debug_assert!(target >= arr[start]);
    debug_assert!(target <= arr[end - 1]);
    let mut jump = 1;
    loop {
@@ -216,11 +215,10 @@ impl DocSet for SegmentPostings {

        // we're in the right block now, start with an exponential search
        let block_docs = self.block_cursor.docs();
-
-        debug_assert!(target >= self.doc());
        let new_cur = self
            .cur
            .wrapping_add(search_within_block(&block_docs[self.cur..], target));
+
        if need_positions {
            sum_freqs_skipped += self.block_cursor.freqs()[self.cur..new_cur]
                .iter()
@@ -622,6 +620,7 @@ impl<'b> Streamer<'b> for BlockSegmentPostings {
 #[cfg(test)]
 mod tests {

+    use super::exponential_search;
    use super::search_within_block;
    use super::BlockSegmentPostings;
    use super::BlockSegmentPostingsSkipResult;
@@ -635,6 +634,7 @@ mod tests {
    use schema::Term;
    use schema::INT_INDEXED;
    use DocId;
+    use SkipResult;

    #[test]
    fn test_empty_segment_postings() {
@@ -662,6 +662,16 @@ mod tests {
            .0
    }

+    #[test]
+    fn test_exponentiel_search() {
+        assert_eq!(exponential_search(0, &[1, 2]), (0, 1));
+        assert_eq!(exponential_search(1, &[1, 2]), (0, 1));
+        assert_eq!(
+            exponential_search(7, &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
+            (3, 7)
+        );
+    }
+
    fn util_test_search_within_block(block: &[u32], target: u32) {
        assert_eq!(
            search_within_block(block, target),
@@ -693,7 +703,7 @@ mod tests {

    #[test]
    fn test_block_segment_postings() {
-        let mut block_segments = build_block_postings((0..100_000).collect::<Vec<u32>>());
+        let mut block_segments = build_block_postings(&(0..100_000).collect::<Vec<u32>>());
        let mut offset: u32 = 0u32;
        // checking that the block before calling advance is empty
        assert!(block_segments.docs().is_empty());
@@ -707,14 +717,44 @@ mod tests {
        }
    }

-    fn build_block_postings(docs: Vec<DocId>) -> BlockSegmentPostings {
+    #[test]
+    fn test_skip_right_at_new_block() {
+        let mut doc_ids = (0..128).collect::<Vec<u32>>();
+        doc_ids.push(129);
+        doc_ids.push(130);
+        {
+            let block_segments = build_block_postings(&doc_ids);
+            let mut docset = SegmentPostings::from_block_postings(block_segments, None);
+            assert_eq!(docset.skip_next(128), SkipResult::OverStep);
+            assert_eq!(docset.doc(), 129);
+            assert!(docset.advance());
+            assert_eq!(docset.doc(), 130);
+            assert!(!docset.advance());
+        }
+        {
+            let block_segments = build_block_postings(&doc_ids);
+            let mut docset = SegmentPostings::from_block_postings(block_segments, None);
+            assert_eq!(docset.skip_next(129), SkipResult::Reached);
+            assert_eq!(docset.doc(), 129);
+            assert!(docset.advance());
+            assert_eq!(docset.doc(), 130);
+            assert!(!docset.advance());
+        }
+        {
+            let block_segments = build_block_postings(&doc_ids);
+            let mut docset = SegmentPostings::from_block_postings(block_segments, None);
+            assert_eq!(docset.skip_next(131), SkipResult::End);
+        }
+    }
+
+    fn build_block_postings(docs: &[DocId]) -> BlockSegmentPostings {
        let mut schema_builder = Schema::builder();
        let int_field = schema_builder.add_u64_field("id", INT_INDEXED);
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);
        let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
        let mut last_doc = 0u32;
-        for doc in docs {
+        for &doc in docs {
            for _ in last_doc..doc {
                index_writer.add_document(doc!(int_field=>1u64));
            }
@@ -734,7 +774,7 @@ mod tests {
    #[test]
    fn test_block_segment_postings_skip() {
        for i in 0..4 {
-            let mut block_postings = build_block_postings(vec![3]);
+            let mut block_postings = build_block_postings(&[3]);
            assert_eq!(
                block_postings.skip_to(i),
                BlockSegmentPostingsSkipResult::Success(0u32)
@@ -744,7 +784,7 @@ mod tests {
                BlockSegmentPostingsSkipResult::Terminated
            );
        }
-        let mut block_postings = build_block_postings(vec![3]);
+        let mut block_postings = build_block_postings(&[3]);
        assert_eq!(
            block_postings.skip_to(4u32),
            BlockSegmentPostingsSkipResult::Terminated
@@ -757,7 +797,7 @@ mod tests {
        for i in 0..1300 {
            docs.push((i * i / 100) + i);
        }
-        let mut block_postings = build_block_postings(docs.clone());
+        let mut block_postings = build_block_postings(&docs[..]);
        for i in vec![0, 424, 10000] {
            assert_eq!(
                block_postings.skip_to(i),
--- a/src/postings/stacker/expull.rs
+++ b/src/postings/stacker/expull.rs
@@ -1,28 +1,37 @@
 use super::{Addr, MemoryArena};

-use common::is_power_of_2;
+use postings::stacker::memory_arena::load;
+use postings::stacker::memory_arena::store;
+use std::io;
 use std::mem;

 const MAX_BLOCK_LEN: u32 = 1u32 << 15;
+const FIRST_BLOCK: usize = 16;
+const INLINED_BLOCK_LEN: usize = FIRST_BLOCK + mem::size_of::<Addr>();

-const FIRST_BLOCK: u32 = 4u32;
+enum CapacityResult {
+    Available(u32),
+    NeedAlloc(u32),
+}

-#[inline]
-pub fn jump_needed(len: u32) -> Option<usize> {
+fn len_to_capacity(len: u32) -> CapacityResult {
    match len {
-        0...3 => None,
-        4...MAX_BLOCK_LEN => {
-            if is_power_of_2(len as usize) {
-                Some(len as usize)
+        0...15 => CapacityResult::Available(FIRST_BLOCK as u32 - len),
+        16...MAX_BLOCK_LEN => {
+            let cap = 1 << (32u32 - (len - 1u32).leading_zeros());
+            let available = cap - len;
+            if available == 0 {
+                CapacityResult::NeedAlloc(len)
            } else {
-                None
+                CapacityResult::Available(available)
            }
        }
        n => {
-            if n % MAX_BLOCK_LEN == 0 {
-                Some(MAX_BLOCK_LEN as usize)
+            let available = n % MAX_BLOCK_LEN;
+            if available == 0 {
+                CapacityResult::NeedAlloc(MAX_BLOCK_LEN)
            } else {
-                None
+                CapacityResult::Available(MAX_BLOCK_LEN - available)
            }
        }
    }
@@ -52,82 +61,119 @@ pub fn jump_needed(len: u32) -> Option<usize> {
 #[derive(Debug, Clone, Copy)]
 pub struct ExpUnrolledLinkedList {
    len: u32,
-    head: Addr,
    tail: Addr,
+    inlined_data: [u8; INLINED_BLOCK_LEN as usize],
+}
+
+pub struct ExpUnrolledLinkedListWriter<'a> {
+    eull: &'a mut ExpUnrolledLinkedList,
+    heap: &'a mut MemoryArena,
+}
+
+fn ensure_capacity<'a>(
+    eull: &'a mut ExpUnrolledLinkedList,
+    heap: &'a mut MemoryArena,
+) -> &'a mut [u8] {
+    if eull.len <= FIRST_BLOCK as u32 {
+        // We are still hitting the inline block.
+        if eull.len < FIRST_BLOCK as u32 {
+            return &mut eull.inlined_data[eull.len as usize..FIRST_BLOCK];
+        }
+        // We need to allocate a new block!
+        let new_block_addr: Addr = heap.allocate_space(FIRST_BLOCK + mem::size_of::<Addr>());
+        store(&mut eull.inlined_data[FIRST_BLOCK..], new_block_addr);
+        eull.tail = new_block_addr;
+        return heap.slice_mut(eull.tail, FIRST_BLOCK);
+    }
+    let len = match len_to_capacity(eull.len) {
+        CapacityResult::NeedAlloc(new_block_len) => {
+            let new_block_addr: Addr =
+                heap.allocate_space(new_block_len as usize + mem::size_of::<Addr>());
+            heap.write_at(eull.tail, new_block_addr);
+            eull.tail = new_block_addr;
+            new_block_len
+        }
+        CapacityResult::Available(available) => available,
+    };
+    heap.slice_mut(eull.tail, len as usize)
+}
+
+impl<'a> ExpUnrolledLinkedListWriter<'a> {
+    pub fn extend_from_slice(&mut self, mut buf: &[u8]) {
+        if buf.is_empty() {
+            // we need to cut early, because `ensure_capacity`
+            // allocates if there is no capacity at all right now.
+            return;
+        }
+        while !buf.is_empty() {
+            let add_len: usize;
+            {
+                let output_buf = ensure_capacity(self.eull, self.heap);
+                add_len = buf.len().min(output_buf.len());
+                output_buf[..add_len].copy_from_slice(&buf[..add_len]);
+            }
+            self.eull.len += add_len as u32;
+            self.eull.tail = self.eull.tail.offset(add_len as u32);
+            buf = &buf[add_len..];
+        }
+    }
+}
+
+impl<'a> io::Write for ExpUnrolledLinkedListWriter<'a> {
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        // There is no use case to only write the capacity.
+        // This is not IO after all, so we write the whole
+        // buffer even if the contract of `.write` is looser.
+        self.extend_from_slice(buf);
+        Ok(buf.len())
+    }
+
+    fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
+        self.extend_from_slice(buf);
+        Ok(())
+    }
+
+    fn flush(&mut self) -> io::Result<()> {
+        Ok(())
+    }
 }

 impl ExpUnrolledLinkedList {
-    pub fn new(heap: &mut MemoryArena) -> ExpUnrolledLinkedList {
-        let addr = heap.allocate_space((FIRST_BLOCK as usize) * mem::size_of::<u32>());
+    pub fn new() -> ExpUnrolledLinkedList {
        ExpUnrolledLinkedList {
            len: 0u32,
-            head: addr,
-            tail: addr,
+            tail: Addr::null_pointer(),
+            inlined_data: [0u8; INLINED_BLOCK_LEN as usize],
        }
    }

-    pub fn iter<'a>(&self, heap: &'a MemoryArena) -> ExpUnrolledLinkedListIterator<'a> {
-        ExpUnrolledLinkedListIterator {
-            heap,
-            addr: self.head,
-            len: self.len,
-            consumed: 0,
-        }
+    #[inline(always)]
+    pub fn writer<'a>(&'a mut self, heap: &'a mut MemoryArena) -> ExpUnrolledLinkedListWriter<'a> {
+        ExpUnrolledLinkedListWriter { eull: self, heap }
    }

-    /// Appends a new element to the current stack.
-    ///
-    /// If the current block end is reached, a new block is allocated.
-    pub fn push(&mut self, val: u32, heap: &mut MemoryArena) {
-        self.len += 1;
-        if let Some(new_block_len) = jump_needed(self.len) {
-            // We need to allocate another block.
-            // We also allocate an extra `u32` to store the pointer
-            // to the future next block.
-            let new_block_size: usize = (new_block_len + 1) * mem::size_of::<u32>();
-            let new_block_addr: Addr = heap.allocate_space(new_block_size);
-            unsafe {
-                // logic
-                heap.write(self.tail, new_block_addr)
-            };
-            self.tail = new_block_addr;
+    pub fn read_to_end(&self, heap: &MemoryArena, output: &mut Vec<u8>) {
+        let len = self.len as usize;
+        if len <= FIRST_BLOCK {
+            output.extend_from_slice(&self.inlined_data[..len]);
+            return;
        }
-        unsafe {
-            // logic
-            heap.write(self.tail, val);
-            self.tail = self.tail.offset(mem::size_of::<u32>() as u32);
-        }
-    }
-}
-
-pub struct ExpUnrolledLinkedListIterator<'a> {
-    heap: &'a MemoryArena,
-    addr: Addr,
-    len: u32,
-    consumed: u32,
-}
-
-impl<'a> Iterator for ExpUnrolledLinkedListIterator<'a> {
-    type Item = u32;
-
-    fn next(&mut self) -> Option<u32> {
-        if self.consumed == self.len {
-            None
-        } else {
-            self.consumed += 1;
-            let addr: Addr = if jump_needed(self.consumed).is_some() {
-                unsafe {
-                    // logic
-                    self.heap.read(self.addr)
-                }
-            } else {
-                self.addr
-            };
-            self.addr = addr.offset(mem::size_of::<u32>() as u32);
-            Some(unsafe {
-                // logic
-                self.heap.read(addr)
-            })
+        output.extend_from_slice(&self.inlined_data[..FIRST_BLOCK]);
+        let mut cur = FIRST_BLOCK;
+        let mut addr = load(&self.inlined_data[FIRST_BLOCK..]);
+        loop {
+            let cap = match len_to_capacity(cur as u32) {
+                CapacityResult::Available(capacity) => capacity,
+                CapacityResult::NeedAlloc(capacity) => capacity,
+            } as usize;
+            let data = heap.slice(addr, cap);
+            if cur + cap >= len {
+                output.extend_from_slice(&data[..(len - cur)]);
+                return;
+            }
+            output.extend_from_slice(data);
+            cur += cap;
+            addr = heap.read(addr.offset(cap as u32));
        }
    }
 }
@@ -136,39 +182,126 @@ impl<'a> Iterator for ExpUnrolledLinkedListIterator<'a> {
 mod tests {

    use super::super::MemoryArena;
-    use super::jump_needed;
+    use super::len_to_capacity;
    use super::*;
+    use byteorder::{ByteOrder, LittleEndian, WriteBytesExt};

+    #[test]
    #[test]
    fn test_stack() {
        let mut heap = MemoryArena::new();
-        let mut stack = ExpUnrolledLinkedList::new(&mut heap);
-        stack.push(1u32, &mut heap);
-        stack.push(2u32, &mut heap);
-        stack.push(4u32, &mut heap);
-        stack.push(8u32, &mut heap);
+        let mut stack = ExpUnrolledLinkedList::new();
+        stack.writer(&mut heap).extend_from_slice(&[1u8]);
+        stack.writer(&mut heap).extend_from_slice(&[2u8]);
+        stack.writer(&mut heap).extend_from_slice(&[3u8, 4u8]);
+        stack.writer(&mut heap).extend_from_slice(&[5u8]);
        {
-            let mut it = stack.iter(&heap);
-            assert_eq!(it.next().unwrap(), 1u32);
-            assert_eq!(it.next().unwrap(), 2u32);
-            assert_eq!(it.next().unwrap(), 4u32);
-            assert_eq!(it.next().unwrap(), 8u32);
-            assert!(it.next().is_none());
+            let mut buffer = Vec::new();
+            stack.read_to_end(&heap, &mut buffer);
+            assert_eq!(&buffer[..], &[1u8, 2u8, 3u8, 4u8, 5u8]);
        }
    }

    #[test]
-    fn test_jump_if_needed() {
-        let mut block_len = 4u32;
-        let mut i = 0;
-        while i < 10_000_000 {
-            assert!(jump_needed(i + block_len - 1).is_none());
-            assert!(jump_needed(i + block_len + 1).is_none());
-            assert!(jump_needed(i + block_len).is_some());
-            let new_block_len = jump_needed(i + block_len).unwrap();
-            i += block_len;
-            block_len = new_block_len as u32;
+    fn test_stack_long() {
+        let mut heap = MemoryArena::new();
+        let mut stack = ExpUnrolledLinkedList::new();
+        let source: Vec<u32> = (0..100).collect();
+        for &el in &source {
+            assert!(stack
+                .writer(&mut heap)
+                .write_u32::<LittleEndian>(el)
+                .is_ok());
        }
+        let mut buffer = Vec::new();
+        stack.read_to_end(&heap, &mut buffer);
+        let mut result = vec![];
+        let mut remaining = &buffer[..];
+        while !remaining.is_empty() {
+            result.push(LittleEndian::read_u32(&remaining[..4]));
+            remaining = &remaining[4..];
+        }
+        assert_eq!(&result[..], &source[..]);
+    }
+
+    #[test]
+    fn test_stack_interlaced() {
+        let mut heap = MemoryArena::new();
+        let mut stack = ExpUnrolledLinkedList::new();
+        let mut stack2 = ExpUnrolledLinkedList::new();
+
+        let mut vec1: Vec<u8> = vec![];
+        let mut vec2: Vec<u8> = vec![];
+
+        for i in 0..9 {
+            assert!(stack.writer(&mut heap).write_u32::<LittleEndian>(i).is_ok());
+            assert!(vec1.write_u32::<LittleEndian>(i).is_ok());
+            if i % 2 == 0 {
+                assert!(stack2
+                    .writer(&mut heap)
+                    .write_u32::<LittleEndian>(i)
+                    .is_ok());
+                assert!(vec2.write_u32::<LittleEndian>(i).is_ok());
+            }
+        }
+        let mut res1 = vec![];
+        let mut res2 = vec![];
+        stack.read_to_end(&heap, &mut res1);
+        stack2.read_to_end(&heap, &mut res2);
+        assert_eq!(&vec1[..], &res1[..]);
+        assert_eq!(&vec2[..], &res2[..]);
+    }
+
+    #[test]
+    fn test_jump_if_needed() {
+        let mut available = 16u32;
+        for i in 0..10_000_000 {
+            match len_to_capacity(i) {
+                CapacityResult::NeedAlloc(cap) => {
+                    assert_eq!(available, 0, "Failed len={}: Expected 0 got {}", i, cap);
+                    available = cap;
+                }
+                CapacityResult::Available(cap) => {
+                    assert_eq!(
+                        available, cap,
+                        "Failed len={}: Expected {} Got {}",
+                        i, available, cap
+                    );
+                }
+            }
+            available -= 1;
+        }
+    }
+
+    #[test]
+    fn test_jump_if_needed_progression() {
+        let mut v = vec![];
+        for i in 0.. {
+            if v.len() >= 10 {
+                break;
+            }
+            match len_to_capacity(i) {
+                CapacityResult::NeedAlloc(cap) => {
+                    v.push((i, cap));
+                }
+                _ => {}
+            }
+        }
+        assert_eq!(
+            &v[..],
+            &[
+                (16, 16),
+                (32, 32),
+                (64, 64),
+                (128, 128),
+                (256, 256),
+                (512, 512),
+                (1024, 1024),
+                (2048, 2048),
+                (4096, 4096),
+                (8192, 8192)
+            ]
+        );
    }
 }

@@ -176,6 +309,7 @@ mod tests {
 mod bench {
    use super::super::MemoryArena;
    use super::ExpUnrolledLinkedList;
+    use byteorder::{NativeEndian, WriteBytesExt};
    use test::Bencher;

    const NUM_STACK: usize = 10_000;
@@ -203,13 +337,13 @@ mod bench {
            let mut heap = MemoryArena::new();
            let mut stacks = Vec::with_capacity(100);
            for _ in 0..NUM_STACK {
-                let mut stack = ExpUnrolledLinkedList::new(&mut heap);
+                let mut stack = ExpUnrolledLinkedList::new();
                stacks.push(stack);
            }
            for s in 0..NUM_STACK {
                for i in 0u32..STACK_SIZE {
                    let t = s * 392017 % NUM_STACK;
-                    stacks[t].push(i, &mut heap);
+                    let _ = stacks[t].writer(&mut heap).write_u32::<NativeEndian>(i);
                }
            }
        });
--- a/src/postings/stacker/memory_arena.rs
+++ b/src/postings/stacker/memory_arena.rs
@@ -37,7 +37,7 @@ const PAGE_SIZE: usize = 1 << NUM_BITS_PAGE_ADDR; // pages are 1 MB large
 /// page of memory.
 ///
 /// The last 20 bits are an address within this page of memory.
-#[derive(Clone, Copy, Debug)]
+#[derive(Copy, Clone, Debug)]
 pub struct Addr(u32);

 impl Addr {
@@ -69,32 +69,16 @@ impl Addr {
    }
 }

-/// Trait required for an object to be `storable`.
-///
-/// # Warning
-///
-/// Most of the time you should not implement this trait,
-/// and only use the `MemoryArena` with object implementing `Copy`.
-///
-/// `ArenaStorable` is used in `tantivy` to force
-/// a `Copy` object and a `slice` of data to be stored contiguously.
-pub trait ArenaStorable {
-    fn num_bytes(&self) -> usize;
-    unsafe fn write_into(self, arena: &mut MemoryArena, addr: Addr);
+pub fn store<Item: Copy + 'static>(dest: &mut [u8], val: Item) {
+    assert_eq!(dest.len(), std::mem::size_of::<Item>());
+    unsafe {
+        ptr::write_unaligned(dest.as_mut_ptr() as *mut Item, val);
+    }
 }

-impl<V> ArenaStorable for V
-where
-    V: Copy,
-{
-    fn num_bytes(&self) -> usize {
-        mem::size_of::<V>()
-    }
-
-    unsafe fn write_into(self, arena: &mut MemoryArena, addr: Addr) {
-        let dst_ptr = arena.get_mut_ptr(addr) as *mut V;
-        ptr::write_unaligned(dst_ptr, self);
-    }
+pub fn load<Item: Copy + 'static>(data: &[u8]) -> Item {
+    assert_eq!(data.len(), std::mem::size_of::<Item>());
+    unsafe { ptr::read_unaligned(data.as_ptr() as *const Item) }
 }

 /// The `MemoryArena`
@@ -126,47 +110,9 @@ impl MemoryArena {
        self.pages.len() * PAGE_SIZE
    }

-    /// Writes a slice at the given address, assuming the
-    /// memory was allocated beforehands.
-    ///
-    ///  # Panics
-    ///
-    /// May panic or corrupt the heap if he space was not
-    /// properly allocated beforehands.
-    pub fn write_bytes<B: AsRef<[u8]>>(&mut self, addr: Addr, data: B) {
-        let bytes = data.as_ref();
-        self.pages[addr.page_id()]
-            .get_mut_slice(addr.page_local_addr(), bytes.len())
-            .copy_from_slice(bytes);
-    }
-
-    /// Returns the `len` bytes starting at `addr`
-    ///
-    /// # Panics
-    ///
-    /// Panics if the memory has not been allocated beforehands.
-    pub fn read_slice(&self, addr: Addr, len: usize) -> &[u8] {
-        self.pages[addr.page_id()].get_slice(addr.page_local_addr(), len)
-    }
-
-    unsafe fn get_mut_ptr(&mut self, addr: Addr) -> *mut u8 {
-        self.pages[addr.page_id()].get_mut_ptr(addr.page_local_addr())
-    }
-
-    /// Stores an item's data in the heap
-    ///
-    /// It allocates the `Item` beforehands.
-    pub fn store<Item: ArenaStorable>(&mut self, val: Item) -> Addr {
-        let num_bytes = val.num_bytes();
-        let addr = self.allocate_space(num_bytes);
-        unsafe {
-            self.write(addr, val);
-        };
-        addr
-    }
-
-    pub unsafe fn write<Item: ArenaStorable>(&mut self, addr: Addr, val: Item) {
-        val.write_into(self, addr)
+    pub fn write_at<Item: Copy + 'static>(&mut self, addr: Addr, val: Item) {
+        let dest = self.slice_mut(addr, std::mem::size_of::<Item>());
+        store(dest, val);
    }

    /// Read an item in the heap at the given `address`.
@@ -174,9 +120,21 @@ impl MemoryArena {
    /// # Panics
    ///
    /// If the address is erroneous
-    pub unsafe fn read<Item: Copy>(&self, addr: Addr) -> Item {
-        let ptr = self.pages[addr.page_id()].get_ptr(addr.page_local_addr());
-        ptr::read_unaligned(ptr as *const Item)
+    pub fn read<Item: Copy + 'static>(&self, addr: Addr) -> Item {
+        load(self.slice(addr, mem::size_of::<Item>()))
+    }
+
+    pub fn slice(&self, addr: Addr, len: usize) -> &[u8] {
+        self.pages[addr.page_id()].slice(addr.page_local_addr(), len)
+    }
+
+    pub fn slice_from(&self, addr: Addr) -> &[u8] {
+        self.pages[addr.page_id()].slice_from(addr.page_local_addr())
+    }
+
+    #[inline(always)]
+    pub fn slice_mut(&mut self, addr: Addr, len: usize) -> &mut [u8] {
+        self.pages[addr.page_id()].slice_mut(addr.page_local_addr(), len)
    }

    /// Allocates `len` bytes and returns the allocated address.
@@ -197,14 +155,10 @@ struct Page {

 impl Page {
    fn new(page_id: usize) -> Page {
-        let mut data: Vec<u8> = Vec::with_capacity(PAGE_SIZE);
-        unsafe {
-            data.set_len(PAGE_SIZE);
-        } // avoid initializing page
        Page {
            page_id,
            len: 0,
-            data: data.into_boxed_slice(),
+            data: vec![0u8; PAGE_SIZE].into_boxed_slice(),
        }
    }

@@ -213,12 +167,16 @@ impl Page {
        len + self.len <= PAGE_SIZE
    }

-    fn get_mut_slice(&mut self, local_addr: usize, len: usize) -> &mut [u8] {
-        &mut self.data[local_addr..][..len]
+    fn slice(&self, local_addr: usize, len: usize) -> &[u8] {
+        &self.slice_from(local_addr)[..len]
    }

-    fn get_slice(&self, local_addr: usize, len: usize) -> &[u8] {
-        &self.data[local_addr..][..len]
+    fn slice_from(&self, local_addr: usize) -> &[u8] {
+        &self.data[local_addr..]
+    }
+
+    fn slice_mut(&mut self, local_addr: usize, len: usize) -> &mut [u8] {
+        &mut self.data[local_addr..][..len]
    }

    fn allocate_space(&mut self, len: usize) -> Option<Addr> {
@@ -230,16 +188,6 @@ impl Page {
            None
        }
    }
-
-    #[inline(always)]
-    pub(crate) unsafe fn get_ptr(&self, addr: usize) -> *const u8 {
-        self.data.as_ptr().add(addr)
-    }
-
-    #[inline(always)]
-    pub(crate) unsafe fn get_mut_ptr(&mut self, addr: usize) -> *mut u8 {
-        self.data.as_mut_ptr().add(addr)
-    }
 }

 #[cfg(test)]
@@ -254,13 +202,13 @@ mod tests {
        let b = b"happy tax payer";

        let addr_a = arena.allocate_space(a.len());
-        arena.write_bytes(addr_a, a);
+        arena.slice_mut(addr_a, a.len()).copy_from_slice(a);

        let addr_b = arena.allocate_space(b.len());
-        arena.write_bytes(addr_b, b);
+        arena.slice_mut(addr_b, b.len()).copy_from_slice(b);

-        assert_eq!(arena.read_slice(addr_a, a.len()), a);
-        assert_eq!(arena.read_slice(addr_b, b.len()), b);
+        assert_eq!(arena.slice(addr_a, a.len()), a);
+        assert_eq!(arena.slice(addr_b, b.len()), b);
    }

    #[derive(Clone, Copy, Debug, Eq, PartialEq)]
@@ -283,9 +231,15 @@ mod tests {
            b: 221,
            c: 12,
        };
-        let addr_a = arena.store(a);
-        let addr_b = arena.store(b);
-        assert_eq!(unsafe { arena.read::<MyTest>(addr_a) }, a);
-        assert_eq!(unsafe { arena.read::<MyTest>(addr_b) }, b);
+
+        let num_bytes = std::mem::size_of::<MyTest>();
+        let addr_a = arena.allocate_space(num_bytes);
+        arena.write_at(addr_a, a);
+
+        let addr_b = arena.allocate_space(num_bytes);
+        arena.write_at(addr_b, b);
+
+        assert_eq!(arena.read::<MyTest>(addr_a), a);
+        assert_eq!(arena.read::<MyTest>(addr_b), b);
    }
 }
--- a/src/postings/stacker/mod.rs
+++ b/src/postings/stacker/mod.rs
@@ -1,9 +1,7 @@
 mod expull;
 mod memory_arena;
-mod murmurhash2;
 mod term_hashmap;

 pub use self::expull::ExpUnrolledLinkedList;
-pub use self::memory_arena::{Addr, ArenaStorable, MemoryArena};
-use self::murmurhash2::murmurhash2;
+pub use self::memory_arena::{Addr, MemoryArena};
 pub use self::term_hashmap::{compute_table_size, TermHashMap};
--- a/src/postings/stacker/murmurhash2.rs
+++ b/src/postings/stacker/murmurhash2.rs
@@ -1,87 +0,0 @@
-use std::ptr;
-const SEED: u32 = 3_242_157_231u32;
-const M: u32 = 0x5bd1_e995;
-
-#[inline(always)]
-pub fn murmurhash2(key: &[u8]) -> u32 {
-    #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
-    let mut key_ptr: *const u32 = key.as_ptr() as *const u32;
-    let len = key.len() as u32;
-    let mut h: u32 = SEED ^ len;
-
-    let num_blocks = len >> 2;
-    for _ in 0..num_blocks {
-        let mut k: u32 = unsafe { ptr::read_unaligned(key_ptr) }; // ok because of num_blocks definition
-        k = k.wrapping_mul(M);
-        k ^= k >> 24;
-        k = k.wrapping_mul(M);
-        h = h.wrapping_mul(M);
-        h ^= k;
-        key_ptr = key_ptr.wrapping_offset(1);
-    }
-
-    // Handle the last few bytes of the input array
-    let remaining: &[u8] = &key[key.len() & !3..];
-    match remaining.len() {
-        3 => {
-            h ^= u32::from(remaining[2]) << 16;
-            h ^= u32::from(remaining[1]) << 8;
-            h ^= u32::from(remaining[0]);
-            h = h.wrapping_mul(M);
-        }
-        2 => {
-            h ^= u32::from(remaining[1]) << 8;
-            h ^= u32::from(remaining[0]);
-            h = h.wrapping_mul(M);
-        }
-        1 => {
-            h ^= u32::from(remaining[0]);
-            h = h.wrapping_mul(M);
-        }
-        _ => {}
-    }
-    h ^= h >> 13;
-    h = h.wrapping_mul(M);
-    h ^ (h >> 15)
-}
-
-#[cfg(test)]
-mod test {
-
-    use super::murmurhash2;
-    use std::collections::HashSet;
-
-    #[test]
-    fn test_murmur() {
-        let s1 = "abcdef";
-        let s2 = "abcdeg";
-        for i in 0..5 {
-            assert_eq!(
-                murmurhash2(&s1[i..5].as_bytes()),
-                murmurhash2(&s2[i..5].as_bytes())
-            );
-        }
-    }
-
-    #[test]
-    fn test_murmur_against_reference_impl() {
-        assert_eq!(murmurhash2("".as_bytes()), 3632506080);
-        assert_eq!(murmurhash2("a".as_bytes()), 455683869);
-        assert_eq!(murmurhash2("ab".as_bytes()), 2448092234);
-        assert_eq!(murmurhash2("abc".as_bytes()), 2066295634);
-        assert_eq!(murmurhash2("abcd".as_bytes()), 2588571162);
-        assert_eq!(murmurhash2("abcde".as_bytes()), 2988696942);
-        assert_eq!(murmurhash2("abcdefghijklmnop".as_bytes()), 2350868870);
-    }
-
-    #[test]
-    fn test_murmur_collisions() {
-        let mut set: HashSet<u32> = HashSet::default();
-        for i in 0..10_000 {
-            let s = format!("hash{}", i);
-            let hash = murmurhash2(s.as_bytes());
-            set.insert(hash);
-        }
-        assert_eq!(set.len(), 10_000);
-    }
-}
--- a/src/postings/stacker/term_hashmap.rs
+++ b/src/postings/stacker/term_hashmap.rs
@@ -1,37 +1,16 @@
-use super::murmurhash2;
-use super::{Addr, ArenaStorable, MemoryArena};
+extern crate murmurhash32;
+
+use self::murmurhash32::murmurhash2;
+
+use super::{Addr, MemoryArena};
+use byteorder::{ByteOrder, NativeEndian};
+use postings::stacker::memory_arena::store;
 use std::iter;
 use std::mem;
 use std::slice;

 pub type BucketId = usize;

-struct KeyBytesValue<'a, V> {
-    key: &'a [u8],
-    value: V,
-}
-
-impl<'a, V> KeyBytesValue<'a, V> {
-    fn new(key: &'a [u8], value: V) -> KeyBytesValue<'a, V> {
-        KeyBytesValue { key, value }
-    }
-}
-
-impl<'a, V> ArenaStorable for KeyBytesValue<'a, V>
-where
-    V: ArenaStorable,
-{
-    fn num_bytes(&self) -> usize {
-        0u16.num_bytes() + self.key.len() + self.value.num_bytes()
-    }
-
-    unsafe fn write_into(self, arena: &mut MemoryArena, addr: Addr) {
-        arena.write(addr, self.key.len() as u16);
-        arena.write_bytes(addr.offset(2), self.key);
-        arena.write(addr.offset(2 + self.key.len() as u32), self.value);
-    }
-}
-
 /// Returns the actual memory size in bytes
 /// required to create a table of size $2^num_bits$.
 pub fn compute_table_size(num_bits: usize) -> usize {
@@ -111,8 +90,7 @@ impl<'a> Iterator for Iter<'a> {
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next().cloned().map(move |bucket: usize| {
            let kv = self.hashmap.table[bucket];
-            let (key, offset): (&'a [u8], Addr) =
-                unsafe { self.hashmap.get_key_value(kv.key_value_addr) };
+            let (key, offset): (&'a [u8], Addr) = self.hashmap.get_key_value(kv.key_value_addr);
            (key, offset, bucket as BucketId)
        })
    }
@@ -143,12 +121,22 @@ impl TermHashMap {
        self.table.len() < self.occupied.len() * 3
    }

-    unsafe fn get_key_value(&self, addr: Addr) -> (&[u8], Addr) {
-        let key_bytes_len = self.heap.read::<u16>(addr) as usize;
-        let key_addr = addr.offset(2u32);
-        let key_bytes: &[u8] = self.heap.read_slice(key_addr, key_bytes_len);
-        let val_addr: Addr = key_addr.offset(key_bytes.len() as u32);
-        (key_bytes, val_addr)
+    #[inline(always)]
+    fn get_key_value(&self, addr: Addr) -> (&[u8], Addr) {
+        let data = self.heap.slice_from(addr);
+        let key_bytes_len = NativeEndian::read_u16(data) as usize;
+        let key_bytes: &[u8] = &data[2..][..key_bytes_len];
+        (key_bytes, addr.offset(2u32 + key_bytes_len as u32))
+    }
+
+    #[inline(always)]
+    fn get_value_addr_if_key_match(&self, target_key: &[u8], addr: Addr) -> Option<Addr> {
+        let (stored_key, value_addr) = self.get_key_value(addr);
+        if stored_key == target_key {
+            Some(value_addr)
+        } else {
+            None
+        }
    }

    pub fn set_bucket(&mut self, hash: u32, key_value_addr: Addr, bucket: usize) {
@@ -199,36 +187,39 @@ impl TermHashMap {
    pub fn mutate_or_create<S, V, TMutator>(&mut self, key: S, mut updater: TMutator) -> BucketId
    where
        S: AsRef<[u8]>,
-        V: Copy,
+        V: Copy + 'static,
        TMutator: FnMut(Option<V>) -> V,
    {
        if self.is_saturated() {
            self.resize();
        }
        let key_bytes: &[u8] = key.as_ref();
-        let hash = murmurhash2::murmurhash2(key.as_ref());
+        let hash = murmurhash2(key.as_ref());
        let mut probe = self.probe(hash);
        loop {
            let bucket = probe.next_probe();
            let kv: KeyValue = self.table[bucket];
            if kv.is_empty() {
                let val = updater(None);
-                let key_addr = self.heap.store(KeyBytesValue::new(key_bytes, val));
+                let num_bytes =
+                    std::mem::size_of::<u16>() + key_bytes.len() + std::mem::size_of::<V>();
+                let key_addr = self.heap.allocate_space(num_bytes);
+                {
+                    let data = self.heap.slice_mut(key_addr, num_bytes);
+                    NativeEndian::write_u16(data, key_bytes.len() as u16);
+                    let stop = 2 + key_bytes.len();
+                    data[2..stop].copy_from_slice(key_bytes);
+                    store(&mut data[stop..], val);
+                }
                self.set_bucket(hash, key_addr, bucket);
                return bucket as BucketId;
            } else if kv.hash == hash {
-                let (key_matches, val_addr) = {
-                    let (stored_key, val_addr): (&[u8], Addr) =
-                        unsafe { self.get_key_value(kv.key_value_addr) };
-                    (stored_key == key_bytes, val_addr)
-                };
-                if key_matches {
-                    unsafe {
-                        // logic
-                        let v = self.heap.read(val_addr);
-                        let new_v = updater(Some(v));
-                        self.heap.write(val_addr, new_v);
-                    };
+                if let Some(val_addr) =
+                    self.get_value_addr_if_key_match(key_bytes, kv.key_value_addr)
+                {
+                    let v = self.heap.read(val_addr);
+                    let new_v = updater(Some(v));
+                    self.heap.write_at(val_addr, new_v);
                    return bucket as BucketId;
                }
            }
@@ -236,24 +227,6 @@ impl TermHashMap {
    }
 }

-#[cfg(all(test, feature = "unstable"))]
-mod bench {
-    use super::murmurhash2::murmurhash2;
-    use test::Bencher;
-
-    #[bench]
-    fn bench_murmurhash2(b: &mut Bencher) {
-        let keys: [&'static str; 3] = ["wer qwe qwe qwe ", "werbq weqweqwe2 ", "weraq weqweqwe3 "];
-        b.iter(|| {
-            let mut s = 0;
-            for &key in &keys {
-                s ^= murmurhash2(key.as_bytes());
-            }
-            s
-        });
-    }
-}
-
 #[cfg(test)]
 mod tests {

@@ -285,10 +258,7 @@ mod tests {
        let mut vanilla_hash_map = HashMap::new();
        let mut iter_values = hash_map.iter();
        while let Some((key, addr, _)) = iter_values.next() {
-            let val: u32 = unsafe {
-                // test
-                hash_map.heap.read(addr)
-            };
+            let val: u32 = hash_map.heap.read(addr);
            vanilla_hash_map.insert(key.to_owned(), val);
        }
        assert_eq!(vanilla_hash_map.len(), 2);
--- a/src/schema/facet.rs
+++ b/src/schema/facet.rs
@@ -6,6 +6,7 @@ use std::borrow::Cow;
 use std::fmt::{self, Debug, Display, Formatter};
 use std::io::{self, Read, Write};
 use std::str;
+use std::string::FromUtf8Error;

 const SLASH_BYTE: u8 = b'/';
 const ESCAPE_BYTE: u8 = b'\\';
@@ -14,6 +15,10 @@ const ESCAPE_BYTE: u8 = b'\\';
 /// representation of facets.
 pub const FACET_SEP_BYTE: u8 = 0u8;

+/// `char` used as a level separation in the binary
+/// representation of facets. (It is the null codepoint.)
+pub const FACET_SEP_CHAR: char = '\u{0}';
+
 /// A Facet represent a point in a given hierarchy.
 ///
 /// They are typically represented similarly to a filepath.
@@ -26,18 +31,18 @@ pub const FACET_SEP_BYTE: u8 = 0u8;
 /// its facet. In the example above, `/electronics/tv_and_video/`
 /// and `/electronics`.
 #[derive(Clone, Eq, Hash, PartialEq, Ord, PartialOrd)]
-pub struct Facet(Vec<u8>);
+pub struct Facet(String);

 impl Facet {
    /// Returns a new instance of the "root facet"
    /// Equivalent to `/`.
    pub fn root() -> Facet {
-        Facet(vec![])
+        Facet("".to_string())
    }

    /// Returns true iff the facet is the root facet `/`.
    pub fn is_root(&self) -> bool {
-        self.encoded_bytes().is_empty()
+        self.encoded_str().is_empty()
    }

    /// Returns a binary representation of the facet.
@@ -49,13 +54,19 @@ impl Facet {
    /// This representation has the benefit of making it possible to
    /// express "being a child of a given facet" as a range over
    /// the term ordinals.
-    pub fn encoded_bytes(&self) -> &[u8] {
+    pub fn encoded_str(&self) -> &str {
        &self.0
    }

+    pub(crate) fn from_encoded_string(facet_string: String) -> Facet {
+        Facet(facet_string)
+    }
+
    /// Creates a `Facet` from its binary representation.
-    pub(crate) unsafe fn from_encoded(encoded_bytes: Vec<u8>) -> Facet {
-        Facet(encoded_bytes)
+    pub fn from_encoded(encoded_bytes: Vec<u8>) -> Result<Facet, FromUtf8Error> {
+        // facet bytes validation. `0u8` is used a separator but that is still legal utf-8
+        //Ok(Facet(String::from_utf8(encoded_bytes)?))
+        String::from_utf8(encoded_bytes).map(Facet)
    }

    /// Parse a text representation of a facet.
@@ -79,36 +90,37 @@ impl Facet {
        Path: IntoIterator,
        Path::Item: ToString,
    {
-        let mut facet_bytes: Vec<u8> = Vec::with_capacity(100);
+        let mut facet_string: String = String::with_capacity(100);
        let mut step_it = path.into_iter();
        if let Some(step) = step_it.next() {
-            facet_bytes.extend_from_slice(step.to_string().as_bytes());
+            facet_string.push_str(&step.to_string());
        }
        for step in step_it {
-            facet_bytes.push(FACET_SEP_BYTE);
-            facet_bytes.extend_from_slice(step.to_string().as_bytes());
+            facet_string.push(FACET_SEP_CHAR);
+            facet_string.push_str(&step.to_string());
        }
-        Facet(facet_bytes)
+        Facet(facet_string)
    }

    /// Accessor for the inner buffer of the `Facet`.
-    pub(crate) fn inner_buffer_mut(&mut self) -> &mut Vec<u8> {
-        &mut self.0
+    pub(crate) fn set_facet_str(&mut self, facet_str: &str) {
+        self.0.clear();
+        self.0.push_str(facet_str);
    }

    /// Returns `true` iff other is a subfacet of `self`.
    pub fn is_prefix_of(&self, other: &Facet) -> bool {
-        let self_bytes: &[u8] = self.encoded_bytes();
-        let other_bytes: &[u8] = other.encoded_bytes();
-        self_bytes.len() < other_bytes.len()
-            && other_bytes.starts_with(self_bytes)
-            && other_bytes[self_bytes.len()] == 0u8
+        let self_str = self.encoded_str();
+        let other_str = other.encoded_str();
+        self_str.len() < other_str.len()
+            && other_str.starts_with(self_str)
+            && other_str.as_bytes()[self_str.len()] == FACET_SEP_BYTE
    }
 }

-impl Borrow<[u8]> for Facet {
-    fn borrow(&self) -> &[u8] {
-        self.encoded_bytes()
+impl Borrow<str> for Facet {
+    fn borrow(&self) -> &str {
+        self.encoded_str()
    }
 }

@@ -120,45 +132,51 @@ impl<'a, T: ?Sized + AsRef<str>> From<&'a T> for Facet {
            Idle,
        }
        let path: &str = path_asref.as_ref();
-        let mut facet_encoded = Vec::new();
+        assert!(!path.is_empty());
+        assert!(path.starts_with('/'));
+        let mut facet_encoded = String::new();
        let mut state = State::Idle;
        let path_bytes = path.as_bytes();
-        for &c in &path_bytes[1..] {
+        let mut last_offset = 1;
+        for i in 1..path_bytes.len() {
+            let c = path_bytes[i];
            match (state, c) {
-                (State::Idle, ESCAPE_BYTE) => state = State::Escaped,
+                (State::Idle, ESCAPE_BYTE) => {
+                    facet_encoded.push_str(&path[last_offset..i]);
+                    last_offset = i + 1;
+                    state = State::Escaped
+                }
                (State::Idle, SLASH_BYTE) => {
-                    facet_encoded.push(FACET_SEP_BYTE);
+                    facet_encoded.push_str(&path[last_offset..i]);
+                    facet_encoded.push(FACET_SEP_CHAR);
+                    last_offset = i + 1;
                }
-                (State::Escaped, any_char) => {
+                (State::Escaped, _escaped_char) => {
                    state = State::Idle;
-                    facet_encoded.push(any_char);
-                }
-                (State::Idle, other_char) => {
-                    facet_encoded.push(other_char);
                }
+                (State::Idle, _any_char) => {}
            }
        }
+        facet_encoded.push_str(&path[last_offset..]);
        Facet(facet_encoded)
    }
 }

 impl BinarySerializable for Facet {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
-        <Vec<u8> as BinarySerializable>::serialize(&self.0, writer)
+        <String as BinarySerializable>::serialize(&self.0, writer)
    }

    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
-        let bytes = <Vec<u8> as BinarySerializable>::deserialize(reader)?;
-        Ok(Facet(bytes))
+        Ok(Facet(<String as BinarySerializable>::deserialize(reader)?))
    }
 }

 impl Display for Facet {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
-        for step in self.0.split(|&b| b == FACET_SEP_BYTE) {
+        for step in self.0.split(FACET_SEP_CHAR) {
            write!(f, "/")?;
-            let step_str = unsafe { str::from_utf8_unchecked(step) };
-            write!(f, "{}", escape_slashes(step_str))?;
+            write!(f, "{}", escape_slashes(step))?;
        }
        Ok(())
    }
--- a/src/schema/term.rs
+++ b/src/schema/term.rs
@@ -32,7 +32,7 @@ impl Term {

    /// Creates a `Term` given a facet.
    pub fn from_facet(field: Field, facet: &Facet) -> Term {
-        let bytes = facet.encoded_bytes();
+        let bytes = facet.encoded_str().as_bytes();
        let buffer = Vec::with_capacity(4 + bytes.len());
        let mut term = Term(buffer);
        term.set_field(field);
@@ -68,12 +68,7 @@ impl Term {
        term
    }

-    /// Creates a new Term with an empty buffer,
-    /// but with a given capacity.
-    ///
-    /// It is declared unsafe, as the term content
-    /// is not initialized, and a call to `.field()`
-    /// would panic.
+    /// Creates a new Term for a given field.
    pub(crate) fn for_field(field: Field) -> Term {
        let mut term = Term(Vec::with_capacity(100));
        term.set_field(field);
--- a/src/termdict/mod.rs
+++ b/src/termdict/mod.rs
@@ -167,7 +167,7 @@ mod tests {
        let mut term_string = String::new();
        while term_it.advance() {
            //let term = Term::from_bytes(term_it.key());
-            term_string.push_str(unsafe { str::from_utf8_unchecked(term_it.key()) }); // ok test
+            term_string.push_str(str::from_utf8(term_it.key()).expect("test"));
        }
        assert_eq!(&*term_string, "abcdef");
    }
--- a/src/termdict/term_info_store.rs
+++ b/src/termdict/term_info_store.rs
@@ -1,4 +1,4 @@
-use byteorder::ByteOrder;
+use byteorder::{ByteOrder, LittleEndian};
 use common::bitpacker::BitPacker;
 use common::compute_num_bits;
 use common::Endianness;
@@ -7,7 +7,6 @@ use directory::ReadOnlySource;
 use postings::TermInfo;
 use std::cmp;
 use std::io::{self, Read, Write};
-use std::ptr;
 use termdict::TermOrdinal;

 const BLOCK_LEN: usize = 256;
@@ -88,13 +87,17 @@ fn extract_bits(data: &[u8], addr_bits: usize, num_bits: u8) -> u64 {
    assert!(num_bits <= 56);
    let addr_byte = addr_bits / 8;
    let bit_shift = (addr_bits % 8) as u64;
-    assert!(data.len() >= addr_byte + 7);
-    let val_unshifted_unmasked: u64 = unsafe {
-        // ok because the pointer is only accessed using `ptr::read_unaligned`
-        #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
-        let addr = data.as_ptr().add(addr_byte) as *const u64;
-        // ok thanks to the 7 byte padding
-        ptr::read_unaligned(addr)
+    let val_unshifted_unmasked: u64 = if data.len() >= addr_byte + 8 {
+        LittleEndian::read_u64(&data[addr_byte..][..8])
+    } else {
+        // the buffer is not large enough.
+        // Let's copy the few remaining bytes to a 8 byte buffer
+        // padded with 0s.
+        let mut buf = [0u8; 8];
+        let data_to_copy = &data[addr_byte..];
+        let nbytes = data_to_copy.len();
+        buf[..nbytes].copy_from_slice(data_to_copy);
+        LittleEndian::read_u64(&buf)
    };
    let val_shifted_unmasked = val_unshifted_unmasked >> bit_shift;
    let mask = (1u64 << u64::from(num_bits)) - 1;
@@ -246,7 +249,6 @@ impl TermInfoStoreWriter {
        self.num_terms.serialize(write)?;
        write.write_all(&self.buffer_block_metas)?;
        write.write_all(&self.buffer_term_infos)?;
-        write.write_all(&[0u8; 7])?;
        Ok(())
    }
 }
--- a/src/tokenizer/facet_tokenizer.rs
+++ b/src/tokenizer/facet_tokenizer.rs
@@ -1,6 +1,5 @@
 use super::{Token, TokenStream, Tokenizer};
 use schema::FACET_SEP_BYTE;
-use std::str;

 /// The `FacetTokenizer` process a `Facet` binary representation
 /// and emits a token for all of its parent.
@@ -57,12 +56,11 @@ impl<'a> TokenStream for FacetTokenStream<'a> {
                    .position(|b| b == FACET_SEP_BYTE)
                    .map(|pos| cursor + 1 + pos)
                {
-                    let facet_part =
-                        unsafe { str::from_utf8_unchecked(&bytes[cursor..next_sep_pos]) };
+                    let facet_part = &self.text[cursor..next_sep_pos];
                    self.token.text.push_str(facet_part);
                    self.state = State::UpToPosition(next_sep_pos);
                } else {
-                    let facet_part = unsafe { str::from_utf8_unchecked(&bytes[cursor..]) };
+                    let facet_part = &self.text[cursor..];
                    self.token.text.push_str(facet_part);
                    self.state = State::Terminated;
                }
@@ -86,7 +84,6 @@ mod tests {

    use super::FacetTokenizer;
    use schema::Facet;
-    use std::str;
    use tokenizer::{Token, TokenStream, Tokenizer};

    #[test]
@@ -95,11 +92,11 @@ mod tests {
        let mut tokens = vec![];
        {
            let mut add_token = |token: &Token| {
-                let facet = unsafe { Facet::from_encoded(token.text.as_bytes().to_owned()) }; // ok test
+                let facet = Facet::from_encoded(token.text.as_bytes().to_owned()).unwrap();
                tokens.push(format!("{}", facet));
            };
            FacetTokenizer
-                .token_stream(unsafe { str::from_utf8_unchecked(facet.encoded_bytes()) })
+                .token_stream(facet.encoded_str())
                .process(&mut add_token);
        }
        assert_eq!(tokens.len(), 4);
@@ -115,11 +112,11 @@ mod tests {
        let mut tokens = vec![];
        {
            let mut add_token = |token: &Token| {
-                let facet = unsafe { Facet::from_encoded(token.text.as_bytes().to_owned()) }; // ok test
+                let facet = Facet::from_encoded(token.text.as_bytes().to_owned()).unwrap(); // ok test
                tokens.push(format!("{}", facet));
            };
            FacetTokenizer
-                .token_stream(unsafe { str::from_utf8_unchecked(facet.encoded_bytes()) }) // ok test
+                .token_stream(facet.encoded_str()) // ok test
                .process(&mut add_token);
        }
        assert_eq!(tokens.len(), 1);
Author	SHA1	Message	Date
Paul Masurel	fbe398dfa2	Added a broken unit test	2019-01-22 08:18:32 +09:00
Paul Masurel	0e8fcd5727	Plastic surgery	2019-01-19 23:13:27 +09:00
Paul Masurel	f745c83bb7	Closes 466. Removing mentions of the chain collector. (#467 )	2019-01-16 10:28:19 +09:00
Paul Masurel	ffb16d9103	More efficient indexing (#463 ) * Using unrolled u32 VInt and caching Vec s * cargo fmt * Exposing a io::Write in the Expull thing * expull as a writer. clippy + format * inline the first block * simplified -if let Some- * vint reader iterator * blop	2019-01-13 14:51:18 +09:00
Paul Masurel	98ca703daa	More efficient indexing (#462 ) * Using unrolled u32 VInt and caching Vec s * cargo fmt * Exposing a io::Write in the Expull thing * expull as a writer. clippy + format * inline the first block * simplified -if let Some- * vint reader iterator	2019-01-13 14:41:56 +09:00
Paul Masurel	b9d25cda5d	Using LittleEndian explicitely	2019-01-08 12:41:58 +09:00
Paul Masurel	beb4289ec2	Less unsafe	2019-01-08 00:48:14 +09:00
Andrew Banchich	bdd72e4683	Update README.md (#459 ) Fix Elasticsearch spelling	2018-12-27 07:26:49 +09:00
Paul Masurel	45c3cd19be	Fixing README: git clone https...	2018-12-26 21:13:33 +09:00
Paul Masurel	b8241c5603	0.8.0	2018-12-26 10:18:34 +09:00
Paul Masurel	a4745151c0	Version to 0.8	2018-12-26 10:11:06 +09:00
Paul Masurel	e2ce326a8c	Merge branch 'issue/457'	2018-12-18 10:35:01 +09:00
Paul Masurel	bb21d12a70	Bumping version	2018-12-18 10:14:12 +09:00
Paul Masurel	4565aba62a	Added unit test for exponential search	2018-12-18 09:24:31 +09:00
Paul Masurel	545a7ec8dd	Closes #457	2018-12-18 09:18:46 +09:00
Paul Masurel	e68775d71c	Format and update murmurhash32 version	2018-12-17 19:12:38 +09:00
Paul Masurel	dcc92d287e	Facet remove unsafe (#456 ) * Removing some unsafe * Removing some unsafe (2) * Remove murmurhash	2018-12-17 19:08:48 +09:00
Paul Masurel	b48f81c051	Removing unsafe from bitpacking code (#455 )	2018-12-17 19:06:37 +09:00
Paul Masurel	a3042e956b	Facet remove unsafe (#454 ) * Removing some unsafe * Removing some unsafe (2)	2018-12-17 09:31:09 +09:00
dependabot[bot]	1fa10f0a0b	Update itertools requirement from 0.7 to 0.8 (#453 ) Updates the requirements on [itertools](https://github.com/bluss/rust-itertools) to permit the latest version. - [Release notes](https://github.com/bluss/rust-itertools/releases) - [Commits](https://github.com/bluss/rust-itertools/commits/0.8.0) Signed-off-by: dependabot[bot] <support@dependabot.com>	2018-12-17 09:28:36 +09:00