From 4ae1d87632428df06c6401bd303734dfae052270 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Wed, 15 Sep 2021 20:14:28 +0800 Subject: [PATCH 01/13] add DeleteBitSet iterator --- src/core/segment_reader.rs | 15 ++++++++-- src/fastfield/delete.rs | 53 ++++++++++++++++++++++++++++++++++-- src/indexer/merger.rs | 38 ++++++++++++-------------- src/indexer/segment_entry.rs | 2 -- 4 files changed, 81 insertions(+), 27 deletions(-) diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 73de5fb4c..45ee859f1 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -287,8 +287,19 @@ impl SegmentReader { } /// Returns an iterator that will iterate over the alive document ids - pub fn doc_ids_alive(&self) -> impl Iterator + '_ { - (0u32..self.max_doc).filter(move |doc| !self.is_deleted(*doc)) + pub fn doc_ids_alive(&self) -> Box + '_> { + if let Some(delete_bitset) = &self.delete_bitset_opt { + Box::new( + delete_bitset + .iter() + .take(self.max_doc() as usize) + .enumerate() + .filter(|(_docid, is_deleted)| !is_deleted) + .map(|(docid, _is_deleted)| docid as DocId), + ) + } else { + Box::new(0u32..self.max_doc) + } } /// Summarize total space usage of this segment. diff --git a/src/fastfield/delete.rs b/src/fastfield/delete.rs index 421761d63..f695f9968 100644 --- a/src/fastfield/delete.rs +++ b/src/fastfield/delete.rs @@ -1,6 +1,5 @@ use crate::directory::FileSlice; use crate::directory::OwnedBytes; -use crate::directory::WritePtr; use crate::space_usage::ByteCount; use crate::DocId; use common::BitSet; @@ -16,7 +15,7 @@ use std::io::Write; pub fn write_delete_bitset( delete_bitset: &BitSet, max_doc: u32, - writer: &mut WritePtr, + writer: &mut dyn Write, ) -> io::Result<()> { let mut byte = 0u8; let mut shift = 0u8; @@ -79,6 +78,7 @@ impl DeleteBitSet { } /// Returns true iff the document is still "alive". In other words, if it has not been deleted. 
+ #[inline] pub fn is_alive(&self, doc: DocId) -> bool { !self.is_deleted(doc) } @@ -92,6 +92,22 @@ impl DeleteBitSet { b & (1u8 << shift) != 0 } + /// Returns true iff the document has been marked as deleted. + #[inline] + pub fn iter(&self) -> impl Iterator + '_ { + let data = self.data.as_slice(); + data.iter().flat_map(|el| { + (0..8).map(move |pos| { + let val = el >> pos; + if (val & 1) == 1 { + true + } else { + false + } + }) + }) + } + /// The number of deleted docs pub fn num_deleted(&self) -> usize { self.num_deleted @@ -110,6 +126,7 @@ impl HasLen for DeleteBitSet { #[cfg(test)] mod tests { + use super::DeleteBitSet; use common::HasLen; @@ -141,4 +158,36 @@ mod tests { } assert_eq!(delete_bitset.len(), 2); } + + #[test] + fn test_delete_bitset_iter_small() { + let delete_bitset = DeleteBitSet::for_test(&[0, 2, 3, 6], 7); + + let data: Vec<_> = delete_bitset.iter().collect(); + assert!(data[0]); + assert!(!data[1]); + assert!(data[2]); + assert!(data[3]); + assert!(!data[4]); + assert!(!data[5]); + assert!(data[6]); + } + #[test] + fn test_delete_bitset_iter() { + let delete_bitset = DeleteBitSet::for_test(&[1, 2, 3, 5, 10], 11); + + let data: Vec<_> = delete_bitset.iter().collect(); + assert!(!data[0]); + assert!(data[1]); + assert!(data[2]); + assert!(data[3]); + assert!(!data[4]); + assert!(data[5]); + assert!(!data[6]); + assert!(!data[7]); + assert!(!data[8]); + assert!(!data[9]); + assert!(data[10]); + assert!(!data[11]); + } } diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index ef3874256..d603f2900 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -98,27 +98,25 @@ pub struct IndexMerger { fn compute_min_max_val( u64_reader: &impl FastFieldReader, + segment_reader: &SegmentReader, max_doc: DocId, delete_bitset_opt: Option<&DeleteBitSet>, ) -> Option<(u64, u64)> { if max_doc == 0 { None } else { - match delete_bitset_opt { - Some(delete_bitset) => { - // some deleted documents, - // we need to recompute the max / min - 
minmax( - (0..max_doc) - .filter(|doc_id| delete_bitset.is_alive(*doc_id)) - .map(|doc_id| u64_reader.get(doc_id)), - ) - } - None => { - // no deleted documents, - // we can use the previous min_val, max_val. - Some((u64_reader.min_value(), u64_reader.max_value())) - } + if delete_bitset_opt.is_some() { + // some deleted documents, + // we need to recompute the max / min + minmax( + segment_reader + .doc_ids_alive() + .map(|doc_id| u64_reader.get(doc_id)), + ) + } else { + // no deleted documents, + // we can use the previous min_val, max_val. + Some((u64_reader.min_value(), u64_reader.max_value())) } } } @@ -326,7 +324,7 @@ impl IndexMerger { .fast_fields() .typed_fast_field_reader(field) .expect("Failed to find a reader for single fast field. This is a tantivy bug and it should never happen."); - compute_min_max_val(&u64_reader, reader.max_doc(), reader.delete_bitset()) + compute_min_max_val(&u64_reader, reader, reader.max_doc(), reader.delete_bitset()) }) .flatten() .reduce(|a, b| { @@ -505,11 +503,9 @@ impl IndexMerger { for (reader, u64s_reader) in reader_and_field_accessors.iter() { if let Some(delete_bitset) = reader.delete_bitset() { num_docs += reader.max_doc() as u64 - delete_bitset.len() as u64; - for doc in 0u32..reader.max_doc() { - if delete_bitset.is_alive(doc) { - let num_vals = u64s_reader.get_len(doc) as u64; - total_num_vals += num_vals; - } + for doc in reader.doc_ids_alive() { + let num_vals = u64s_reader.get_len(doc) as u64; + total_num_vals += num_vals; } } else { num_docs += reader.max_doc() as u64; diff --git a/src/indexer/segment_entry.rs b/src/indexer/segment_entry.rs index 4ac352e50..e0beb2179 100644 --- a/src/indexer/segment_entry.rs +++ b/src/indexer/segment_entry.rs @@ -9,8 +9,6 @@ use std::fmt; /// /// In addition to segment `meta`, /// it contains a few transient states -/// - `state` expresses whether the segment is already in the -/// middle of a merge /// - `delete_bitset` is a bitset describing /// documents that were deleted 
during the commit /// itself. From 4da71273e1408ba844f8ce6868ec7ab366086afb Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 17 Sep 2021 10:28:12 +0800 Subject: [PATCH 02/13] add de/serialization for bitset remove len footgun --- common/src/bitset.rs | 61 +++++++++++++++++++++++++++++++++++++++-- src/fastfield/delete.rs | 12 ++------ src/indexer/merger.rs | 3 +- 3 files changed, 61 insertions(+), 15 deletions(-) diff --git a/common/src/bitset.rs b/common/src/bitset.rs index 942a94269..c5f741829 100644 --- a/common/src/bitset.rs +++ b/common/src/bitset.rs @@ -1,5 +1,7 @@ -use std::fmt; +use std::convert::TryInto; +use std::io::Write; use std::u64; +use std::{fmt, io}; #[derive(Clone, Copy, Eq, PartialEq)] pub struct TinySet(u64); @@ -28,6 +30,15 @@ impl IntoIterator for TinySet { } impl TinySet { + pub fn serialize(&self, writer: &mut dyn Write) -> io::Result<()> { + writer.write_all(self.0.to_le_bytes().as_ref()) + } + + pub fn deserialize(data: &[u8]) -> io::Result { + let val: u64 = u64::from_le_bytes(data[..8].try_into().unwrap()); + Ok(TinySet(val)) + } + /// Returns an empty `TinySet`. 
pub fn empty() -> TinySet { TinySet(0u64) @@ -123,7 +134,7 @@ impl TinySet { #[derive(Clone)] pub struct BitSet { tinysets: Box<[TinySet]>, - len: usize, + len: u64, max_value: u32, } @@ -132,6 +143,41 @@ fn num_buckets(max_val: u32) -> u32 { } impl BitSet { + /// Write a `BitSet` + /// + pub fn serialize(&mut self, writer: &mut dyn Write) -> io::Result<()> { + writer.write_all(self.len.to_le_bytes().as_ref())?; + writer.write_all(self.max_value.to_le_bytes().as_ref())?; + + for tinyset in self.tinysets.iter() { + tinyset.serialize(writer)?; + } + writer.flush()?; + Ok(()) + } + + /// UnWrite a `BitSet` + /// + pub fn deserialize(&mut self, mut data: &[u8]) -> io::Result { + let len: u64 = u64::from_le_bytes(data[..8].try_into().unwrap()); + data = &data[8..]; + + let max_value: u32 = u32::from_le_bytes(data[..4].try_into().unwrap()); + data = &data[4..]; + + let mut tinysets = vec![]; + while !data.is_empty() { + let tinyset = TinySet::deserialize(data)?; + tinysets.push(tinyset); + data = &data[8..]; + } + Ok(BitSet { + tinysets: tinysets.into_boxed_slice(), + len, + max_value, + }) + } + /// Create a new `BitSet` that may contain elements /// within `[0, max_val[`. pub fn with_max_value(max_value: u32) -> BitSet { @@ -153,7 +199,7 @@ impl BitSet { /// Returns the number of elements in the `BitSet`. 
pub fn len(&self) -> usize { - self.len + self.len as usize } /// Inserts an element in the `BitSet` @@ -249,6 +295,15 @@ mod tests { assert_eq!(hashset.contains(&el), bitset.contains(el)); } assert_eq!(bitset.max_value(), max_value); + + // test deser + let mut data = vec![]; + bitset.serialize(&mut data).unwrap(); + let bitset = bitset.deserialize(&data).unwrap(); + for el in 0..max_value { + assert_eq!(hashset.contains(&el), bitset.contains(el)); + } + assert_eq!(bitset.max_value(), max_value); }; test_against_hashset(&[], 0); diff --git a/src/fastfield/delete.rs b/src/fastfield/delete.rs index f695f9968..eff577d2b 100644 --- a/src/fastfield/delete.rs +++ b/src/fastfield/delete.rs @@ -3,7 +3,6 @@ use crate::directory::OwnedBytes; use crate::space_usage::ByteCount; use crate::DocId; use common::BitSet; -use common::HasLen; use std::io; use std::io::Write; @@ -118,17 +117,10 @@ impl DeleteBitSet { } } -impl HasLen for DeleteBitSet { - fn len(&self) -> usize { - self.num_deleted - } -} - #[cfg(test)] mod tests { use super::DeleteBitSet; - use common::HasLen; #[test] fn test_delete_bitset_empty() { @@ -136,7 +128,7 @@ mod tests { for doc in 0..10 { assert_eq!(delete_bitset.is_deleted(doc), !delete_bitset.is_alive(doc)); } - assert_eq!(delete_bitset.len(), 0); + assert_eq!(delete_bitset.num_deleted(), 0); } #[test] @@ -156,7 +148,7 @@ mod tests { for doc in 0..10 { assert_eq!(delete_bitset.is_deleted(doc), !delete_bitset.is_alive(doc)); } - assert_eq!(delete_bitset.len(), 2); + assert_eq!(delete_bitset.num_deleted(), 2); } #[test] diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index d603f2900..06d02859a 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -29,7 +29,6 @@ use crate::{ SegmentOrdinal, }; use crate::{DocId, InvertedIndexReader, SegmentComponent}; -use common::HasLen; use itertools::Itertools; use measure_time::debug_time; use std::cmp; @@ -502,7 +501,7 @@ impl IndexMerger { let mut num_docs = 0; for (reader, u64s_reader) in 
reader_and_field_accessors.iter() { if let Some(delete_bitset) = reader.delete_bitset() { - num_docs += reader.max_doc() as u64 - delete_bitset.len() as u64; + num_docs += reader.max_doc() as u64 - delete_bitset.num_deleted() as u64; for doc in reader.doc_ids_alive() { let num_vals = u64s_reader.get_len(doc) as u64; total_num_vals += num_vals; From c22177a0056b8578a056c73e8c6c67eea3a4f22b Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 17 Sep 2021 15:29:27 +0800 Subject: [PATCH 03/13] add iterator --- common/src/bitset.rs | 64 +++++++++++++++------- src/core/segment_reader.rs | 9 +--- src/fastfield/delete.rs | 105 ++++++++++++++++++------------------- 3 files changed, 97 insertions(+), 81 deletions(-) diff --git a/common/src/bitset.rs b/common/src/bitset.rs index c5f741829..f8bbb0fd5 100644 --- a/common/src/bitset.rs +++ b/common/src/bitset.rs @@ -34,8 +34,8 @@ impl TinySet { writer.write_all(self.0.to_le_bytes().as_ref()) } - pub fn deserialize(data: &[u8]) -> io::Result { - let val: u64 = u64::from_le_bytes(data[..8].try_into().unwrap()); + pub fn deserialize(data: [u8; 8]) -> io::Result { + let val: u64 = u64::from_le_bytes(data); Ok(TinySet(val)) } @@ -145,9 +145,9 @@ fn num_buckets(max_val: u32) -> u32 { impl BitSet { /// Write a `BitSet` /// - pub fn serialize(&mut self, writer: &mut dyn Write) -> io::Result<()> { - writer.write_all(self.len.to_le_bytes().as_ref())?; - writer.write_all(self.max_value.to_le_bytes().as_ref())?; + pub fn serialize(&self, writer: &mut dyn Write) -> io::Result<()> { + //writer.write_all(self.len.to_le_bytes().as_ref())?; + //writer.write_all(self.max_value.to_le_bytes().as_ref())?; for tinyset in self.tinysets.iter() { tinyset.serialize(writer)?; @@ -156,28 +156,41 @@ impl BitSet { Ok(()) } - /// UnWrite a `BitSet` + /// Deserialize a `BitSet`. BitSet is considered immutable after deserialization. 
/// - pub fn deserialize(&mut self, mut data: &[u8]) -> io::Result { - let len: u64 = u64::from_le_bytes(data[..8].try_into().unwrap()); - data = &data[8..]; + pub fn deserialize(data: &[u8]) -> io::Result { + //let len: u64 = u64::from_le_bytes(data[..8].try_into().unwrap()); + //data = &data[8..]; - let max_value: u32 = u32::from_le_bytes(data[..4].try_into().unwrap()); - data = &data[4..]; + //let max_value: u32 = u32::from_le_bytes(data[..4].try_into().unwrap()); + //data = &data[4..]; let mut tinysets = vec![]; - while !data.is_empty() { - let tinyset = TinySet::deserialize(data)?; + for chunk in data.chunks_exact(8) { + let tinyset = TinySet::deserialize(chunk.try_into().unwrap())?; tinysets.push(tinyset); - data = &data[8..]; } Ok(BitSet { tinysets: tinysets.into_boxed_slice(), - len, - max_value, + len: 0, + max_value: 0, }) } + /// Iterate over the positions of the set elements + #[inline] + pub fn iter_positions_from_bytes<'a>(data: &'a [u8]) -> impl Iterator + 'a { + data.chunks_exact(8) + .enumerate() + .filter(|(_, tinyset)| !tinyset.is_empty()) + .flat_map(|(chunk_num, chunk)| { + let tinyset = TinySet::deserialize(chunk.try_into().unwrap()).unwrap(); + tinyset + .into_iter() + .map(move |val| val + chunk_num as u32 * 64) + }) + } + /// Create a new `BitSet` that may contain elements /// within `[0, max_val[`. 
pub fn with_max_value(max_value: u32) -> BitSet { @@ -253,6 +266,7 @@ mod tests { use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; use std::collections::HashSet; + use std::convert::TryInto; #[test] fn test_tiny_set() { @@ -279,6 +293,21 @@ mod tests { assert_eq!(u.pop_lowest(), Some(63u32)); assert!(u.pop_lowest().is_none()); } + { + let mut u = TinySet::empty().insert(63u32).insert(5); + assert_eq!(u.pop_lowest(), Some(5u32)); + assert_eq!(u.pop_lowest(), Some(63u32)); + assert!(u.pop_lowest().is_none()); + } + { + let u = TinySet::empty().insert(63u32).insert(5); + let mut data = vec![]; + u.serialize(&mut data).unwrap(); + let mut u = TinySet::deserialize(data[..8].try_into().unwrap()).unwrap(); + assert_eq!(u.pop_lowest(), Some(5u32)); + assert_eq!(u.pop_lowest(), Some(63u32)); + assert!(u.pop_lowest().is_none()); + } } #[test] @@ -299,11 +328,10 @@ mod tests { // test deser let mut data = vec![]; bitset.serialize(&mut data).unwrap(); - let bitset = bitset.deserialize(&data).unwrap(); + let bitset = BitSet::deserialize(&data).unwrap(); for el in 0..max_value { assert_eq!(hashset.contains(&el), bitset.contains(el)); } - assert_eq!(bitset.max_value(), max_value); }; test_against_hashset(&[], 0); diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 45ee859f1..c80471663 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -289,14 +289,7 @@ impl SegmentReader { /// Returns an iterator that will iterate over the alive document ids pub fn doc_ids_alive(&self) -> Box + '_> { if let Some(delete_bitset) = &self.delete_bitset_opt { - Box::new( - delete_bitset - .iter() - .take(self.max_doc() as usize) - .enumerate() - .filter(|(_docid, is_deleted)| !is_deleted) - .map(|(docid, _is_deleted)| docid as DocId), - ) + Box::new(delete_bitset.iter_positions()) } else { Box::new(0u32..self.max_doc) } diff --git a/src/fastfield/delete.rs b/src/fastfield/delete.rs index eff577d2b..bc1b286a1 100644 --- a/src/fastfield/delete.rs +++ 
b/src/fastfield/delete.rs @@ -3,6 +3,8 @@ use crate::directory::OwnedBytes; use crate::space_usage::ByteCount; use crate::DocId; use common::BitSet; +use common::TinySet; +use std::convert::TryInto; use std::io; use std::io::Write; @@ -16,23 +18,24 @@ pub fn write_delete_bitset( max_doc: u32, writer: &mut dyn Write, ) -> io::Result<()> { - let mut byte = 0u8; - let mut shift = 0u8; - for doc in 0..max_doc { - if delete_bitset.contains(doc) { - byte |= 1 << shift; - } - if shift == 7 { - writer.write_all(&[byte])?; - shift = 0; - byte = 0; - } else { - shift += 1; - } - } - if max_doc % 8 > 0 { - writer.write_all(&[byte])?; - } + delete_bitset.serialize(writer)?; + //let mut byte = 0u8; + //let mut shift = 0u8; + //for doc in 0..max_doc { + //if delete_bitset.contains(doc) { + //byte |= 1 << shift; + //} + //if shift == 7 { + //writer.write_all(&[byte])?; + //shift = 0; + //byte = 0; + //} else { + //shift += 1; + //} + //} + //if max_doc % 8 > 0 { + //writer.write_all(&[byte])?; + //} Ok(()) } @@ -65,11 +68,14 @@ impl DeleteBitSet { /// Opens a delete bitset given its file. pub fn open(file: FileSlice) -> crate::Result { let bytes = file.read_bytes()?; - let num_deleted: usize = bytes - .as_slice() - .iter() - .map(|b| b.count_ones() as usize) + let num_deleted = bytes + .chunks_exact(8) + .map(|chunk| { + let tinyset = TinySet::deserialize(chunk.try_into().unwrap()).unwrap(); + tinyset.len() as usize + }) .sum(); + Ok(DeleteBitSet { data: bytes, num_deleted, @@ -91,20 +97,11 @@ impl DeleteBitSet { b & (1u8 << shift) != 0 } - /// Returns true iff the document has been marked as deleted. 
+ /// Iterate over the positions of the set elements #[inline] - pub fn iter(&self) -> impl Iterator + '_ { + pub fn iter_positions(&self) -> impl Iterator + '_ { let data = self.data.as_slice(); - data.iter().flat_map(|el| { - (0..8).map(move |pos| { - let val = el >> pos; - if (val & 1) == 1 { - true - } else { - false - } - }) - }) + BitSet::iter_positions_from_bytes(data) } /// The number of deleted docs @@ -151,35 +148,33 @@ mod tests { assert_eq!(delete_bitset.num_deleted(), 2); } + #[test] + fn test_delete_bitset_iter_minimal() { + let delete_bitset = DeleteBitSet::for_test(&[7], 8); + + let data: Vec<_> = delete_bitset.iter_positions().collect(); + assert_eq!(data, vec![7]); + } + #[test] fn test_delete_bitset_iter_small() { let delete_bitset = DeleteBitSet::for_test(&[0, 2, 3, 6], 7); - let data: Vec<_> = delete_bitset.iter().collect(); - assert!(data[0]); - assert!(!data[1]); - assert!(data[2]); - assert!(data[3]); - assert!(!data[4]); - assert!(!data[5]); - assert!(data[6]); + let data: Vec<_> = delete_bitset.iter_positions().collect(); + assert_eq!(data, vec![0, 2, 3, 6]); } #[test] fn test_delete_bitset_iter() { - let delete_bitset = DeleteBitSet::for_test(&[1, 2, 3, 5, 10], 11); + let delete_bitset = DeleteBitSet::for_test(&[1, 2, 3, 5, 10, 64, 65, 66, 100], 110); - let data: Vec<_> = delete_bitset.iter().collect(); - assert!(!data[0]); - assert!(data[1]); - assert!(data[2]); - assert!(data[3]); - assert!(!data[4]); - assert!(data[5]); - assert!(!data[6]); - assert!(!data[7]); - assert!(!data[8]); - assert!(!data[9]); - assert!(data[10]); - assert!(!data[11]); + let data: Vec<_> = delete_bitset.iter_positions().collect(); + assert_eq!(data, vec![1, 2, 3, 5, 10, 64, 65, 66, 100]); + } + #[test] + fn test_delete_bitset_iter_empty_blocks() { + let delete_bitset = DeleteBitSet::for_test(&[1, 2, 3, 5, 10, 64, 65, 66, 100, 1000], 1010); + + let data: Vec<_> = delete_bitset.iter_positions().collect(); + assert_eq!(data, vec![1, 2, 3, 5, 10, 64, 65, 66, 100, 
1000]); } } From 93cbd52bf06002d626b11a2ea3ad1d6e55723298 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Sat, 18 Sep 2021 17:35:22 +0800 Subject: [PATCH 04/13] move code to biset, add inline, add benchmark --- common/src/bitset.rs | 57 +++++++++++----- src/core/segment_reader.rs | 2 +- src/fastfield/delete.rs | 126 +++++++++++++++++++++--------------- src/indexer/index_writer.rs | 2 +- 4 files changed, 118 insertions(+), 69 deletions(-) diff --git a/common/src/bitset.rs b/common/src/bitset.rs index f8bbb0fd5..df4dda632 100644 --- a/common/src/bitset.rs +++ b/common/src/bitset.rs @@ -16,6 +16,7 @@ pub struct TinySetIterator(TinySet); impl Iterator for TinySetIterator { type Item = u32; + #[inline] fn next(&mut self) -> Option { self.0.pop_lowest() } @@ -34,6 +35,7 @@ impl TinySet { writer.write_all(self.0.to_le_bytes().as_ref()) } + #[inline] pub fn deserialize(data: [u8; 8]) -> io::Result { let val: u64 = u64::from_le_bytes(data); Ok(TinySet(val)) @@ -48,21 +50,25 @@ impl TinySet { self.0 = 0u64; } + #[inline] /// Returns the complement of the set in `[0, 64[`. fn complement(self) -> TinySet { TinySet(!self.0) } + #[inline] /// Returns true iff the `TinySet` contains the element `el`. pub fn contains(self, el: u32) -> bool { !self.intersect(TinySet::singleton(el)).is_empty() } + #[inline] /// Returns the number of elements in the TinySet. pub fn len(self) -> u32 { self.0.count_ones() } + #[inline] /// Returns the intersection of `self` and `other` pub fn intersect(self, other: TinySet) -> TinySet { TinySet(self.0 & other.0) @@ -146,8 +152,7 @@ impl BitSet { /// Write a `BitSet` /// pub fn serialize(&self, writer: &mut dyn Write) -> io::Result<()> { - //writer.write_all(self.len.to_le_bytes().as_ref())?; - //writer.write_all(self.max_value.to_le_bytes().as_ref())?; + writer.write_all(self.max_value.to_le_bytes().as_ref())?; for tinyset in self.tinysets.iter() { tinyset.serialize(writer)?; @@ -158,12 +163,9 @@ impl BitSet { /// Deserialize a `BitSet`. 
BitSet is considered immutable after deserialization. /// - pub fn deserialize(data: &[u8]) -> io::Result { - //let len: u64 = u64::from_le_bytes(data[..8].try_into().unwrap()); - //data = &data[8..]; - - //let max_value: u32 = u32::from_le_bytes(data[..4].try_into().unwrap()); - //data = &data[4..]; + pub fn deserialize(mut data: &[u8]) -> io::Result { + let max_value: u32 = u32::from_le_bytes(data[..4].try_into().unwrap()); + data = &data[4..]; let mut tinysets = vec![]; for chunk in data.chunks_exact(8) { @@ -173,21 +175,35 @@ impl BitSet { Ok(BitSet { tinysets: tinysets.into_boxed_slice(), len: 0, - max_value: 0, + max_value, }) } - /// Iterate over the positions of the set elements + /// Iterate the tinyset on the fly from serialized data. + /// #[inline] - pub fn iter_positions_from_bytes<'a>(data: &'a [u8]) -> impl Iterator + 'a { - data.chunks_exact(8) + pub fn iter_from_bytes<'a>(data: &'a [u8]) -> impl Iterator + 'a { + data[4..].chunks_exact(8).map(move |chunk| { + let tinyset: TinySet = TinySet::deserialize(chunk.try_into().unwrap()).unwrap(); + tinyset + }) + } + + /// Iterate over the positions of the unset elements. + /// + /// max_val needs to be provided, since the last 64 bits may + #[inline] + pub fn iter_unset_from_bytes<'a>(data: &'a [u8]) -> impl Iterator + 'a { + let max_val: u32 = u32::from_le_bytes(data[..4].try_into().unwrap()); + Self::iter_from_bytes(data) + .map(|tinyset| tinyset.complement()) .enumerate() - .filter(|(_, tinyset)| !tinyset.is_empty()) - .flat_map(|(chunk_num, chunk)| { - let tinyset = TinySet::deserialize(chunk.try_into().unwrap()).unwrap(); + .flat_map(move |(chunk_num, tinyset)| { + let chunk_base_val = chunk_num as u32 * 64; tinyset .into_iter() - .map(move |val| val + chunk_num as u32 * 64) + .map(move |val| val + chunk_base_val) + .take_while(move |doc| *doc < max_val) }) } @@ -227,6 +243,15 @@ impl BitSet { }; } + /// Returns true iff the elements is in the `BitSet`. 
+ #[inline] + pub fn contains_from_bytes(el: u32, data: &[u8]) -> bool { + let byte_offset = 4 + el / 8u32; + let b: u8 = data[byte_offset as usize]; + let shift = (el & 7u32) as u8; + b & (1u8 << shift) != 0 + } + /// Returns true iff the elements is in the `BitSet`. pub fn contains(&self, el: u32) -> bool { self.tinyset(el / 64u32).contains(el % 64) diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index c80471663..5504f8c60 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -289,7 +289,7 @@ impl SegmentReader { /// Returns an iterator that will iterate over the alive document ids pub fn doc_ids_alive(&self) -> Box + '_> { if let Some(delete_bitset) = &self.delete_bitset_opt { - Box::new(delete_bitset.iter_positions()) + Box::new(delete_bitset.iter_unset()) } else { Box::new(0u32..self.max_doc) } diff --git a/src/fastfield/delete.rs b/src/fastfield/delete.rs index bc1b286a1..22af3a68b 100644 --- a/src/fastfield/delete.rs +++ b/src/fastfield/delete.rs @@ -3,8 +3,6 @@ use crate::directory::OwnedBytes; use crate::space_usage::ByteCount; use crate::DocId; use common::BitSet; -use common::TinySet; -use std::convert::TryInto; use std::io; use std::io::Write; @@ -13,29 +11,8 @@ use std::io::Write; /// where `delete_bitset` is the set of deleted `DocId`. /// Warning: this function does not call terminate. The caller is in charge of /// closing the writer properly. 
-pub fn write_delete_bitset( - delete_bitset: &BitSet, - max_doc: u32, - writer: &mut dyn Write, -) -> io::Result<()> { +pub fn write_delete_bitset(delete_bitset: &BitSet, writer: &mut dyn Write) -> io::Result<()> { delete_bitset.serialize(writer)?; - //let mut byte = 0u8; - //let mut shift = 0u8; - //for doc in 0..max_doc { - //if delete_bitset.contains(doc) { - //byte |= 1 << shift; - //} - //if shift == 7 { - //writer.write_all(&[byte])?; - //shift = 0; - //byte = 0; - //} else { - //shift += 1; - //} - //} - //if max_doc % 8 > 0 { - //writer.write_all(&[byte])?; - //} Ok(()) } @@ -59,7 +36,7 @@ impl DeleteBitSet { let directory = RamDirectory::create(); let path = Path::new("dummydeletebitset"); let mut wrt = directory.open_write(path).unwrap(); - write_delete_bitset(&bitset, max_doc, &mut wrt).unwrap(); + write_delete_bitset(&bitset, &mut wrt).unwrap(); wrt.terminate().unwrap(); let file = directory.open_read(path).unwrap(); Self::open(file).unwrap() @@ -68,12 +45,8 @@ impl DeleteBitSet { /// Opens a delete bitset given its file. pub fn open(file: FileSlice) -> crate::Result { let bytes = file.read_bytes()?; - let num_deleted = bytes - .chunks_exact(8) - .map(|chunk| { - let tinyset = TinySet::deserialize(chunk.try_into().unwrap()).unwrap(); - tinyset.len() as usize - }) + let num_deleted = BitSet::iter_from_bytes(bytes.as_slice()) + .map(|tinyset| tinyset.len() as usize) .sum(); Ok(DeleteBitSet { @@ -91,17 +64,15 @@ impl DeleteBitSet { /// Returns true iff the document has been marked as deleted. 
#[inline] pub fn is_deleted(&self, doc: DocId) -> bool { - let byte_offset = doc / 8u32; - let b: u8 = self.data.as_slice()[byte_offset as usize]; - let shift = (doc & 7u32) as u8; - b & (1u8 << shift) != 0 + let data = self.data.as_slice(); + BitSet::contains_from_bytes(doc, data) } /// Iterate over the positions of the set elements #[inline] - pub fn iter_positions(&self) -> impl Iterator + '_ { + pub fn iter_unset(&self) -> impl Iterator + '_ { let data = self.data.as_slice(); - BitSet::iter_positions_from_bytes(data) + BitSet::iter_unset_from_bytes(data) } /// The number of deleted docs @@ -152,29 +123,82 @@ mod tests { fn test_delete_bitset_iter_minimal() { let delete_bitset = DeleteBitSet::for_test(&[7], 8); - let data: Vec<_> = delete_bitset.iter_positions().collect(); - assert_eq!(data, vec![7]); + let data: Vec<_> = delete_bitset.iter_unset().collect(); + assert_eq!(data, vec![0, 1, 2, 3, 4, 5, 6]); } #[test] fn test_delete_bitset_iter_small() { let delete_bitset = DeleteBitSet::for_test(&[0, 2, 3, 6], 7); - let data: Vec<_> = delete_bitset.iter_positions().collect(); - assert_eq!(data, vec![0, 2, 3, 6]); + let data: Vec<_> = delete_bitset.iter_unset().collect(); + assert_eq!(data, vec![1, 4, 5]); } #[test] fn test_delete_bitset_iter() { - let delete_bitset = DeleteBitSet::for_test(&[1, 2, 3, 5, 10, 64, 65, 66, 100], 110); + let delete_bitset = DeleteBitSet::for_test(&[0, 1, 1000], 1001); - let data: Vec<_> = delete_bitset.iter_positions().collect(); - assert_eq!(data, vec![1, 2, 3, 5, 10, 64, 65, 66, 100]); - } - #[test] - fn test_delete_bitset_iter_empty_blocks() { - let delete_bitset = DeleteBitSet::for_test(&[1, 2, 3, 5, 10, 64, 65, 66, 100, 1000], 1010); - - let data: Vec<_> = delete_bitset.iter_positions().collect(); - assert_eq!(data, vec![1, 2, 3, 5, 10, 64, 65, 66, 100, 1000]); + let data: Vec<_> = delete_bitset.iter_unset().collect(); + assert_eq!(data, (2..=999).collect::>()); + } +} + +#[cfg(all(test, feature = "unstable"))] +mod bench { + + 
use super::DeleteBitSet; + use common::BitSet; + use rand::prelude::IteratorRandom; + use rand::prelude::SliceRandom; + use rand::thread_rng; + use test::Bencher; + + fn get_many_deleted() -> Vec { + let mut data = (0..1_000_000_u32).collect::>(); + for _ in 0..(1_000_000) * 7 / 8 { + remove_rand(&mut data); + } + data + } + + fn remove_rand(raw: &mut Vec) { + let i = (0..raw.len()).choose(&mut thread_rng()).unwrap(); + raw.remove(i); + } + + #[bench] + fn bench_deletebitset_iter_deser_on_fly(bench: &mut Bencher) { + let delete_bitset = DeleteBitSet::for_test(&[0, 1, 1000, 10000], 1_000_000); + + bench.iter(|| delete_bitset.iter_unset().collect::>()); + } + + #[bench] + fn bench_deletebitset_access(bench: &mut Bencher) { + let delete_bitset = DeleteBitSet::for_test(&[0, 1, 1000, 10000], 1_000_000); + + bench.iter(|| { + (0..1_000_000_u32) + .filter(|doc| delete_bitset.is_alive(*doc)) + .collect::>() + }); + } + + #[bench] + fn bench_deletebitset_iter_deser_on_fly_1_8_alive(bench: &mut Bencher) { + let delete_bitset = DeleteBitSet::for_test(&get_many_deleted(), 1_000_000); + + bench.iter(|| delete_bitset.iter_unset().collect::>()); + } + + #[bench] + fn bench_deletebitset_access_1_8_alive(bench: &mut Bencher) { + let delete_bitset = DeleteBitSet::for_test(&get_many_deleted(), 1_000_000); + + bench.iter(|| { + (0..1_000_000_u32) + .filter(|doc| delete_bitset.is_alive(*doc)) + .collect::>() + }); } } diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 30dd7f4f1..c42b87080 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -180,7 +180,7 @@ pub(crate) fn advance_deletes( // There are new deletes. We need to write a new delete file. 
segment = segment.with_delete_meta(num_deleted_docs as u32, target_opstamp); let mut delete_file = segment.open_write(SegmentComponent::Delete)?; - write_delete_bitset(&delete_bitset, max_doc, &mut delete_file)?; + write_delete_bitset(&delete_bitset, &mut delete_file)?; delete_file.terminate()?; } From beb3a5bd7325e95d5c7b150cde597b673994fd87 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Sat, 18 Sep 2021 17:58:15 +0800 Subject: [PATCH 05/13] fix len --- common/src/bitset.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/common/src/bitset.rs b/common/src/bitset.rs index df4dda632..6d9ffb109 100644 --- a/common/src/bitset.rs +++ b/common/src/bitset.rs @@ -149,7 +149,7 @@ fn num_buckets(max_val: u32) -> u32 { } impl BitSet { - /// Write a `BitSet` + /// serialize a `BitSet`. /// pub fn serialize(&self, writer: &mut dyn Write) -> io::Result<()> { writer.write_all(self.max_value.to_le_bytes().as_ref())?; @@ -161,20 +161,22 @@ impl BitSet { Ok(()) } - /// Deserialize a `BitSet`. BitSet is considered immutable after deserialization. + /// Deserialize a `BitSet`. 
/// pub fn deserialize(mut data: &[u8]) -> io::Result { let max_value: u32 = u32::from_le_bytes(data[..4].try_into().unwrap()); data = &data[4..]; + let mut len: u64 = 0; let mut tinysets = vec![]; for chunk in data.chunks_exact(8) { let tinyset = TinySet::deserialize(chunk.try_into().unwrap())?; + len += tinyset.len() as u64; tinysets.push(tinyset); } Ok(BitSet { tinysets: tinysets.into_boxed_slice(), - len: 0, + len, max_value, }) } @@ -357,6 +359,8 @@ mod tests { for el in 0..max_value { assert_eq!(hashset.contains(&el), bitset.contains(el)); } + assert_eq!(bitset.max_value(), max_value); + assert_eq!(bitset.len(), els.len()); }; test_against_hashset(&[], 0); From 4583fa270b24e812c37a22e7100a0c6d110f1d0a Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Thu, 23 Sep 2021 10:39:53 +0800 Subject: [PATCH 06/13] fixes --- common/src/bitset.rs | 3 ++- src/indexer/merger.rs | 8 +++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/common/src/bitset.rs b/common/src/bitset.rs index 6d9ffb109..6b0d99c9a 100644 --- a/common/src/bitset.rs +++ b/common/src/bitset.rs @@ -185,6 +185,7 @@ impl BitSet { /// #[inline] pub fn iter_from_bytes<'a>(data: &'a [u8]) -> impl Iterator + 'a { + assert!((data.len() - 4) % 8 == 0); data[4..].chunks_exact(8).map(move |chunk| { let tinyset: TinySet = TinySet::deserialize(chunk.try_into().unwrap()).unwrap(); tinyset @@ -250,7 +251,7 @@ impl BitSet { pub fn contains_from_bytes(el: u32, data: &[u8]) -> bool { let byte_offset = 4 + el / 8u32; let b: u8 = data[byte_offset as usize]; - let shift = (el & 7u32) as u8; + let shift = (el % 8) as u8; b & (1u8 << shift) != 0 } diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 06d02859a..4d69b0915 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -98,13 +98,11 @@ pub struct IndexMerger { fn compute_min_max_val( u64_reader: &impl FastFieldReader, segment_reader: &SegmentReader, - max_doc: DocId, - delete_bitset_opt: Option<&DeleteBitSet>, ) -> Option<(u64, 
u64)> { - if max_doc == 0 { + if segment_reader.max_doc() == 0 { None } else { - if delete_bitset_opt.is_some() { + if segment_reader.delete_bitset().is_some() { // some deleted documents, // we need to recompute the max / min minmax( @@ -323,7 +321,7 @@ impl IndexMerger { .fast_fields() .typed_fast_field_reader(field) .expect("Failed to find a reader for single fast field. This is a tantivy bug and it should never happen."); - compute_min_max_val(&u64_reader, reader, reader.max_doc(), reader.delete_bitset()) + compute_min_max_val(&u64_reader, reader) }) .flatten() .reduce(|a, b| { From a1f5cead96f8f3b4321a9b068bbde0ff49275863 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Thu, 23 Sep 2021 20:03:57 +0800 Subject: [PATCH 07/13] AliveBitSet instead of DeleteBitSet --- common/src/bitset.rs | 133 ++++++++++++++++--- src/core/segment_reader.rs | 10 +- src/docset.rs | 8 +- src/fastfield/{delete.rs => alive_bitset.rs} | 54 ++++---- src/fastfield/mod.rs | 6 +- src/indexer/index_writer.rs | 11 +- src/indexer/merger.rs | 1 - src/indexer/merger_sorted_index_test.rs | 8 +- src/postings/segment_postings.rs | 10 +- src/query/bitset/mod.rs | 4 +- src/query/boost_query.rs | 4 +- src/store/mod.rs | 4 +- src/store/reader.rs | 6 +- 13 files changed, 177 insertions(+), 82 deletions(-) rename src/fastfield/{delete.rs => alive_bitset.rs} (75%) diff --git a/common/src/bitset.rs b/common/src/bitset.rs index 6b0d99c9a..527abed9b 100644 --- a/common/src/bitset.rs +++ b/common/src/bitset.rs @@ -31,7 +31,7 @@ impl IntoIterator for TinySet { } impl TinySet { - pub fn serialize(&self, writer: &mut dyn Write) -> io::Result<()> { + pub fn serialize(&self, writer: &mut T) -> io::Result<()> { writer.write_all(self.0.to_le_bytes().as_ref()) } @@ -42,17 +42,24 @@ impl TinySet { } /// Returns an empty `TinySet`. + #[inline] pub fn empty() -> TinySet { TinySet(0u64) } + /// Returns a full `TinySet`. 
+    #[inline]
+    pub fn full() -> TinySet {
+        TinySet::empty().complement()
+    }
+
     pub fn clear(&mut self) {
         self.0 = 0u64;
     }
 
     #[inline]
     /// Returns the complement of the set in `[0, 64[`.
-    fn complement(self) -> TinySet {
+    pub fn complement(self) -> TinySet {
         TinySet(!self.0)
     }
 
@@ -68,6 +75,12 @@ impl TinySet {
         self.0.count_ones()
     }
 
+    #[inline]
+    /// Returns the number of unset bits in the TinySet.
+    pub fn num_unset(self) -> u32 {
+        self.0.count_zeros()
+    }
+
     #[inline]
     /// Returns the intersection of `self` and `other`
     pub fn intersect(self, other: TinySet) -> TinySet {
@@ -81,13 +94,21 @@
         TinySet(1u64 << u64::from(el))
     }
 
-    /// Insert a new element within [0..64[
+    /// Insert a new element within [0..64)
     #[inline]
     pub fn insert(self, el: u32) -> TinySet {
         self.union(TinySet::singleton(el))
     }
 
-    /// Insert a new element within [0..64[
+    /// Removes an element within [0..64)
+    #[inline]
+    pub fn remove(self, el: u32) -> TinySet {
+        self.intersect(TinySet::singleton(el).complement())
+    }
+
+    /// Insert a new element within [0..64)
+    ///
+    /// returns true if the bit changed
     #[inline]
     pub fn insert_mut(&mut self, el: u32) -> bool {
         let old = *self;
@@ -95,6 +116,16 @@
         old != *self
     }
 
+    /// Remove an element within [0..64)
+    ///
+    /// returns true if the bit changed
+    #[inline]
+    pub fn remove_mut(&mut self, el: u32) -> bool {
+        let old = *self;
+        *self = old.remove(el);
+        old != *self
+    }
+
     /// Returns the union of two tinysets
     #[inline]
     pub fn union(self, other: TinySet) -> TinySet {
@@ -151,7 +182,7 @@ fn num_buckets(max_val: u32) -> u32 {
 
 impl BitSet {
     /// serialize a `BitSet`.
     ///
-    pub fn serialize(&self, writer: &mut dyn Write) -> io::Result<()> {
+    pub fn serialize(&self, writer: &mut T) -> io::Result<()> {
         writer.write_all(self.max_value.to_le_bytes().as_ref())?;
 
         for tinyset in self.tinysets.iter() {
@@ -163,6 +194,7 @@ impl BitSet {
 
     /// Deserialize a `BitSet`.
/// + #[cfg(test)] pub fn deserialize(mut data: &[u8]) -> io::Result { let max_value: u32 = u32::from_le_bytes(data[..4].try_into().unwrap()); data = &data[4..]; @@ -181,10 +213,19 @@ impl BitSet { }) } + /// Count the number of unset bits from serialized data. + /// + #[inline] + pub fn count_unset_from_bytes<'a>(data: &'a [u8]) -> usize { + BitSet::iter_tinysets_from_bytes(data) + .map(|tinyset| tinyset.num_unset() as usize) + .sum() + } + /// Iterate the tinyset on the fly from serialized data. /// #[inline] - pub fn iter_from_bytes<'a>(data: &'a [u8]) -> impl Iterator + 'a { + fn iter_tinysets_from_bytes<'a>(data: &'a [u8]) -> impl Iterator + 'a { assert!((data.len() - 4) % 8 == 0); data[4..].chunks_exact(8).map(move |chunk| { let tinyset: TinySet = TinySet::deserialize(chunk.try_into().unwrap()).unwrap(); @@ -198,8 +239,7 @@ impl BitSet { #[inline] pub fn iter_unset_from_bytes<'a>(data: &'a [u8]) -> impl Iterator + 'a { let max_val: u32 = u32::from_le_bytes(data[..4].try_into().unwrap()); - Self::iter_from_bytes(data) - .map(|tinyset| tinyset.complement()) + Self::iter_tinysets_from_bytes(data) .enumerate() .flat_map(move |(chunk_num, tinyset)| { let chunk_base_val = chunk_num as u32 * 64; @@ -211,7 +251,7 @@ impl BitSet { } /// Create a new `BitSet` that may contain elements - /// within `[0, max_val[`. + /// within `[0, max_val)`. pub fn with_max_value(max_value: u32) -> BitSet { let num_buckets = num_buckets(max_value); let tinybisets = vec![TinySet::empty(); num_buckets as usize].into_boxed_slice(); @@ -222,6 +262,18 @@ impl BitSet { } } + /// Create a new `BitSet` that may contain elements + /// within `[0, max_val)`. + pub fn with_max_value_and_filled(max_value: u32) -> BitSet { + let num_buckets = num_buckets(max_value); + let tinybisets = vec![TinySet::full(); num_buckets as usize].into_boxed_slice(); + BitSet { + tinysets: tinybisets, + len: max_value as u64, + max_value, + } + } + /// Removes all elements from the `BitSet`. 
    pub fn clear(&mut self) {
        for tinyset in self.tinysets.iter_mut() {
@@ -230,7 +282,7 @@ impl BitSet {
     }
 
     /// Returns the number of elements in the `BitSet`.
-    pub fn len(&self) -> usize {
+    pub fn num_set_bits(&self) -> usize {
         self.len as usize
     }
 
@@ -246,6 +298,18 @@
         };
     }
 
+    /// Removes an element from the `BitSet`
+    pub fn remove(&mut self, el: u32) {
+        // we do not check saturated els.
+        let higher = el / 64u32;
+        let lower = el % 64u32;
+        self.len -= if self.tinysets[higher as usize].remove_mut(lower) {
+            1
+        } else {
+            0
+        };
+    }
+
     /// Returns true iff the elements is in the `BitSet`.
     #[inline]
     pub fn contains_from_bytes(el: u32, data: &[u8]) -> bool {
@@ -296,6 +360,33 @@ mod tests {
     use std::collections::HashSet;
     use std::convert::TryInto;
 
+    #[test]
+    fn test_tiny_set_remove() {
+        {
+            let mut u = TinySet::empty().insert(63u32).insert(5).remove(63u32);
+            assert_eq!(u.pop_lowest(), Some(5u32));
+            assert!(u.pop_lowest().is_none());
+        }
+        {
+            let mut u = TinySet::empty()
+                .insert(63u32)
+                .insert(1)
+                .insert(5)
+                .remove(63u32);
+            assert_eq!(u.pop_lowest(), Some(1u32));
+            assert_eq!(u.pop_lowest(), Some(5u32));
+            assert!(u.pop_lowest().is_none());
+        }
+        {
+            let mut u = TinySet::empty().insert(1).remove(63u32);
+            assert_eq!(u.pop_lowest(), Some(1u32));
+            assert!(u.pop_lowest().is_none());
+        }
+        {
+            let mut u = TinySet::empty().insert(1).remove(1u32);
+            assert!(u.pop_lowest().is_none());
+        }
+    }
     #[test]
     fn test_tiny_set() {
         assert!(TinySet::empty().is_empty());
@@ -361,7 +452,7 @@
             assert_eq!(hashset.contains(&el), bitset.contains(el));
         }
         assert_eq!(bitset.max_value(), max_value);
-        assert_eq!(bitset.len(), els.len());
+        assert_eq!(bitset.num_set_bits(), els.len());
     };
 
     test_against_hashset(&[], 0);
@@ -415,17 +506,25 @@
     #[test]
     fn test_bitset_len() {
         let mut bitset = BitSet::with_max_value(1_000);
-        assert_eq!(bitset.len(), 0);
+        assert_eq!(bitset.num_set_bits(), 0);
         bitset.insert(3u32);
-        assert_eq!(bitset.len(), 1);
+
assert_eq!(bitset.num_set_bits(), 1); bitset.insert(103u32); - assert_eq!(bitset.len(), 2); + assert_eq!(bitset.num_set_bits(), 2); bitset.insert(3u32); - assert_eq!(bitset.len(), 2); + assert_eq!(bitset.num_set_bits(), 2); bitset.insert(103u32); - assert_eq!(bitset.len(), 2); + assert_eq!(bitset.num_set_bits(), 2); bitset.insert(104u32); - assert_eq!(bitset.len(), 3); + assert_eq!(bitset.num_set_bits(), 3); + bitset.remove(105u32); + assert_eq!(bitset.num_set_bits(), 3); + bitset.remove(104u32); + assert_eq!(bitset.num_set_bits(), 2); + bitset.remove(3u32); + assert_eq!(bitset.num_set_bits(), 1); + bitset.remove(103u32); + assert_eq!(bitset.num_set_bits(), 0); } pub fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec { diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 5504f8c60..10b224e33 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -5,7 +5,7 @@ use crate::core::SegmentId; use crate::directory::CompositeFile; use crate::directory::FileSlice; use crate::error::DataCorruption; -use crate::fastfield::DeleteBitSet; +use crate::fastfield::AliveBitSet; use crate::fastfield::FacetReader; use crate::fastfield::FastFieldReaders; use crate::fieldnorm::{FieldNormReader, FieldNormReaders}; @@ -47,7 +47,7 @@ pub struct SegmentReader { fieldnorm_readers: FieldNormReaders, store_file: FileSlice, - delete_bitset_opt: Option, + delete_bitset_opt: Option, schema: Schema, } @@ -172,7 +172,7 @@ impl SegmentReader { let delete_bitset_opt = if segment.meta().has_deletes() { let delete_data = segment.open_read(SegmentComponent::Delete)?; - let delete_bitset = DeleteBitSet::open(delete_data)?; + let delete_bitset = AliveBitSet::open(delete_data)?; Some(delete_bitset) } else { None @@ -274,7 +274,7 @@ impl SegmentReader { /// Returns the bitset representing /// the documents that have been deleted. 
- pub fn delete_bitset(&self) -> Option<&DeleteBitSet> { + pub fn delete_bitset(&self) -> Option<&AliveBitSet> { self.delete_bitset_opt.as_ref() } @@ -307,7 +307,7 @@ impl SegmentReader { self.get_store_reader()?.space_usage(), self.delete_bitset_opt .as_ref() - .map(DeleteBitSet::space_usage) + .map(AliveBitSet::space_usage) .unwrap_or(0), )) } diff --git a/src/docset.rs b/src/docset.rs index 3c5dfdd31..72352e689 100644 --- a/src/docset.rs +++ b/src/docset.rs @@ -1,4 +1,4 @@ -use crate::fastfield::DeleteBitSet; +use crate::fastfield::AliveBitSet; use crate::DocId; use std::borrow::Borrow; use std::borrow::BorrowMut; @@ -85,7 +85,7 @@ pub trait DocSet: Send { /// Returns the number documents matching. /// Calling this method consumes the `DocSet`. - fn count(&mut self, delete_bitset: &DeleteBitSet) -> u32 { + fn count(&mut self, delete_bitset: &AliveBitSet) -> u32 { let mut count = 0u32; let mut doc = self.doc(); while doc != TERMINATED { @@ -130,7 +130,7 @@ impl<'a> DocSet for &'a mut dyn DocSet { (**self).size_hint() } - fn count(&mut self, delete_bitset: &DeleteBitSet) -> u32 { + fn count(&mut self, delete_bitset: &AliveBitSet) -> u32 { (**self).count(delete_bitset) } @@ -160,7 +160,7 @@ impl DocSet for Box { unboxed.size_hint() } - fn count(&mut self, delete_bitset: &DeleteBitSet) -> u32 { + fn count(&mut self, delete_bitset: &AliveBitSet) -> u32 { let unboxed: &mut TDocSet = self.borrow_mut(); unboxed.count(delete_bitset) } diff --git a/src/fastfield/delete.rs b/src/fastfield/alive_bitset.rs similarity index 75% rename from src/fastfield/delete.rs rename to src/fastfield/alive_bitset.rs index 22af3a68b..7cc098004 100644 --- a/src/fastfield/delete.rs +++ b/src/fastfield/alive_bitset.rs @@ -11,27 +11,27 @@ use std::io::Write; /// where `delete_bitset` is the set of deleted `DocId`. /// Warning: this function does not call terminate. The caller is in charge of /// closing the writer properly. 
-pub fn write_delete_bitset(delete_bitset: &BitSet, writer: &mut dyn Write) -> io::Result<()> { +pub fn write_delete_bitset(delete_bitset: &BitSet, writer: &mut T) -> io::Result<()> { delete_bitset.serialize(writer)?; Ok(()) } /// Set of deleted `DocId`s. #[derive(Clone)] -pub struct DeleteBitSet { +pub struct AliveBitSet { data: OwnedBytes, num_deleted: usize, } -impl DeleteBitSet { +impl AliveBitSet { #[cfg(test)] - pub(crate) fn for_test(docs: &[DocId], max_doc: u32) -> DeleteBitSet { + pub(crate) fn for_test(not_alive_docs: &[DocId], max_doc: u32) -> AliveBitSet { use crate::directory::{Directory, RamDirectory, TerminatingWrite}; use std::path::Path; - assert!(docs.iter().all(|&doc| doc < max_doc)); - let mut bitset = BitSet::with_max_value(max_doc); - for &doc in docs { - bitset.insert(doc); + assert!(not_alive_docs.iter().all(|&doc| doc < max_doc)); + let mut bitset = BitSet::with_max_value_and_filled(max_doc); + for &doc in not_alive_docs { + bitset.remove(doc); } let directory = RamDirectory::create(); let path = Path::new("dummydeletebitset"); @@ -43,13 +43,11 @@ impl DeleteBitSet { } /// Opens a delete bitset given its file. 
- pub fn open(file: FileSlice) -> crate::Result { + pub fn open(file: FileSlice) -> crate::Result { let bytes = file.read_bytes()?; - let num_deleted = BitSet::iter_from_bytes(bytes.as_slice()) - .map(|tinyset| tinyset.len() as usize) - .sum(); + let num_deleted = BitSet::count_unset_from_bytes(bytes.as_slice()); - Ok(DeleteBitSet { + Ok(AliveBitSet { data: bytes, num_deleted, }) @@ -65,7 +63,7 @@ impl DeleteBitSet { #[inline] pub fn is_deleted(&self, doc: DocId) -> bool { let data = self.data.as_slice(); - BitSet::contains_from_bytes(doc, data) + !BitSet::contains_from_bytes(doc, data) } /// Iterate over the positions of the set elements @@ -88,11 +86,11 @@ impl DeleteBitSet { #[cfg(test)] mod tests { - use super::DeleteBitSet; + use super::AliveBitSet; #[test] fn test_delete_bitset_empty() { - let delete_bitset = DeleteBitSet::for_test(&[], 10); + let delete_bitset = AliveBitSet::for_test(&[], 10); for doc in 0..10 { assert_eq!(delete_bitset.is_deleted(doc), !delete_bitset.is_alive(doc)); } @@ -101,7 +99,7 @@ mod tests { #[test] fn test_delete_bitset() { - let delete_bitset = DeleteBitSet::for_test(&[1, 9], 10); + let delete_bitset = AliveBitSet::for_test(&[1, 9], 10); assert!(delete_bitset.is_alive(0)); assert!(delete_bitset.is_deleted(1)); assert!(delete_bitset.is_alive(2)); @@ -121,7 +119,7 @@ mod tests { #[test] fn test_delete_bitset_iter_minimal() { - let delete_bitset = DeleteBitSet::for_test(&[7], 8); + let delete_bitset = AliveBitSet::for_test(&[7], 8); let data: Vec<_> = delete_bitset.iter_unset().collect(); assert_eq!(data, vec![0, 1, 2, 3, 4, 5, 6]); @@ -129,14 +127,14 @@ mod tests { #[test] fn test_delete_bitset_iter_small() { - let delete_bitset = DeleteBitSet::for_test(&[0, 2, 3, 6], 7); + let delete_bitset = AliveBitSet::for_test(&[0, 2, 3, 6], 7); let data: Vec<_> = delete_bitset.iter_unset().collect(); assert_eq!(data, vec![1, 4, 5]); } #[test] fn test_delete_bitset_iter() { - let delete_bitset = DeleteBitSet::for_test(&[0, 1, 1000], 1001); + let 
delete_bitset = AliveBitSet::for_test(&[0, 1, 1000], 1001); let data: Vec<_> = delete_bitset.iter_unset().collect(); assert_eq!(data, (2..=999).collect::>()); @@ -146,16 +144,14 @@ mod tests { #[cfg(all(test, feature = "unstable"))] mod bench { - use super::DeleteBitSet; - use common::BitSet; + use super::AliveBitSet; use rand::prelude::IteratorRandom; - use rand::prelude::SliceRandom; use rand::thread_rng; use test::Bencher; - fn get_many_deleted() -> Vec { + fn get_alive() -> Vec { let mut data = (0..1_000_000_u32).collect::>(); - for _ in 0..(1_000_000) * 7 / 8 { + for _ in 0..(1_000_000) * 1 / 8 { remove_rand(&mut data); } data @@ -168,14 +164,14 @@ mod bench { #[bench] fn bench_deletebitset_iter_deser_on_fly(bench: &mut Bencher) { - let delete_bitset = DeleteBitSet::for_test(&[0, 1, 1000, 10000], 1_000_000); + let delete_bitset = AliveBitSet::for_test(&[0, 1, 1000, 10000], 1_000_000); bench.iter(|| delete_bitset.iter_unset().collect::>()); } #[bench] fn bench_deletebitset_access(bench: &mut Bencher) { - let delete_bitset = DeleteBitSet::for_test(&[0, 1, 1000, 10000], 1_000_000); + let delete_bitset = AliveBitSet::for_test(&[0, 1, 1000, 10000], 1_000_000); bench.iter(|| { (0..1_000_000_u32) @@ -186,14 +182,14 @@ mod bench { #[bench] fn bench_deletebitset_iter_deser_on_fly_1_8_alive(bench: &mut Bencher) { - let delete_bitset = DeleteBitSet::for_test(&get_many_deleted(), 1_000_000); + let delete_bitset = AliveBitSet::for_test(&get_alive(), 1_000_000); bench.iter(|| delete_bitset.iter_unset().collect::>()); } #[bench] fn bench_deletebitset_access_1_8_alive(bench: &mut Bencher) { - let delete_bitset = DeleteBitSet::for_test(&get_many_deleted(), 1_000_000); + let delete_bitset = AliveBitSet::for_test(&get_alive(), 1_000_000); bench.iter(|| { (0..1_000_000_u32) diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index a3dc8c17f..73a3a475c 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -23,9 +23,9 @@ values stored. 
Read access performance is comparable to that of an array lookup. */ +pub use self::alive_bitset::write_delete_bitset; +pub use self::alive_bitset::AliveBitSet; pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter}; -pub use self::delete::write_delete_bitset; -pub use self::delete::DeleteBitSet; pub use self::error::{FastFieldNotAvailableError, Result}; pub use self::facet_reader::FacetReader; pub use self::multivalued::{MultiValuedFastFieldReader, MultiValuedFastFieldWriter}; @@ -46,8 +46,8 @@ use crate::{ schema::Type, }; +mod alive_bitset; mod bytes; -mod delete; mod error; mod facet_reader; mod multivalued; diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index c42b87080..e4623e548 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -114,7 +114,7 @@ fn compute_deleted_bitset( let mut doc_matching_deleted_term = docset.doc(); while doc_matching_deleted_term != TERMINATED { if doc_opstamps.is_deleted(doc_matching_deleted_term, delete_op.opstamp) { - delete_bitset.insert(doc_matching_deleted_term); + delete_bitset.remove(doc_matching_deleted_term); might_have_changed = true; } doc_matching_deleted_term = docset.advance(); @@ -151,7 +151,7 @@ pub(crate) fn advance_deletes( let max_doc = segment_reader.max_doc(); let mut delete_bitset: BitSet = match segment_entry.delete_bitset() { Some(previous_delete_bitset) => (*previous_delete_bitset).clone(), - None => BitSet::with_max_value(max_doc), + None => BitSet::with_max_value_and_filled(max_doc), }; let num_deleted_docs_before = segment.meta().num_deleted_docs(); @@ -170,12 +170,13 @@ pub(crate) fn advance_deletes( if let Some(seg_delete_bitset) = segment_reader.delete_bitset() { for doc in 0u32..max_doc { if seg_delete_bitset.is_deleted(doc) { - delete_bitset.insert(doc); + delete_bitset.remove(doc); } } } - let num_deleted_docs: u32 = delete_bitset.len() as u32; + let num_alive_docs: u32 = delete_bitset.num_set_bits() as u32; + let num_deleted_docs = max_doc 
- num_alive_docs; if num_deleted_docs > num_deleted_docs_before { // There are new deletes. We need to write a new delete file. segment = segment.with_delete_meta(num_deleted_docs as u32, target_opstamp); @@ -259,7 +260,7 @@ fn apply_deletes( let doc_to_opstamps = DocToOpstampMapping::WithMap(doc_opstamps); let max_doc = segment.meta().max_doc(); - let mut deleted_bitset = BitSet::with_max_value(max_doc); + let mut deleted_bitset = BitSet::with_max_value_and_filled(max_doc); let may_have_deletes = compute_deleted_bitset( &mut deleted_bitset, &segment_reader, diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 4d69b0915..a34fd541c 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -1,6 +1,5 @@ use crate::error::DataCorruption; use crate::fastfield::CompositeFastFieldSerializer; -use crate::fastfield::DeleteBitSet; use crate::fastfield::DynamicFastFieldReader; use crate::fastfield::FastFieldDataAccess; use crate::fastfield::FastFieldReader; diff --git a/src/indexer/merger_sorted_index_test.rs b/src/indexer/merger_sorted_index_test.rs index d06f0b40d..a9950affa 100644 --- a/src/indexer/merger_sorted_index_test.rs +++ b/src/indexer/merger_sorted_index_test.rs @@ -1,6 +1,6 @@ #[cfg(test)] mod tests { - use crate::fastfield::{DeleteBitSet, FastFieldReader}; + use crate::fastfield::{AliveBitSet, FastFieldReader}; use crate::schema::IndexRecordOption; use crate::{ collector::TopDocs, @@ -257,7 +257,7 @@ mod tests { .unwrap(); assert_eq!(postings.doc_freq(), 2); - let fallback_bitset = DeleteBitSet::for_test(&[0], 100); + let fallback_bitset = AliveBitSet::for_test(&[0], 100); assert_eq!( postings.doc_freq_given_deletes( segment_reader.delete_bitset().unwrap_or(&fallback_bitset) @@ -336,7 +336,7 @@ mod tests { .unwrap() .unwrap(); assert_eq!(postings.doc_freq(), 2); - let fallback_bitset = DeleteBitSet::for_test(&[0], 100); + let fallback_bitset = AliveBitSet::for_test(&[0], 100); assert_eq!( postings.doc_freq_given_deletes( 
segment_reader.delete_bitset().unwrap_or(&fallback_bitset) @@ -446,7 +446,7 @@ mod tests { .unwrap(); assert_eq!(postings.doc_freq(), 2); - let fallback_bitset = DeleteBitSet::for_test(&[0], 100); + let fallback_bitset = AliveBitSet::for_test(&[0], 100); assert_eq!( postings.doc_freq_given_deletes( segment_reader.delete_bitset().unwrap_or(&fallback_bitset) diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index aa470d99f..753737d51 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -1,5 +1,5 @@ use crate::docset::DocSet; -use crate::fastfield::DeleteBitSet; +use crate::fastfield::AliveBitSet; use crate::positions::PositionReader; use crate::postings::branchless_binary_search; use crate::postings::compression::COMPRESSION_BLOCK_SIZE; @@ -34,7 +34,7 @@ impl SegmentPostings { /// /// This method will clone and scan through the posting lists. /// (this is a rather expensive operation). - pub fn doc_freq_given_deletes(&self, delete_bitset: &DeleteBitSet) -> u32 { + pub fn doc_freq_given_deletes(&self, delete_bitset: &AliveBitSet) -> u32 { let mut docset = self.clone(); let mut doc_freq = 0; loop { @@ -268,7 +268,7 @@ mod tests { use common::HasLen; use crate::docset::{DocSet, TERMINATED}; - use crate::fastfield::DeleteBitSet; + use crate::fastfield::AliveBitSet; use crate::postings::postings::Postings; #[test] @@ -296,9 +296,9 @@ mod tests { fn test_doc_freq() { let docs = SegmentPostings::create_from_docs(&[0, 2, 10]); assert_eq!(docs.doc_freq(), 3); - let delete_bitset = DeleteBitSet::for_test(&[2], 12); + let delete_bitset = AliveBitSet::for_test(&[2], 12); assert_eq!(docs.doc_freq_given_deletes(&delete_bitset), 2); - let all_deleted = DeleteBitSet::for_test(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 12); + let all_deleted = AliveBitSet::for_test(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 12); assert_eq!(docs.doc_freq_given_deletes(&all_deleted), 0); } } diff --git a/src/query/bitset/mod.rs 
b/src/query/bitset/mod.rs index 030fdeae7..ebd6e7b36 100644 --- a/src/query/bitset/mod.rs +++ b/src/query/bitset/mod.rs @@ -90,7 +90,7 @@ impl DocSet for BitSetDocSet { /// but we don't have access to any better /// value. fn size_hint(&self) -> u32 { - self.docs.len() as u32 + self.docs.num_set_bits() as u32 } } @@ -124,7 +124,7 @@ mod tests { for i in 0..100_000 { assert_eq!(btreeset.contains(&i), bitset.contains(i)); } - assert_eq!(btreeset.len(), bitset.len()); + assert_eq!(btreeset.len(), bitset.num_set_bits()); let mut bitset_docset = BitSetDocSet::from(bitset); let mut remaining = true; for el in btreeset.into_iter() { diff --git a/src/query/boost_query.rs b/src/query/boost_query.rs index 41c94e0f8..72bd3ebba 100644 --- a/src/query/boost_query.rs +++ b/src/query/boost_query.rs @@ -1,4 +1,4 @@ -use crate::fastfield::DeleteBitSet; +use crate::fastfield::AliveBitSet; use crate::query::explanation::does_not_match; use crate::query::{Explanation, Query, Scorer, Weight}; use crate::{DocId, DocSet, Score, Searcher, SegmentReader, Term}; @@ -118,7 +118,7 @@ impl DocSet for BoostScorer { self.underlying.size_hint() } - fn count(&mut self, delete_bitset: &DeleteBitSet) -> u32 { + fn count(&mut self, delete_bitset: &AliveBitSet) -> u32 { self.underlying.count(delete_bitset) } diff --git a/src/store/mod.rs b/src/store/mod.rs index 00e0c4b13..0ad341473 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -57,7 +57,7 @@ pub mod tests { use futures::executor::block_on; use super::*; - use crate::fastfield::DeleteBitSet; + use crate::fastfield::AliveBitSet; use crate::schema::{self, FieldValue, TextFieldIndexing, STORED, TEXT}; use crate::schema::{Document, TextOptions}; use crate::{ @@ -113,7 +113,7 @@ pub mod tests { fn test_doc_store_iter_with_delete_bug_1077() -> crate::Result<()> { // this will cover deletion of the first element in a checkpoint let deleted_docids = (200..300).collect::>(); - let delete_bitset = DeleteBitSet::for_test(&deleted_docids, NUM_DOCS as 
u32); + let delete_bitset = AliveBitSet::for_test(&deleted_docids, NUM_DOCS as u32); let path = Path::new("store"); let directory = RamDirectory::create(); diff --git a/src/store/reader.rs b/src/store/reader.rs index 3ff04f691..75012718d 100644 --- a/src/store/reader.rs +++ b/src/store/reader.rs @@ -5,7 +5,7 @@ use crate::schema::Document; use crate::space_usage::StoreSpaceUsage; use crate::store::index::Checkpoint; use crate::DocId; -use crate::{error::DataCorruption, fastfield::DeleteBitSet}; +use crate::{error::DataCorruption, fastfield::AliveBitSet}; use common::{BinarySerializable, HasLen, VInt}; use lru::LruCache; use std::io; @@ -136,7 +136,7 @@ impl StoreReader { /// The delete_bitset has to be forwarded from the `SegmentReader` or the results maybe wrong. pub fn iter<'a: 'b, 'b>( &'b self, - delete_bitset: Option<&'a DeleteBitSet>, + delete_bitset: Option<&'a AliveBitSet>, ) -> impl Iterator> + 'b { self.iter_raw(delete_bitset).map(|doc_bytes_res| { let mut doc_bytes = doc_bytes_res?; @@ -149,7 +149,7 @@ impl StoreReader { /// The delete_bitset has to be forwarded from the `SegmentReader` or the results maybe wrong. 
pub(crate) fn iter_raw<'a: 'b, 'b>( &'b self, - delete_bitset: Option<&'a DeleteBitSet>, + delete_bitset: Option<&'a AliveBitSet>, ) -> impl Iterator> + 'b { let last_docid = self .block_checkpoints() From d7a6a409a108a3c539ea074f1edc3e2c335a8687 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Thu, 23 Sep 2021 20:33:11 +0800 Subject: [PATCH 08/13] renames --- src/collector/mod.rs | 4 +- src/collector/top_score_collector.rs | 4 +- src/core/segment_reader.rs | 26 ++++---- src/docset.rs | 12 ++-- src/fastfield/alive_bitset.rs | 84 ++++++++++++------------- src/fastfield/mod.rs | 2 +- src/indexer/index_writer.rs | 30 ++++----- src/indexer/merger.rs | 16 ++--- src/indexer/merger_sorted_index_test.rs | 6 +- src/indexer/segment_entry.rs | 16 ++--- src/postings/segment_postings.rs | 8 +-- src/query/boost_query.rs | 4 +- src/query/term_query/term_weight.rs | 4 +- src/query/weight.rs | 4 +- src/store/mod.rs | 10 +-- src/store/reader.rs | 12 ++-- 16 files changed, 121 insertions(+), 121 deletions(-) diff --git a/src/collector/mod.rs b/src/collector/mod.rs index c7e64f004..bb409fb79 100644 --- a/src/collector/mod.rs +++ b/src/collector/mod.rs @@ -178,9 +178,9 @@ pub trait Collector: Sync + Send { ) -> crate::Result<::Fruit> { let mut segment_collector = self.for_segment(segment_ord as u32, reader)?; - if let Some(delete_bitset) = reader.delete_bitset() { + if let Some(alive_bitset) = reader.alive_bitset() { weight.for_each(reader, &mut |doc, score| { - if delete_bitset.is_alive(doc) { + if alive_bitset.is_alive(doc) { segment_collector.collect(doc, score); } })?; diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index b1786b77c..51d0a5801 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -629,10 +629,10 @@ impl Collector for TopDocs { let heap_len = self.0.limit + self.0.offset; let mut heap: BinaryHeap> = BinaryHeap::with_capacity(heap_len); - if let Some(delete_bitset) = 
reader.delete_bitset() { + if let Some(alive_bitset) = reader.alive_bitset() { let mut threshold = Score::MIN; weight.for_each_pruning(threshold, reader, &mut |doc, score| { - if delete_bitset.is_deleted(doc) { + if alive_bitset.is_deleted(doc) { return threshold; } let heap_item = ComparableDoc { diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 10b224e33..8ec303b6e 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -47,7 +47,7 @@ pub struct SegmentReader { fieldnorm_readers: FieldNormReaders, store_file: FileSlice, - delete_bitset_opt: Option, + alive_bitset_opt: Option, schema: Schema, } @@ -72,14 +72,14 @@ impl SegmentReader { /// Return the number of documents that have been /// deleted in the segment. pub fn num_deleted_docs(&self) -> DocId { - self.delete_bitset() + self.alive_bitset() .map(|delete_set| delete_set.num_deleted() as DocId) .unwrap_or(0u32) } /// Returns true iff some of the documents of the segment have been deleted. pub fn has_deletes(&self) -> bool { - self.delete_bitset().is_some() + self.alive_bitset().is_some() } /// Accessor to a segment's fast field reader given a field. @@ -170,10 +170,10 @@ impl SegmentReader { let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?; let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?; - let delete_bitset_opt = if segment.meta().has_deletes() { + let alive_bitset_opt = if segment.meta().has_deletes() { let delete_data = segment.open_read(SegmentComponent::Delete)?; - let delete_bitset = AliveBitSet::open(delete_data)?; - Some(delete_bitset) + let alive_bitset = AliveBitSet::open(delete_data)?; + Some(alive_bitset) } else { None }; @@ -188,7 +188,7 @@ impl SegmentReader { fieldnorm_readers, segment_id: segment.id(), store_file, - delete_bitset_opt, + alive_bitset_opt, positions_composite, schema, }) @@ -274,22 +274,22 @@ impl SegmentReader { /// Returns the bitset representing /// the documents that have been deleted. 
- pub fn delete_bitset(&self) -> Option<&AliveBitSet> { - self.delete_bitset_opt.as_ref() + pub fn alive_bitset(&self) -> Option<&AliveBitSet> { + self.alive_bitset_opt.as_ref() } /// Returns true iff the `doc` is marked /// as deleted. pub fn is_deleted(&self, doc: DocId) -> bool { - self.delete_bitset() + self.alive_bitset() .map(|delete_set| delete_set.is_deleted(doc)) .unwrap_or(false) } /// Returns an iterator that will iterate over the alive document ids pub fn doc_ids_alive(&self) -> Box + '_> { - if let Some(delete_bitset) = &self.delete_bitset_opt { - Box::new(delete_bitset.iter_unset()) + if let Some(alive_bitset) = &self.alive_bitset_opt { + Box::new(alive_bitset.iter_unset()) } else { Box::new(0u32..self.max_doc) } @@ -305,7 +305,7 @@ impl SegmentReader { self.fast_fields_readers.space_usage(), self.fieldnorm_readers.space_usage(), self.get_store_reader()?.space_usage(), - self.delete_bitset_opt + self.alive_bitset_opt .as_ref() .map(AliveBitSet::space_usage) .unwrap_or(0), diff --git a/src/docset.rs b/src/docset.rs index 72352e689..0df231e23 100644 --- a/src/docset.rs +++ b/src/docset.rs @@ -85,11 +85,11 @@ pub trait DocSet: Send { /// Returns the number documents matching. /// Calling this method consumes the `DocSet`. 
- fn count(&mut self, delete_bitset: &AliveBitSet) -> u32 { + fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 { let mut count = 0u32; let mut doc = self.doc(); while doc != TERMINATED { - if !delete_bitset.is_deleted(doc) { + if !alive_bitset.is_deleted(doc) { count += 1u32; } doc = self.advance(); @@ -130,8 +130,8 @@ impl<'a> DocSet for &'a mut dyn DocSet { (**self).size_hint() } - fn count(&mut self, delete_bitset: &AliveBitSet) -> u32 { - (**self).count(delete_bitset) + fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 { + (**self).count(alive_bitset) } fn count_including_deleted(&mut self) -> u32 { @@ -160,9 +160,9 @@ impl DocSet for Box { unboxed.size_hint() } - fn count(&mut self, delete_bitset: &AliveBitSet) -> u32 { + fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 { let unboxed: &mut TDocSet = self.borrow_mut(); - unboxed.count(delete_bitset) + unboxed.count(alive_bitset) } fn count_including_deleted(&mut self) -> u32 { diff --git a/src/fastfield/alive_bitset.rs b/src/fastfield/alive_bitset.rs index 7cc098004..3f8136c23 100644 --- a/src/fastfield/alive_bitset.rs +++ b/src/fastfield/alive_bitset.rs @@ -6,17 +6,17 @@ use common::BitSet; use std::io; use std::io::Write; -/// Write a delete `BitSet` +/// Write a alive `BitSet` /// -/// where `delete_bitset` is the set of deleted `DocId`. +/// where `alive_bitset` is the set of alive `DocId`. /// Warning: this function does not call terminate. The caller is in charge of /// closing the writer properly. -pub fn write_delete_bitset(delete_bitset: &BitSet, writer: &mut T) -> io::Result<()> { - delete_bitset.serialize(writer)?; +pub fn write_alive_bitset(alive_bitset: &BitSet, writer: &mut T) -> io::Result<()> { + alive_bitset.serialize(writer)?; Ok(()) } -/// Set of deleted `DocId`s. +/// Set of alive `DocId`s. 
#[derive(Clone)] pub struct AliveBitSet { data: OwnedBytes, @@ -36,7 +36,7 @@ impl AliveBitSet { let directory = RamDirectory::create(); let path = Path::new("dummydeletebitset"); let mut wrt = directory.open_write(path).unwrap(); - write_delete_bitset(&bitset, &mut wrt).unwrap(); + write_alive_bitset(&bitset, &mut wrt).unwrap(); wrt.terminate().unwrap(); let file = directory.open_read(path).unwrap(); Self::open(file).unwrap() @@ -89,54 +89,54 @@ mod tests { use super::AliveBitSet; #[test] - fn test_delete_bitset_empty() { - let delete_bitset = AliveBitSet::for_test(&[], 10); + fn test_alive_bitset_empty() { + let alive_bitset = AliveBitSet::for_test(&[], 10); for doc in 0..10 { - assert_eq!(delete_bitset.is_deleted(doc), !delete_bitset.is_alive(doc)); + assert_eq!(alive_bitset.is_deleted(doc), !alive_bitset.is_alive(doc)); } - assert_eq!(delete_bitset.num_deleted(), 0); + assert_eq!(alive_bitset.num_deleted(), 0); } #[test] - fn test_delete_bitset() { - let delete_bitset = AliveBitSet::for_test(&[1, 9], 10); - assert!(delete_bitset.is_alive(0)); - assert!(delete_bitset.is_deleted(1)); - assert!(delete_bitset.is_alive(2)); - assert!(delete_bitset.is_alive(3)); - assert!(delete_bitset.is_alive(4)); - assert!(delete_bitset.is_alive(5)); - assert!(delete_bitset.is_alive(6)); - assert!(delete_bitset.is_alive(6)); - assert!(delete_bitset.is_alive(7)); - assert!(delete_bitset.is_alive(8)); - assert!(delete_bitset.is_deleted(9)); + fn test_alive_bitset() { + let alive_bitset = AliveBitSet::for_test(&[1, 9], 10); + assert!(alive_bitset.is_alive(0)); + assert!(alive_bitset.is_deleted(1)); + assert!(alive_bitset.is_alive(2)); + assert!(alive_bitset.is_alive(3)); + assert!(alive_bitset.is_alive(4)); + assert!(alive_bitset.is_alive(5)); + assert!(alive_bitset.is_alive(6)); + assert!(alive_bitset.is_alive(6)); + assert!(alive_bitset.is_alive(7)); + assert!(alive_bitset.is_alive(8)); + assert!(alive_bitset.is_deleted(9)); for doc in 0..10 { - 
assert_eq!(delete_bitset.is_deleted(doc), !delete_bitset.is_alive(doc)); + assert_eq!(alive_bitset.is_deleted(doc), !alive_bitset.is_alive(doc)); } - assert_eq!(delete_bitset.num_deleted(), 2); + assert_eq!(alive_bitset.num_deleted(), 2); } #[test] - fn test_delete_bitset_iter_minimal() { - let delete_bitset = AliveBitSet::for_test(&[7], 8); + fn test_alive_bitset_iter_minimal() { + let alive_bitset = AliveBitSet::for_test(&[7], 8); - let data: Vec<_> = delete_bitset.iter_unset().collect(); + let data: Vec<_> = alive_bitset.iter_unset().collect(); assert_eq!(data, vec![0, 1, 2, 3, 4, 5, 6]); } #[test] - fn test_delete_bitset_iter_small() { - let delete_bitset = AliveBitSet::for_test(&[0, 2, 3, 6], 7); + fn test_alive_bitset_iter_small() { + let alive_bitset = AliveBitSet::for_test(&[0, 2, 3, 6], 7); - let data: Vec<_> = delete_bitset.iter_unset().collect(); + let data: Vec<_> = alive_bitset.iter_unset().collect(); assert_eq!(data, vec![1, 4, 5]); } #[test] - fn test_delete_bitset_iter() { - let delete_bitset = AliveBitSet::for_test(&[0, 1, 1000], 1001); + fn test_alive_bitset_iter() { + let alive_bitset = AliveBitSet::for_test(&[0, 1, 1000], 1001); - let data: Vec<_> = delete_bitset.iter_unset().collect(); + let data: Vec<_> = alive_bitset.iter_unset().collect(); assert_eq!(data, (2..=999).collect::>()); } } @@ -164,36 +164,36 @@ mod bench { #[bench] fn bench_deletebitset_iter_deser_on_fly(bench: &mut Bencher) { - let delete_bitset = AliveBitSet::for_test(&[0, 1, 1000, 10000], 1_000_000); + let alive_bitset = AliveBitSet::for_test(&[0, 1, 1000, 10000], 1_000_000); - bench.iter(|| delete_bitset.iter_unset().collect::>()); + bench.iter(|| alive_bitset.iter_unset().collect::>()); } #[bench] fn bench_deletebitset_access(bench: &mut Bencher) { - let delete_bitset = AliveBitSet::for_test(&[0, 1, 1000, 10000], 1_000_000); + let alive_bitset = AliveBitSet::for_test(&[0, 1, 1000, 10000], 1_000_000); bench.iter(|| { (0..1_000_000_u32) - .filter(|doc| 
delete_bitset.is_alive(*doc)) + .filter(|doc| alive_bitset.is_alive(*doc)) .collect::>() }); } #[bench] fn bench_deletebitset_iter_deser_on_fly_1_8_alive(bench: &mut Bencher) { - let delete_bitset = AliveBitSet::for_test(&get_alive(), 1_000_000); + let alive_bitset = AliveBitSet::for_test(&get_alive(), 1_000_000); - bench.iter(|| delete_bitset.iter_unset().collect::>()); + bench.iter(|| alive_bitset.iter_unset().collect::>()); } #[bench] fn bench_deletebitset_access_1_8_alive(bench: &mut Bencher) { - let delete_bitset = AliveBitSet::for_test(&get_alive(), 1_000_000); + let alive_bitset = AliveBitSet::for_test(&get_alive(), 1_000_000); bench.iter(|| { (0..1_000_000_u32) - .filter(|doc| delete_bitset.is_alive(*doc)) + .filter(|doc| alive_bitset.is_alive(*doc)) .collect::>() }); } diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 73a3a475c..dd100074c 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -23,7 +23,7 @@ values stored. Read access performance is comparable to that of an array lookup. 
*/ -pub use self::alive_bitset::write_delete_bitset; +pub use self::alive_bitset::write_alive_bitset; pub use self::alive_bitset::AliveBitSet; pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter}; pub use self::error::{FastFieldNotAvailableError, Result}; diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index e4623e548..ff45df9d3 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -11,7 +11,7 @@ use crate::directory::TerminatingWrite; use crate::directory::{DirectoryLock, GarbageCollectionResult}; use crate::docset::{DocSet, TERMINATED}; use crate::error::TantivyError; -use crate::fastfield::write_delete_bitset; +use crate::fastfield::write_alive_bitset; use crate::indexer::delete_queue::{DeleteCursor, DeleteQueue}; use crate::indexer::doc_opstamp_mapping::DocToOpstampMapping; use crate::indexer::operation::DeleteOperation; @@ -93,7 +93,7 @@ pub struct IndexWriter { } fn compute_deleted_bitset( - delete_bitset: &mut BitSet, + alive_bitset: &mut BitSet, segment_reader: &SegmentReader, delete_cursor: &mut DeleteCursor, doc_opstamps: &DocToOpstampMapping, @@ -114,7 +114,7 @@ fn compute_deleted_bitset( let mut doc_matching_deleted_term = docset.doc(); while doc_matching_deleted_term != TERMINATED { if doc_opstamps.is_deleted(doc_matching_deleted_term, delete_op.opstamp) { - delete_bitset.remove(doc_matching_deleted_term); + alive_bitset.remove(doc_matching_deleted_term); might_have_changed = true; } doc_matching_deleted_term = docset.advance(); @@ -141,7 +141,7 @@ pub(crate) fn advance_deletes( return Ok(()); } - if segment_entry.delete_bitset().is_none() && segment_entry.delete_cursor().get().is_none() { + if segment_entry.alive_bitset().is_none() && segment_entry.delete_cursor().get().is_none() { // There has been no `DeleteOperation` between the segment status and `target_opstamp`. 
return Ok(()); } @@ -149,15 +149,15 @@ pub(crate) fn advance_deletes( let segment_reader = SegmentReader::open(&segment)?; let max_doc = segment_reader.max_doc(); - let mut delete_bitset: BitSet = match segment_entry.delete_bitset() { - Some(previous_delete_bitset) => (*previous_delete_bitset).clone(), + let mut alive_bitset: BitSet = match segment_entry.alive_bitset() { + Some(previous_alive_bitset) => (*previous_alive_bitset).clone(), None => BitSet::with_max_value_and_filled(max_doc), }; let num_deleted_docs_before = segment.meta().num_deleted_docs(); compute_deleted_bitset( - &mut delete_bitset, + &mut alive_bitset, &segment_reader, segment_entry.delete_cursor(), &DocToOpstampMapping::None, @@ -167,21 +167,21 @@ pub(crate) fn advance_deletes( // TODO optimize // It should be possible to do something smarter by manipulation bitsets directly // to compute this union. - if let Some(seg_delete_bitset) = segment_reader.delete_bitset() { + if let Some(seg_alive_bitset) = segment_reader.alive_bitset() { for doc in 0u32..max_doc { - if seg_delete_bitset.is_deleted(doc) { - delete_bitset.remove(doc); + if seg_alive_bitset.is_deleted(doc) { + alive_bitset.remove(doc); } } } - let num_alive_docs: u32 = delete_bitset.num_set_bits() as u32; + let num_alive_docs: u32 = alive_bitset.num_set_bits() as u32; let num_deleted_docs = max_doc - num_alive_docs; if num_deleted_docs > num_deleted_docs_before { // There are new deletes. We need to write a new delete file. 
segment = segment.with_delete_meta(num_deleted_docs as u32, target_opstamp); let mut delete_file = segment.open_write(SegmentComponent::Delete)?; - write_delete_bitset(&delete_bitset, &mut delete_file)?; + write_alive_bitset(&alive_bitset, &mut delete_file)?; delete_file.terminate()?; } @@ -227,13 +227,13 @@ fn index_documents( let segment_with_max_doc = segment.with_max_doc(max_doc); - let delete_bitset_opt = + let alive_bitset_opt = apply_deletes(&segment_with_max_doc, &mut delete_cursor, &doc_opstamps)?; let meta = segment_with_max_doc.meta().clone(); meta.untrack_temp_docstore(); // update segment_updater inventory to remove tempstore - let segment_entry = SegmentEntry::new(meta, delete_cursor, delete_bitset_opt); + let segment_entry = SegmentEntry::new(meta, delete_cursor, alive_bitset_opt); block_on(segment_updater.schedule_add_segment(segment_entry))?; Ok(true) } @@ -1514,7 +1514,7 @@ mod tests { for segment_reader in searcher.segment_readers().iter() { let store_reader = segment_reader.get_store_reader().unwrap(); // test store iterator - for doc in store_reader.iter(segment_reader.delete_bitset()) { + for doc in store_reader.iter(segment_reader.alive_bitset()) { let id = doc .unwrap() .get_first(id_field) diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index a34fd541c..84151c8b1 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -101,7 +101,7 @@ fn compute_min_max_val( if segment_reader.max_doc() == 0 { None } else { - if segment_reader.delete_bitset().is_some() { + if segment_reader.alive_bitset().is_some() { // some deleted documents, // we need to recompute the max / min minmax( @@ -497,8 +497,8 @@ impl IndexMerger { // what should be the bit length use for bitpacking. 
let mut num_docs = 0; for (reader, u64s_reader) in reader_and_field_accessors.iter() { - if let Some(delete_bitset) = reader.delete_bitset() { - num_docs += reader.max_doc() as u64 - delete_bitset.num_deleted() as u64; + if let Some(alive_bitset) = reader.alive_bitset() { + num_docs += reader.max_doc() as u64 - alive_bitset.num_deleted() as u64; for doc in reader.doc_ids_alive() { let num_vals = u64s_reader.get_len(doc) as u64; total_num_vals += num_vals; @@ -888,9 +888,9 @@ impl IndexMerger { let inverted_index: &InvertedIndexReader = &*field_readers[segment_ord]; let segment_postings = inverted_index .read_postings_from_terminfo(&term_info, segment_postings_option)?; - let delete_bitset_opt = segment_reader.delete_bitset(); - let doc_freq = if let Some(delete_bitset) = delete_bitset_opt { - segment_postings.doc_freq_given_deletes(delete_bitset) + let alive_bitset_opt = segment_reader.alive_bitset(); + let doc_freq = if let Some(alive_bitset) = alive_bitset_opt { + segment_postings.doc_freq_given_deletes(alive_bitset) } else { segment_postings.doc_freq() }; @@ -1010,7 +1010,7 @@ impl IndexMerger { let mut document_iterators: Vec<_> = store_readers .iter() .enumerate() - .map(|(i, store)| store.iter_raw(self.readers[i].delete_bitset())) + .map(|(i, store)| store.iter_raw(self.readers[i].alive_bitset())) .collect(); if !doc_id_mapping.is_trivial() { for (old_doc_id, reader_with_ordinal) in doc_id_mapping.iter() { @@ -1046,7 +1046,7 @@ impl IndexMerger { || store_reader.block_checkpoints().take(7).count() < 6 || store_reader.compressor() != store_writer.compressor() { - for doc_bytes_res in store_reader.iter_raw(reader.delete_bitset()) { + for doc_bytes_res in store_reader.iter_raw(reader.alive_bitset()) { let doc_bytes = doc_bytes_res?; store_writer.store_bytes(&doc_bytes)?; } diff --git a/src/indexer/merger_sorted_index_test.rs b/src/indexer/merger_sorted_index_test.rs index a9950affa..fd9b4883b 100644 --- a/src/indexer/merger_sorted_index_test.rs +++ 
b/src/indexer/merger_sorted_index_test.rs @@ -260,7 +260,7 @@ mod tests { let fallback_bitset = AliveBitSet::for_test(&[0], 100); assert_eq!( postings.doc_freq_given_deletes( - segment_reader.delete_bitset().unwrap_or(&fallback_bitset) + segment_reader.alive_bitset().unwrap_or(&fallback_bitset) ), 2 ); @@ -339,7 +339,7 @@ mod tests { let fallback_bitset = AliveBitSet::for_test(&[0], 100); assert_eq!( postings.doc_freq_given_deletes( - segment_reader.delete_bitset().unwrap_or(&fallback_bitset) + segment_reader.alive_bitset().unwrap_or(&fallback_bitset) ), 2 ); @@ -449,7 +449,7 @@ mod tests { let fallback_bitset = AliveBitSet::for_test(&[0], 100); assert_eq!( postings.doc_freq_given_deletes( - segment_reader.delete_bitset().unwrap_or(&fallback_bitset) + segment_reader.alive_bitset().unwrap_or(&fallback_bitset) ), 2 ); diff --git a/src/indexer/segment_entry.rs b/src/indexer/segment_entry.rs index e0beb2179..b7cae25c1 100644 --- a/src/indexer/segment_entry.rs +++ b/src/indexer/segment_entry.rs @@ -9,16 +9,16 @@ use std::fmt; /// /// In addition to segment `meta`, /// it contains a few transient states -/// - `delete_bitset` is a bitset describing -/// documents that were deleted during the commit +/// - `alive_bitset` is a bitset describing +/// documents that were alive during the commit /// itself. /// - `delete_cursor` is the position in the delete queue. /// Deletes happening before the cursor are reflected either -/// in the .del file or in the `delete_bitset`. +/// in the .del file or in the `alive_bitset`. 
#[derive(Clone)] pub struct SegmentEntry { meta: SegmentMeta, - delete_bitset: Option, + alive_bitset: Option, delete_cursor: DeleteCursor, } @@ -27,11 +27,11 @@ impl SegmentEntry { pub fn new( segment_meta: SegmentMeta, delete_cursor: DeleteCursor, - delete_bitset: Option, + alive_bitset: Option, ) -> SegmentEntry { SegmentEntry { meta: segment_meta, - delete_bitset, + alive_bitset, delete_cursor, } } @@ -39,8 +39,8 @@ impl SegmentEntry { /// Return a reference to the segment entry deleted bitset. /// /// `DocId` in this bitset are flagged as deleted. - pub fn delete_bitset(&self) -> Option<&BitSet> { - self.delete_bitset.as_ref() + pub fn alive_bitset(&self) -> Option<&BitSet> { + self.alive_bitset.as_ref() } /// Set the `SegmentMeta` for this segment. diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index 753737d51..f5e383c37 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -34,7 +34,7 @@ impl SegmentPostings { /// /// This method will clone and scan through the posting lists. /// (this is a rather expensive operation). 
- pub fn doc_freq_given_deletes(&self, delete_bitset: &AliveBitSet) -> u32 { + pub fn doc_freq_given_deletes(&self, alive_bitset: &AliveBitSet) -> u32 { let mut docset = self.clone(); let mut doc_freq = 0; loop { @@ -42,7 +42,7 @@ impl SegmentPostings { if doc == TERMINATED { return doc_freq; } - if delete_bitset.is_alive(doc) { + if alive_bitset.is_alive(doc) { doc_freq += 1u32; } docset.advance(); @@ -296,8 +296,8 @@ mod tests { fn test_doc_freq() { let docs = SegmentPostings::create_from_docs(&[0, 2, 10]); assert_eq!(docs.doc_freq(), 3); - let delete_bitset = AliveBitSet::for_test(&[2], 12); - assert_eq!(docs.doc_freq_given_deletes(&delete_bitset), 2); + let alive_bitset = AliveBitSet::for_test(&[2], 12); + assert_eq!(docs.doc_freq_given_deletes(&alive_bitset), 2); let all_deleted = AliveBitSet::for_test(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 12); assert_eq!(docs.doc_freq_given_deletes(&all_deleted), 0); } diff --git a/src/query/boost_query.rs b/src/query/boost_query.rs index 72bd3ebba..d7eee9efe 100644 --- a/src/query/boost_query.rs +++ b/src/query/boost_query.rs @@ -118,8 +118,8 @@ impl DocSet for BoostScorer { self.underlying.size_hint() } - fn count(&mut self, delete_bitset: &AliveBitSet) -> u32 { - self.underlying.count(delete_bitset) + fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 { + self.underlying.count(alive_bitset) } fn count_including_deleted(&mut self) -> u32 { diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs index 877243f8a..51779124b 100644 --- a/src/query/term_query/term_weight.rs +++ b/src/query/term_query/term_weight.rs @@ -40,8 +40,8 @@ impl Weight for TermWeight { } fn count(&self, reader: &SegmentReader) -> crate::Result { - if let Some(delete_bitset) = reader.delete_bitset() { - Ok(self.scorer(reader, 1.0)?.count(delete_bitset)) + if let Some(alive_bitset) = reader.alive_bitset() { + Ok(self.scorer(reader, 1.0)?.count(alive_bitset)) } else { let field = self.term.field(); let inv_index = 
reader.inverted_index(field)?; diff --git a/src/query/weight.rs b/src/query/weight.rs index 772846e3e..3a2ff3d33 100644 --- a/src/query/weight.rs +++ b/src/query/weight.rs @@ -59,8 +59,8 @@ pub trait Weight: Send + Sync + 'static { /// Returns the number documents within the given `SegmentReader`. fn count(&self, reader: &SegmentReader) -> crate::Result { let mut scorer = self.scorer(reader, 1.0)?; - if let Some(delete_bitset) = reader.delete_bitset() { - Ok(scorer.count(delete_bitset)) + if let Some(alive_bitset) = reader.alive_bitset() { + Ok(scorer.count(alive_bitset)) } else { Ok(scorer.count_including_deleted()) } diff --git a/src/store/mod.rs b/src/store/mod.rs index 0ad341473..364ed5a92 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -113,7 +113,7 @@ pub mod tests { fn test_doc_store_iter_with_delete_bug_1077() -> crate::Result<()> { // this will cover deletion of the first element in a checkpoint let deleted_docids = (200..300).collect::>(); - let delete_bitset = AliveBitSet::for_test(&deleted_docids, NUM_DOCS as u32); + let alive_bitset = AliveBitSet::for_test(&deleted_docids, NUM_DOCS as u32); let path = Path::new("store"); let directory = RamDirectory::create(); @@ -134,7 +134,7 @@ pub mod tests { ); } - for (_, doc) in store.iter(Some(&delete_bitset)).enumerate() { + for (_, doc) in store.iter(Some(&alive_bitset)).enumerate() { let doc = doc?; let title_content = doc.get_first(field_title).unwrap().text().unwrap(); if !title_content.starts_with("Doc ") { @@ -146,7 +146,7 @@ pub mod tests { .unwrap() .parse::() .unwrap(); - if delete_bitset.is_deleted(id) { + if alive_bitset.is_deleted(id) { panic!("unexpected deleted document {}", id); } } @@ -230,7 +230,7 @@ pub mod tests { let searcher = index.reader().unwrap().searcher(); let reader = searcher.segment_reader(0); let store = reader.get_store_reader().unwrap(); - for doc in store.iter(reader.delete_bitset()) { + for doc in store.iter(reader.alive_bitset()) { assert_eq!( 
*doc?.get_first(text_field).unwrap().text().unwrap(), "deletemenot".to_string() @@ -288,7 +288,7 @@ pub mod tests { let reader = searcher.segment_readers().iter().last().unwrap(); let store = reader.get_store_reader().unwrap(); - for doc in store.iter(reader.delete_bitset()).take(50) { + for doc in store.iter(reader.alive_bitset()).take(50) { assert_eq!( *doc?.get_first(text_field).unwrap().text().unwrap(), LOREM.to_string() diff --git a/src/store/reader.rs b/src/store/reader.rs index 75012718d..98c127d97 100644 --- a/src/store/reader.rs +++ b/src/store/reader.rs @@ -133,12 +133,12 @@ impl StoreReader { /// Iterator over all Documents in their order as they are stored in the doc store. /// Use this, if you want to extract all Documents from the doc store. - /// The delete_bitset has to be forwarded from the `SegmentReader` or the results maybe wrong. + /// The alive_bitset has to be forwarded from the `SegmentReader` or the results maybe wrong. pub fn iter<'a: 'b, 'b>( &'b self, - delete_bitset: Option<&'a AliveBitSet>, + alive_bitset: Option<&'a AliveBitSet>, ) -> impl Iterator> + 'b { - self.iter_raw(delete_bitset).map(|doc_bytes_res| { + self.iter_raw(alive_bitset).map(|doc_bytes_res| { let mut doc_bytes = doc_bytes_res?; Ok(Document::deserialize(&mut doc_bytes)?) }) @@ -146,10 +146,10 @@ impl StoreReader { /// Iterator over all RawDocuments in their order as they are stored in the doc store. /// Use this, if you want to extract all Documents from the doc store. - /// The delete_bitset has to be forwarded from the `SegmentReader` or the results maybe wrong. + /// The alive_bitset has to be forwarded from the `SegmentReader` or the results maybe wrong. 
pub(crate) fn iter_raw<'a: 'b, 'b>( &'b self, - delete_bitset: Option<&'a AliveBitSet>, + alive_bitset: Option<&'a AliveBitSet>, ) -> impl Iterator> + 'b { let last_docid = self .block_checkpoints() @@ -179,7 +179,7 @@ impl StoreReader { num_skipped = 0; } - let alive = delete_bitset.map_or(true, |bitset| bitset.is_alive(doc_id)); + let alive = alive_bitset.map_or(true, |bitset| bitset.is_alive(doc_id)); if alive { let ret = Some((curr_block.clone(), num_skipped, reset_block_pos)); // the map block will move over the num_skipped, so we reset to 0 From c27ccd3e241edee74a44b073b1717459e0f96aaf Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Thu, 23 Sep 2021 21:02:09 +0800 Subject: [PATCH 09/13] improve naming --- common/src/bitset.rs | 20 ++++++++++---------- src/query/union.rs | 8 ++++++-- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/common/src/bitset.rs b/common/src/bitset.rs index 527abed9b..84c936a61 100644 --- a/common/src/bitset.rs +++ b/common/src/bitset.rs @@ -64,19 +64,19 @@ impl TinySet { } #[inline] - /// Returns true iff the `TinySet` contains the element `el`. - pub fn contains(self, el: u32) -> bool { - !self.intersect(TinySet::singleton(el)).is_empty() + /// Returns true iff the `TinySet` bit is set at position `pos`. + pub fn contains(self, pos: u32) -> bool { + !self.intersect(TinySet::singleton(pos)).is_empty() } #[inline] - /// Returns the number of elements in the TinySet. - pub fn len(self) -> u32 { + /// Returns the number of set bits in the TinySet. + pub fn num_set(self) -> u32 { self.0.count_ones() } #[inline] - /// Returns the number of elements in the TinySet. + /// Returns the number of unset bits in the TinySet. pub fn num_unset(self) -> u32 { self.0.count_zeros() } @@ -87,11 +87,11 @@ impl TinySet { TinySet(self.0 & other.0) } - /// Creates a new `TinySet` containing only one element + /// Creates a new `TinySet` with only one bit set at `pos`. 
/// within `[0; 64[` #[inline] - pub fn singleton(el: u32) -> TinySet { - TinySet(1u64 << u64::from(el)) + pub fn singleton(pos: u32) -> TinySet { + TinySet(1u64 << u64::from(pos)) } /// Insert a new element within [0..64) @@ -203,7 +203,7 @@ impl BitSet { let mut tinysets = vec![]; for chunk in data.chunks_exact(8) { let tinyset = TinySet::deserialize(chunk.try_into().unwrap())?; - len += tinyset.len() as u64; + len += tinyset.num_set() as u64; tinysets.push(tinyset); } Ok(BitSet { diff --git a/src/query/union.rs b/src/query/union.rs index cf7b4d956..da6da15c0 100644 --- a/src/query/union.rs +++ b/src/query/union.rs @@ -219,14 +219,18 @@ where } let mut count = self.bitsets[self.cursor..HORIZON_NUM_TINYBITSETS] .iter() - .map(|bitset| bitset.len()) + .map(|bitset| bitset.num_set()) .sum::() + 1; for bitset in self.bitsets.iter_mut() { bitset.clear(); } while self.refill() { - count += self.bitsets.iter().map(|bitset| bitset.len()).sum::(); + count += self + .bitsets + .iter() + .map(|bitset| bitset.num_set()) + .sum::(); for bitset in self.bitsets.iter_mut() { bitset.clear(); } From c217bfed1e10155a816fcb15ad5808a3cba9278c Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Thu, 23 Sep 2021 21:02:19 +0800 Subject: [PATCH 10/13] cargo fmt --- src/indexer/index_writer.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index ff45df9d3..c37d41b0a 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -227,8 +227,7 @@ fn index_documents( let segment_with_max_doc = segment.with_max_doc(max_doc); - let alive_bitset_opt = - apply_deletes(&segment_with_max_doc, &mut delete_cursor, &doc_opstamps)?; + let alive_bitset_opt = apply_deletes(&segment_with_max_doc, &mut delete_cursor, &doc_opstamps)?; let meta = segment_with_max_doc.meta().clone(); meta.untrack_temp_docstore(); From 5ee5037934ec48bb3a06124bb9a8b0e2a0a1990e Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 24 
Sep 2021 12:53:33 +0800 Subject: [PATCH 11/13] create and use ReadSerializedBitSet --- common/Cargo.toml | 1 + common/src/bitset.rs | 222 ++++++++++++++++++++++------------ src/core/segment_reader.rs | 4 +- src/docset.rs | 2 +- src/fastfield/alive_bitset.rs | 42 ++++--- src/indexer/index_writer.rs | 6 +- src/indexer/merger.rs | 29 +++-- src/query/bitset/mod.rs | 4 +- src/query/union.rs | 8 +- 9 files changed, 195 insertions(+), 123 deletions(-) diff --git a/common/Cargo.toml b/common/Cargo.toml index 94b40a459..1a6703c1e 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -10,6 +10,7 @@ description = "common traits and utility functions used by multiple tantivy subc [dependencies] byteorder = "1.4.3" +ownedbytes = { version="0.1", path="../ownedbytes" } [dev-dependencies] proptest = "1.0.0" diff --git a/common/src/bitset.rs b/common/src/bitset.rs index 84c936a61..a05f786ef 100644 --- a/common/src/bitset.rs +++ b/common/src/bitset.rs @@ -1,3 +1,4 @@ +use ownedbytes::OwnedBytes; use std::convert::TryInto; use std::io::Write; use std::u64; @@ -64,34 +65,28 @@ impl TinySet { } #[inline] - /// Returns true iff the `TinySet` bit is set at position `pos`. - pub fn contains(self, pos: u32) -> bool { - !self.intersect(TinySet::singleton(pos)).is_empty() + /// Returns true iff the `TinySet` contains the element `el`. + pub fn contains(self, el: u32) -> bool { + !self.intersect(TinySet::singleton(el)).is_empty() } #[inline] - /// Returns the number of set bits in the TinySet. - pub fn num_set(self) -> u32 { + /// Returns the number of elements in the TinySet. + pub fn len(self) -> u32 { self.0.count_ones() } - #[inline] - /// Returns the number of unset bits in the TinySet. - pub fn num_unset(self) -> u32 { - self.0.count_zeros() - } - #[inline] /// Returns the intersection of `self` and `other` pub fn intersect(self, other: TinySet) -> TinySet { TinySet(self.0 & other.0) } - /// Creates a new `TinySet` with only one bit set at `pos`. 
+ /// Creates a new `TinySet` containing only one element /// within `[0; 64[` #[inline] - pub fn singleton(pos: u32) -> TinySet { - TinySet(1u64 << u64::from(pos)) + pub fn singleton(el: u32) -> TinySet { + TinySet(1u64 << u64::from(el)) } /// Insert a new element within [0..64) @@ -108,7 +103,7 @@ impl TinySet { /// Insert a new element within [0..64) /// - /// returns true if the bit changed + /// returns true if the set changed #[inline] pub fn insert_mut(&mut self, el: u32) -> bool { let old = *self; @@ -116,9 +111,9 @@ impl TinySet { old != *self } - /// Remove a new element within [0..64) + /// Remove a element within [0..64) /// - /// returns true if the bit changed + /// returns true if the set changed #[inline] pub fn remove_mut(&mut self, el: u32) -> bool { let old = *self; @@ -203,7 +198,7 @@ impl BitSet { let mut tinysets = vec![]; for chunk in data.chunks_exact(8) { let tinyset = TinySet::deserialize(chunk.try_into().unwrap())?; - len += tinyset.num_set() as u64; + len += tinyset.len() as u64; tinysets.push(tinyset); } Ok(BitSet { @@ -213,43 +208,6 @@ impl BitSet { }) } - /// Count the number of unset bits from serialized data. - /// - #[inline] - pub fn count_unset_from_bytes<'a>(data: &'a [u8]) -> usize { - BitSet::iter_tinysets_from_bytes(data) - .map(|tinyset| tinyset.num_unset() as usize) - .sum() - } - - /// Iterate the tinyset on the fly from serialized data. - /// - #[inline] - fn iter_tinysets_from_bytes<'a>(data: &'a [u8]) -> impl Iterator + 'a { - assert!((data.len() - 4) % 8 == 0); - data[4..].chunks_exact(8).map(move |chunk| { - let tinyset: TinySet = TinySet::deserialize(chunk.try_into().unwrap()).unwrap(); - tinyset - }) - } - - /// Iterate over the positions of the unset elements. 
- /// - /// max_val needs to be provided, since the last 64 bits may - #[inline] - pub fn iter_unset_from_bytes<'a>(data: &'a [u8]) -> impl Iterator + 'a { - let max_val: u32 = u32::from_le_bytes(data[..4].try_into().unwrap()); - Self::iter_tinysets_from_bytes(data) - .enumerate() - .flat_map(move |(chunk_num, tinyset)| { - let chunk_base_val = chunk_num as u32 * 64; - tinyset - .into_iter() - .map(move |val| val + chunk_base_val) - .take_while(move |doc| *doc < max_val) - }) - } - /// Create a new `BitSet` that may contain elements /// within `[0, max_val)`. pub fn with_max_value(max_value: u32) -> BitSet { @@ -262,9 +220,9 @@ impl BitSet { } } - /// Create a new `BitSet` that may contain elements + /// Create a new `BitSet` that may contain elements. Initially all values will be set. /// within `[0, max_val)`. - pub fn with_max_value_and_filled(max_value: u32) -> BitSet { + pub fn with_max_value_and_full(max_value: u32) -> BitSet { let num_buckets = num_buckets(max_value); let tinybisets = vec![TinySet::full(); num_buckets as usize].into_boxed_slice(); BitSet { @@ -282,11 +240,12 @@ impl BitSet { } /// Returns the number of elements in the `BitSet`. - pub fn num_set_bits(&self) -> usize { + pub fn len(&self) -> usize { self.len as usize } /// Inserts an element in the `BitSet` + #[inline] pub fn insert(&mut self, el: u32) { // we do not check saturated els. let higher = el / 64u32; @@ -299,6 +258,7 @@ impl BitSet { } /// Inserts an element in the `BitSet` + #[inline] pub fn remove(&mut self, el: u32) { // we do not check saturated els. let higher = el / 64u32; @@ -312,14 +272,6 @@ impl BitSet { /// Returns true iff the elements is in the `BitSet`. #[inline] - pub fn contains_from_bytes(el: u32, data: &[u8]) -> bool { - let byte_offset = 4 + el / 8u32; - let b: u8 = data[byte_offset as usize]; - let shift = (el % 8) as u8; - b & (1u8 << shift) != 0 - } - - /// Returns true iff the elements is in the `BitSet`. 
pub fn contains(&self, el: u32) -> bool { self.tinyset(el / 64u32).contains(el % 64) } @@ -349,17 +301,133 @@ impl BitSet { } } +/// Lazy Read a serialized BitSet. +#[derive(Clone)] +pub struct ReadSerializedBitSet { + data: OwnedBytes, + max_value: u32, +} + +impl ReadSerializedBitSet { + pub fn new(data: OwnedBytes) -> Self { + let (max_value_data, data) = data.split(4); + let max_value: u32 = u32::from_le_bytes(max_value_data.as_ref().try_into().unwrap()); + ReadSerializedBitSet { data, max_value } + } + + /// Count the number of unset bits from serialized data. + /// + #[inline] + pub fn count_unset(&self) -> usize { + let lower = self.max_value % 64u32; + + let num_set: usize = self + .iter_tinysets() + .map(|(tinyset, is_last)| { + if is_last { + tinyset.intersect(TinySet::range_lower(lower)).len() as usize + } else { + tinyset.len() as usize + } + }) + .sum(); + self.max_value as usize - num_set + } + + /// Iterate the tinyset on the fly from serialized data. + /// + /// Iterator returns (TinySet, is_last) element, so the consumer can ignore up to max_doc in the + /// last block. + /// + #[inline] + fn iter_tinysets<'a>(&'a self) -> impl Iterator + 'a { + assert!((self.data.len()) % 8 == 0); + self.data + .chunks_exact(8) + .enumerate() + .map(move |(chunk_num, chunk)| { + let is_last = (chunk_num + 1) * 8 == self.data.len(); + + let tinyset: TinySet = TinySet::deserialize(chunk.try_into().unwrap()).unwrap(); + (tinyset, is_last) + }) + } + + /// Iterate over the positions of the unset elements. + /// + #[inline] + pub fn iter_unset<'a>(&'a self) -> impl Iterator + 'a { + self.iter_tinysets() + .enumerate() + .flat_map(move |(chunk_num, (tinyset, _))| { + let chunk_base_val = chunk_num as u32 * 64; + tinyset + .into_iter() + .map(move |val| val + chunk_base_val) + .take_while(move |doc| *doc < self.max_value) + }) + } + + /// Returns true iff the elements is in the `BitSet`. 
+ #[inline] + pub fn contains(&self, el: u32) -> bool { + let byte_offset = el / 8u32; + let b: u8 = self.data[byte_offset as usize]; + let shift = (el % 8) as u8; + b & (1u8 << shift) != 0 + } + + /// Returns the max_value. + #[inline] + pub fn max_value(&self) -> u32 { + self.max_value + } +} + #[cfg(test)] mod tests { use super::BitSet; + use super::ReadSerializedBitSet; use super::TinySet; + use ownedbytes::OwnedBytes; use rand::distributions::Bernoulli; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; use std::collections::HashSet; use std::convert::TryInto; + #[test] + fn test_read_serialized_bitset_full() { + let mut bitset = BitSet::with_max_value_and_full(5); + bitset.remove(3); + let mut out = vec![]; + bitset.serialize(&mut out).unwrap(); + + let bitset = ReadSerializedBitSet::new(OwnedBytes::new(out)); + assert_eq!(bitset.count_unset(), 1); + } + + #[test] + fn test_read_serialized_bitset_empty() { + let mut bitset = BitSet::with_max_value(5); + bitset.insert(3); + let mut out = vec![]; + bitset.serialize(&mut out).unwrap(); + + let bitset = ReadSerializedBitSet::new(OwnedBytes::new(out)); + assert_eq!(bitset.count_unset(), 4); + + { + let bitset = BitSet::with_max_value(5); + let mut out = vec![]; + bitset.serialize(&mut out).unwrap(); + + let bitset = ReadSerializedBitSet::new(OwnedBytes::new(out)); + assert_eq!(bitset.count_unset(), 5); + } + } + #[test] fn test_tiny_set_remove() { { @@ -452,7 +520,7 @@ mod tests { assert_eq!(hashset.contains(&el), bitset.contains(el)); } assert_eq!(bitset.max_value(), max_value); - assert_eq!(bitset.num_set_bits(), els.len()); + assert_eq!(bitset.len(), els.len()); }; test_against_hashset(&[], 0); @@ -506,25 +574,25 @@ mod tests { #[test] fn test_bitset_len() { let mut bitset = BitSet::with_max_value(1_000); - assert_eq!(bitset.num_set_bits(), 0); + assert_eq!(bitset.len(), 0); bitset.insert(3u32); - assert_eq!(bitset.num_set_bits(), 1); + assert_eq!(bitset.len(), 1); bitset.insert(103u32); - 
assert_eq!(bitset.num_set_bits(), 2); + assert_eq!(bitset.len(), 2); bitset.insert(3u32); - assert_eq!(bitset.num_set_bits(), 2); + assert_eq!(bitset.len(), 2); bitset.insert(103u32); - assert_eq!(bitset.num_set_bits(), 2); + assert_eq!(bitset.len(), 2); bitset.insert(104u32); - assert_eq!(bitset.num_set_bits(), 3); + assert_eq!(bitset.len(), 3); bitset.remove(105u32); - assert_eq!(bitset.num_set_bits(), 3); + assert_eq!(bitset.len(), 3); bitset.remove(104u32); - assert_eq!(bitset.num_set_bits(), 2); + assert_eq!(bitset.len(), 2); bitset.remove(3u32); - assert_eq!(bitset.num_set_bits(), 1); + assert_eq!(bitset.len(), 1); bitset.remove(103u32); - assert_eq!(bitset.num_set_bits(), 0); + assert_eq!(bitset.len(), 0); } pub fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec { diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 8ec303b6e..b81155646 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -73,7 +73,7 @@ impl SegmentReader { /// deleted in the segment. 
pub fn num_deleted_docs(&self) -> DocId { self.alive_bitset() - .map(|delete_set| delete_set.num_deleted() as DocId) + .map(|alive_set| alive_set.num_deleted() as DocId) .unwrap_or(0u32) } @@ -289,7 +289,7 @@ impl SegmentReader { /// Returns an iterator that will iterate over the alive document ids pub fn doc_ids_alive(&self) -> Box + '_> { if let Some(alive_bitset) = &self.alive_bitset_opt { - Box::new(alive_bitset.iter_unset()) + Box::new(alive_bitset.iter_alive()) } else { Box::new(0u32..self.max_doc) } diff --git a/src/docset.rs b/src/docset.rs index 0df231e23..e5430b207 100644 --- a/src/docset.rs +++ b/src/docset.rs @@ -89,7 +89,7 @@ pub trait DocSet: Send { let mut count = 0u32; let mut doc = self.doc(); while doc != TERMINATED { - if !alive_bitset.is_deleted(doc) { + if alive_bitset.is_alive(doc) { count += 1u32; } doc = self.advance(); diff --git a/src/fastfield/alive_bitset.rs b/src/fastfield/alive_bitset.rs index 3f8136c23..108eb24eb 100644 --- a/src/fastfield/alive_bitset.rs +++ b/src/fastfield/alive_bitset.rs @@ -3,6 +3,7 @@ use crate::directory::OwnedBytes; use crate::space_usage::ByteCount; use crate::DocId; use common::BitSet; +use common::ReadSerializedBitSet; use std::io; use std::io::Write; @@ -21,16 +22,17 @@ pub fn write_alive_bitset(alive_bitset: &BitSet, writer: &mut T) -> io pub struct AliveBitSet { data: OwnedBytes, num_deleted: usize, + bitset: ReadSerializedBitSet, } impl AliveBitSet { #[cfg(test)] - pub(crate) fn for_test(not_alive_docs: &[DocId], max_doc: u32) -> AliveBitSet { + pub(crate) fn for_test(deleted_docs: &[DocId], max_doc: u32) -> AliveBitSet { use crate::directory::{Directory, RamDirectory, TerminatingWrite}; use std::path::Path; - assert!(not_alive_docs.iter().all(|&doc| doc < max_doc)); - let mut bitset = BitSet::with_max_value_and_filled(max_doc); - for &doc in not_alive_docs { + assert!(deleted_docs.iter().all(|&doc| doc < max_doc)); + let mut bitset = BitSet::with_max_value_and_full(max_doc); + for &doc in deleted_docs { 
bitset.remove(doc); } let directory = RamDirectory::create(); @@ -45,32 +47,38 @@ impl AliveBitSet { /// Opens a delete bitset given its file. pub fn open(file: FileSlice) -> crate::Result { let bytes = file.read_bytes()?; - let num_deleted = BitSet::count_unset_from_bytes(bytes.as_slice()); + let bitset = ReadSerializedBitSet::new(bytes.clone()); + let num_deleted = bitset.count_unset(); Ok(AliveBitSet { data: bytes, num_deleted, + bitset, }) } /// Returns true iff the document is still "alive". In other words, if it has not been deleted. #[inline] pub fn is_alive(&self, doc: DocId) -> bool { - !self.is_deleted(doc) + self.bitset.contains(doc) } /// Returns true iff the document has been marked as deleted. #[inline] pub fn is_deleted(&self, doc: DocId) -> bool { - let data = self.data.as_slice(); - !BitSet::contains_from_bytes(doc, data) + !self.is_alive(doc) } - /// Iterate over the positions of the set elements + /// Iterate over the alive docids. #[inline] - pub fn iter_unset(&self) -> impl Iterator + '_ { - let data = self.data.as_slice(); - BitSet::iter_unset_from_bytes(data) + pub fn iter_alive(&self) -> impl Iterator + '_ { + self.bitset.iter_unset() + } + + /// Get underlying bitset + #[inline] + pub fn bitset(&self) -> &ReadSerializedBitSet { + &self.bitset } /// The number of deleted docs @@ -121,7 +129,7 @@ mod tests { fn test_alive_bitset_iter_minimal() { let alive_bitset = AliveBitSet::for_test(&[7], 8); - let data: Vec<_> = alive_bitset.iter_unset().collect(); + let data: Vec<_> = alive_bitset.iter_alive().collect(); assert_eq!(data, vec![0, 1, 2, 3, 4, 5, 6]); } @@ -129,14 +137,14 @@ mod tests { fn test_alive_bitset_iter_small() { let alive_bitset = AliveBitSet::for_test(&[0, 2, 3, 6], 7); - let data: Vec<_> = alive_bitset.iter_unset().collect(); + let data: Vec<_> = alive_bitset.iter_alive().collect(); assert_eq!(data, vec![1, 4, 5]); } #[test] fn test_alive_bitset_iter() { let alive_bitset = AliveBitSet::for_test(&[0, 1, 1000], 1001); - let data: 
Vec<_> = alive_bitset.iter_unset().collect(); + let data: Vec<_> = alive_bitset.iter_alive().collect(); assert_eq!(data, (2..=999).collect::>()); } } @@ -166,7 +174,7 @@ mod bench { fn bench_deletebitset_iter_deser_on_fly(bench: &mut Bencher) { let alive_bitset = AliveBitSet::for_test(&[0, 1, 1000, 10000], 1_000_000); - bench.iter(|| alive_bitset.iter_unset().collect::>()); + bench.iter(|| alive_bitset.iter_alive().collect::>()); } #[bench] @@ -184,7 +192,7 @@ mod bench { fn bench_deletebitset_iter_deser_on_fly_1_8_alive(bench: &mut Bencher) { let alive_bitset = AliveBitSet::for_test(&get_alive(), 1_000_000); - bench.iter(|| alive_bitset.iter_unset().collect::>()); + bench.iter(|| alive_bitset.iter_alive().collect::>()); } #[bench] diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index c37d41b0a..6250db86a 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -151,7 +151,7 @@ pub(crate) fn advance_deletes( let max_doc = segment_reader.max_doc(); let mut alive_bitset: BitSet = match segment_entry.alive_bitset() { Some(previous_alive_bitset) => (*previous_alive_bitset).clone(), - None => BitSet::with_max_value_and_filled(max_doc), + None => BitSet::with_max_value_and_full(max_doc), }; let num_deleted_docs_before = segment.meta().num_deleted_docs(); @@ -175,7 +175,7 @@ pub(crate) fn advance_deletes( } } - let num_alive_docs: u32 = alive_bitset.num_set_bits() as u32; + let num_alive_docs: u32 = alive_bitset.len() as u32; let num_deleted_docs = max_doc - num_alive_docs; if num_deleted_docs > num_deleted_docs_before { // There are new deletes. We need to write a new delete file. 
@@ -259,7 +259,7 @@ fn apply_deletes( let doc_to_opstamps = DocToOpstampMapping::WithMap(doc_opstamps); let max_doc = segment.meta().max_doc(); - let mut deleted_bitset = BitSet::with_max_value_and_filled(max_doc); + let mut deleted_bitset = BitSet::with_max_value_and_full(max_doc); let may_have_deletes = compute_deleted_bitset( &mut deleted_bitset, &segment_reader, diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 84151c8b1..8932c6790 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -99,22 +99,21 @@ fn compute_min_max_val( segment_reader: &SegmentReader, ) -> Option<(u64, u64)> { if segment_reader.max_doc() == 0 { - None - } else { - if segment_reader.alive_bitset().is_some() { - // some deleted documents, - // we need to recompute the max / min - minmax( - segment_reader - .doc_ids_alive() - .map(|doc_id| u64_reader.get(doc_id)), - ) - } else { - // no deleted documents, - // we can use the previous min_val, max_val. - Some((u64_reader.min_value(), u64_reader.max_value())) - } + return None; } + + if segment_reader.alive_bitset().is_none() { + // no deleted documents, + // we can use the previous min_val, max_val. + return Some((u64_reader.min_value(), u64_reader.max_value())); + } + // some deleted documents, + // we need to recompute the max / min + minmax( + segment_reader + .doc_ids_alive() + .map(|doc_id| u64_reader.get(doc_id)), + ) } struct TermOrdinalMapping { diff --git a/src/query/bitset/mod.rs b/src/query/bitset/mod.rs index ebd6e7b36..030fdeae7 100644 --- a/src/query/bitset/mod.rs +++ b/src/query/bitset/mod.rs @@ -90,7 +90,7 @@ impl DocSet for BitSetDocSet { /// but we don't have access to any better /// value. 
fn size_hint(&self) -> u32 { - self.docs.num_set_bits() as u32 + self.docs.len() as u32 } } @@ -124,7 +124,7 @@ mod tests { for i in 0..100_000 { assert_eq!(btreeset.contains(&i), bitset.contains(i)); } - assert_eq!(btreeset.len(), bitset.num_set_bits()); + assert_eq!(btreeset.len(), bitset.len()); let mut bitset_docset = BitSetDocSet::from(bitset); let mut remaining = true; for el in btreeset.into_iter() { diff --git a/src/query/union.rs b/src/query/union.rs index da6da15c0..cf7b4d956 100644 --- a/src/query/union.rs +++ b/src/query/union.rs @@ -219,18 +219,14 @@ where } let mut count = self.bitsets[self.cursor..HORIZON_NUM_TINYBITSETS] .iter() - .map(|bitset| bitset.num_set()) + .map(|bitset| bitset.len()) .sum::() + 1; for bitset in self.bitsets.iter_mut() { bitset.clear(); } while self.refill() { - count += self - .bitsets - .iter() - .map(|bitset| bitset.num_set()) - .sum::(); + count += self.bitsets.iter().map(|bitset| bitset.len()).sum::(); for bitset in self.bitsets.iter_mut() { bitset.clear(); } From 22bcc83d106ee77b666c9b1a812aee6be4e38a9b Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 24 Sep 2021 14:43:04 +0800 Subject: [PATCH 12/13] fix padding in initialization --- common/src/bitset.rs | 51 ++++++++++++++++------------------- src/fastfield/alive_bitset.rs | 4 +-- 2 files changed, 25 insertions(+), 30 deletions(-) diff --git a/common/src/bitset.rs b/common/src/bitset.rs index a05f786ef..8b024e3d4 100644 --- a/common/src/bitset.rs +++ b/common/src/bitset.rs @@ -60,7 +60,10 @@ impl TinySet { #[inline] /// Returns the complement of the set in `[0, 64[`. - pub fn complement(self) -> TinySet { + /// + /// Careful on making this function public, as it will break the padding handling in the last + /// bucket. + fn complement(self) -> TinySet { TinySet(!self.0) } @@ -224,7 +227,12 @@ impl BitSet { /// within `[0, max_val)`. 
pub fn with_max_value_and_full(max_value: u32) -> BitSet { let num_buckets = num_buckets(max_value); - let tinybisets = vec![TinySet::full(); num_buckets as usize].into_boxed_slice(); + let mut tinybisets = vec![TinySet::full(); num_buckets as usize].into_boxed_slice(); + + // Fix padding + let lower = max_value % 64u32; + tinybisets[tinybisets.len() - 1] = TinySet::range_lower(lower); + BitSet { tinysets: tinybisets, len: max_value as u64, @@ -309,7 +317,7 @@ pub struct ReadSerializedBitSet { } impl ReadSerializedBitSet { - pub fn new(data: OwnedBytes) -> Self { + pub fn open(data: OwnedBytes) -> Self { let (max_value_data, data) = data.split(4); let max_value: u32 = u32::from_le_bytes(max_value_data.as_ref().try_into().unwrap()); ReadSerializedBitSet { data, max_value } @@ -319,17 +327,9 @@ impl ReadSerializedBitSet { /// #[inline] pub fn count_unset(&self) -> usize { - let lower = self.max_value % 64u32; - let num_set: usize = self .iter_tinysets() - .map(|(tinyset, is_last)| { - if is_last { - tinyset.intersect(TinySet::range_lower(lower)).len() as usize - } else { - tinyset.len() as usize - } - }) + .map(|tinyset| tinyset.len() as usize) .sum(); self.max_value as usize - num_set } @@ -340,26 +340,21 @@ impl ReadSerializedBitSet { /// last block. /// #[inline] - fn iter_tinysets<'a>(&'a self) -> impl Iterator + 'a { + fn iter_tinysets<'a>(&'a self) -> impl Iterator + 'a { assert!((self.data.len()) % 8 == 0); - self.data - .chunks_exact(8) - .enumerate() - .map(move |(chunk_num, chunk)| { - let is_last = (chunk_num + 1) * 8 == self.data.len(); - - let tinyset: TinySet = TinySet::deserialize(chunk.try_into().unwrap()).unwrap(); - (tinyset, is_last) - }) + self.data.chunks_exact(8).map(move |chunk| { + let tinyset: TinySet = TinySet::deserialize(chunk.try_into().unwrap()).unwrap(); + tinyset + }) } - /// Iterate over the positions of the unset elements. + /// Iterate over the positions of the elements. 
/// #[inline] - pub fn iter_unset<'a>(&'a self) -> impl Iterator + 'a { + pub fn iter<'a>(&'a self) -> impl Iterator + 'a { self.iter_tinysets() .enumerate() - .flat_map(move |(chunk_num, (tinyset, _))| { + .flat_map(move |(chunk_num, tinyset)| { let chunk_base_val = chunk_num as u32 * 64; tinyset .into_iter() @@ -404,7 +399,7 @@ mod tests { let mut out = vec![]; bitset.serialize(&mut out).unwrap(); - let bitset = ReadSerializedBitSet::new(OwnedBytes::new(out)); + let bitset = ReadSerializedBitSet::open(OwnedBytes::new(out)); assert_eq!(bitset.count_unset(), 1); } @@ -415,7 +410,7 @@ mod tests { let mut out = vec![]; bitset.serialize(&mut out).unwrap(); - let bitset = ReadSerializedBitSet::new(OwnedBytes::new(out)); + let bitset = ReadSerializedBitSet::open(OwnedBytes::new(out)); assert_eq!(bitset.count_unset(), 4); { @@ -423,7 +418,7 @@ mod tests { let mut out = vec![]; bitset.serialize(&mut out).unwrap(); - let bitset = ReadSerializedBitSet::new(OwnedBytes::new(out)); + let bitset = ReadSerializedBitSet::open(OwnedBytes::new(out)); assert_eq!(bitset.count_unset(), 5); } } diff --git a/src/fastfield/alive_bitset.rs b/src/fastfield/alive_bitset.rs index 108eb24eb..0ab4513a2 100644 --- a/src/fastfield/alive_bitset.rs +++ b/src/fastfield/alive_bitset.rs @@ -47,7 +47,7 @@ impl AliveBitSet { /// Opens a delete bitset given its file. pub fn open(file: FileSlice) -> crate::Result { let bytes = file.read_bytes()?; - let bitset = ReadSerializedBitSet::new(bytes.clone()); + let bitset = ReadSerializedBitSet::open(bytes.clone()); let num_deleted = bitset.count_unset(); Ok(AliveBitSet { @@ -72,7 +72,7 @@ impl AliveBitSet { /// Iterate over the alive docids. 
#[inline] pub fn iter_alive(&self) -> impl Iterator + '_ { - self.bitset.iter_unset() + self.bitset.iter() } /// Get underlying bitset From efc0d8341bc97b2a300b65a21502ec91145d5965 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 24 Sep 2021 15:09:21 +0800 Subject: [PATCH 13/13] fix comment --- common/src/bitset.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/common/src/bitset.rs b/common/src/bitset.rs index 8b024e3d4..6ff97e4a3 100644 --- a/common/src/bitset.rs +++ b/common/src/bitset.rs @@ -336,9 +336,6 @@ impl ReadSerializedBitSet { /// Iterate the tinyset on the fly from serialized data. /// - /// Iterator returns (TinySet, is_last) element, so the consumer can ignore up to max_doc in the - /// last block. - /// #[inline] fn iter_tinysets<'a>(&'a self) -> impl Iterator + 'a { assert!((self.data.len()) % 8 == 0);