AliveBitSet instead of DeleteBitSet

This commit is contained in:
Pascal Seitz
2021-09-23 20:03:57 +08:00
parent 4583fa270b
commit a1f5cead96
13 changed files with 177 additions and 82 deletions

View File

@@ -31,7 +31,7 @@ impl IntoIterator for TinySet {
}
impl TinySet {
pub fn serialize(&self, writer: &mut dyn Write) -> io::Result<()> {
pub fn serialize<T: Write>(&self, writer: &mut T) -> io::Result<()> {
writer.write_all(self.0.to_le_bytes().as_ref())
}
@@ -42,17 +42,24 @@ impl TinySet {
}
/// Returns an empty `TinySet`.
#[inline]
pub fn empty() -> TinySet {
TinySet(0u64)
}
/// Returns a full `TinySet`.
#[inline]
pub fn full() -> TinySet {
TinySet::empty().complement()
}
pub fn clear(&mut self) {
self.0 = 0u64;
}
#[inline]
/// Returns the complement of the set in `[0, 64[`.
fn complement(self) -> TinySet {
pub fn complement(self) -> TinySet {
TinySet(!self.0)
}
@@ -68,6 +75,12 @@ impl TinySet {
self.0.count_ones()
}
#[inline]
/// Returns the number of elements in the TinySet.
pub fn num_unset(self) -> u32 {
self.0.count_zeros()
}
#[inline]
/// Returns the intersection of `self` and `other`
pub fn intersect(self, other: TinySet) -> TinySet {
@@ -81,13 +94,21 @@ impl TinySet {
TinySet(1u64 << u64::from(el))
}
/// Insert a new element within [0..64[
/// Insert a new element within [0..64)
#[inline]
pub fn insert(self, el: u32) -> TinySet {
self.union(TinySet::singleton(el))
}
/// Insert a new element within [0..64[
/// Removes an element within [0..64)
#[inline]
pub fn remove(self, el: u32) -> TinySet {
self.intersect(TinySet::singleton(el).complement())
}
/// Insert a new element within [0..64)
///
/// returns true if the bit changed
#[inline]
pub fn insert_mut(&mut self, el: u32) -> bool {
let old = *self;
@@ -95,6 +116,16 @@ impl TinySet {
old != *self
}
/// Remove a new element within [0..64)
///
/// returns true if the bit changed
#[inline]
pub fn remove_mut(&mut self, el: u32) -> bool {
let old = *self;
*self = old.remove(el);
old != *self
}
/// Returns the union of two tinysets
#[inline]
pub fn union(self, other: TinySet) -> TinySet {
@@ -151,7 +182,7 @@ fn num_buckets(max_val: u32) -> u32 {
impl BitSet {
/// serialize a `BitSet`.
///
pub fn serialize(&self, writer: &mut dyn Write) -> io::Result<()> {
pub fn serialize<T: Write>(&self, writer: &mut T) -> io::Result<()> {
writer.write_all(self.max_value.to_le_bytes().as_ref())?;
for tinyset in self.tinysets.iter() {
@@ -163,6 +194,7 @@ impl BitSet {
/// Deserialize a `BitSet`.
///
#[cfg(test)]
pub fn deserialize(mut data: &[u8]) -> io::Result<Self> {
let max_value: u32 = u32::from_le_bytes(data[..4].try_into().unwrap());
data = &data[4..];
@@ -181,10 +213,19 @@ impl BitSet {
})
}
/// Count the number of unset bits from serialized data.
///
#[inline]
pub fn count_unset_from_bytes<'a>(data: &'a [u8]) -> usize {
BitSet::iter_tinysets_from_bytes(data)
.map(|tinyset| tinyset.num_unset() as usize)
.sum()
}
/// Iterate the tinyset on the fly from serialized data.
///
#[inline]
pub fn iter_from_bytes<'a>(data: &'a [u8]) -> impl Iterator<Item = TinySet> + 'a {
fn iter_tinysets_from_bytes<'a>(data: &'a [u8]) -> impl Iterator<Item = TinySet> + 'a {
assert!((data.len() - 4) % 8 == 0);
data[4..].chunks_exact(8).map(move |chunk| {
let tinyset: TinySet = TinySet::deserialize(chunk.try_into().unwrap()).unwrap();
@@ -198,8 +239,7 @@ impl BitSet {
#[inline]
pub fn iter_unset_from_bytes<'a>(data: &'a [u8]) -> impl Iterator<Item = u32> + 'a {
let max_val: u32 = u32::from_le_bytes(data[..4].try_into().unwrap());
Self::iter_from_bytes(data)
.map(|tinyset| tinyset.complement())
Self::iter_tinysets_from_bytes(data)
.enumerate()
.flat_map(move |(chunk_num, tinyset)| {
let chunk_base_val = chunk_num as u32 * 64;
@@ -211,7 +251,7 @@ impl BitSet {
}
/// Create a new `BitSet` that may contain elements
/// within `[0, max_val[`.
/// within `[0, max_val)`.
pub fn with_max_value(max_value: u32) -> BitSet {
let num_buckets = num_buckets(max_value);
let tinybisets = vec![TinySet::empty(); num_buckets as usize].into_boxed_slice();
@@ -222,6 +262,18 @@ impl BitSet {
}
}
/// Create a new `BitSet` that may contain elements
/// within `[0, max_val)`.
pub fn with_max_value_and_filled(max_value: u32) -> BitSet {
let num_buckets = num_buckets(max_value);
let tinybisets = vec![TinySet::full(); num_buckets as usize].into_boxed_slice();
BitSet {
tinysets: tinybisets,
len: max_value as u64,
max_value,
}
}
/// Removes all elements from the `BitSet`.
pub fn clear(&mut self) {
for tinyset in self.tinysets.iter_mut() {
@@ -230,7 +282,7 @@ impl BitSet {
}
/// Returns the number of elements in the `BitSet`.
pub fn len(&self) -> usize {
pub fn num_set_bits(&self) -> usize {
self.len as usize
}
@@ -246,6 +298,18 @@ impl BitSet {
};
}
/// Inserts an element in the `BitSet`
pub fn remove(&mut self, el: u32) {
// we do not check saturated els.
let higher = el / 64u32;
let lower = el % 64u32;
self.len -= if self.tinysets[higher as usize].remove_mut(lower) {
1
} else {
0
};
}
/// Returns true iff the elements is in the `BitSet`.
#[inline]
pub fn contains_from_bytes(el: u32, data: &[u8]) -> bool {
@@ -296,6 +360,33 @@ mod tests {
use std::collections::HashSet;
use std::convert::TryInto;
#[test]
fn test_tiny_set_remove() {
{
let mut u = TinySet::empty().insert(63u32).insert(5).remove(63u32);
assert_eq!(u.pop_lowest(), Some(5u32));
assert!(u.pop_lowest().is_none());
}
{
let mut u = TinySet::empty()
.insert(63u32)
.insert(1)
.insert(5)
.remove(63u32);
assert_eq!(u.pop_lowest(), Some(1u32));
assert_eq!(u.pop_lowest(), Some(5u32));
assert!(u.pop_lowest().is_none());
}
{
let mut u = TinySet::empty().insert(1).remove(63u32);
assert_eq!(u.pop_lowest(), Some(1u32));
assert!(u.pop_lowest().is_none());
}
{
let mut u = TinySet::empty().insert(1).remove(1u32);
assert!(u.pop_lowest().is_none());
}
}
#[test]
fn test_tiny_set() {
assert!(TinySet::empty().is_empty());
@@ -361,7 +452,7 @@ mod tests {
assert_eq!(hashset.contains(&el), bitset.contains(el));
}
assert_eq!(bitset.max_value(), max_value);
assert_eq!(bitset.len(), els.len());
assert_eq!(bitset.num_set_bits(), els.len());
};
test_against_hashset(&[], 0);
@@ -415,17 +506,25 @@ mod tests {
#[test]
fn test_bitset_len() {
let mut bitset = BitSet::with_max_value(1_000);
assert_eq!(bitset.len(), 0);
assert_eq!(bitset.num_set_bits(), 0);
bitset.insert(3u32);
assert_eq!(bitset.len(), 1);
assert_eq!(bitset.num_set_bits(), 1);
bitset.insert(103u32);
assert_eq!(bitset.len(), 2);
assert_eq!(bitset.num_set_bits(), 2);
bitset.insert(3u32);
assert_eq!(bitset.len(), 2);
assert_eq!(bitset.num_set_bits(), 2);
bitset.insert(103u32);
assert_eq!(bitset.len(), 2);
assert_eq!(bitset.num_set_bits(), 2);
bitset.insert(104u32);
assert_eq!(bitset.len(), 3);
assert_eq!(bitset.num_set_bits(), 3);
bitset.remove(105u32);
assert_eq!(bitset.num_set_bits(), 3);
bitset.remove(104u32);
assert_eq!(bitset.num_set_bits(), 2);
bitset.remove(3u32);
assert_eq!(bitset.num_set_bits(), 1);
bitset.remove(103u32);
assert_eq!(bitset.num_set_bits(), 0);
}
pub fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec<u32> {