Add testing utilities for the hash map; fix freelist bugs

This commit is contained in:
David Freifeld
2025-06-16 16:01:46 -07:00
parent ac87544e79
commit bb1e359872
4 changed files with 161 additions and 103 deletions

View File

@@ -20,7 +20,7 @@ pub mod entry;
mod tests;
use core::{CoreHashMap, INVALID_POS};
use entry::{Entry, OccupiedEntry};
use entry::{Entry, OccupiedEntry, PrevPos};
#[derive(Debug)]
pub struct OutOfMemoryError();
@@ -289,15 +289,15 @@ where
for i in old_num_buckets..num_buckets {
let bucket_ptr = buckets_ptr.add(i as usize);
bucket_ptr.write(core::Bucket {
next: if i < num_buckets {
next: if i < num_buckets-1 {
i as u32 + 1
} else {
inner.free_head
},
prev: if i > 0 {
i as u32 - 1
PrevPos::Chained(i as u32 - 1)
} else {
INVALID_POS
PrevPos::First(INVALID_POS)
},
inner: None,
});
@@ -315,6 +315,10 @@ where
if num_buckets > map.inner.get_num_buckets() as u32 {
panic!("shrink called with a larger number of buckets");
}
_ = self
.shmem_handle
.as_ref()
.expect("shrink called on a fixed-size hash table");
map.inner.alloc_limit = num_buckets;
}
@@ -342,9 +346,21 @@ where
// Would require making a wider error type enum with this and shmem errors.
panic!("unevicted entries in shrinked space")
}
let prev_pos = inner.buckets[i].prev;
if prev_pos != INVALID_POS {
inner.buckets[prev_pos as usize].next = inner.buckets[i].next;
match inner.buckets[i].prev {
PrevPos::First(_) => {
let next_pos = inner.buckets[i].next;
inner.free_head = next_pos;
if next_pos != INVALID_POS {
inner.buckets[next_pos as usize].prev = PrevPos::First(INVALID_POS);
}
},
PrevPos::Chained(j) => {
let next_pos = inner.buckets[i].next;
inner.buckets[j as usize].next = next_pos;
if next_pos != INVALID_POS {
inner.buckets[next_pos as usize].prev = PrevPos::Chained(j);
}
}
}
}
@@ -358,6 +374,7 @@ where
let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) };
let buckets_ptr = inner.buckets.as_mut_ptr();
self.rehash_dict(inner, buckets_ptr, end_ptr, num_buckets, num_buckets);
inner.alloc_limit = INVALID_POS;
Ok(())
}

View File

@@ -13,7 +13,7 @@ pub(crate) const INVALID_POS: u32 = u32::MAX;
// Bucket
pub(crate) struct Bucket<K, V> {
pub(crate) next: u32,
pub(crate) prev: u32,
pub(crate) prev: PrevPos,
pub(crate) inner: Option<(K, V)>,
}
@@ -65,9 +65,9 @@ where
INVALID_POS
},
prev: if i > 0 {
i as u32 - 1
PrevPos::Chained(i as u32 - 1)
} else {
INVALID_POS
PrevPos::First(INVALID_POS)
},
inner: None,
});
@@ -176,24 +176,15 @@ where
_key: key.clone(), // TODO(quantumish): clone unavoidable?
bucket_pos: pos as u32,
map: self,
prev_pos: if prev == INVALID_POS {
// TODO(quantumish): populating this correctly would require an O(n) scan over the dictionary
// (perhaps not if we refactored the prev field to be itself something like PrevPos). The real
// question though is whether this even needs to be populated correctly? All downstream uses of
// this function so far are just for deletion, which isn't really concerned with the dictionary.
// Then again, it's unintuitive to appear to return a normal OccupiedEntry which really is fake.
PrevPos::First(todo!("unclear what to do here"))
} else {
PrevPos::Chained(prev)
}
prev_pos: prev,
})
}
pub(crate) fn alloc_bucket(&mut self, key: K, value: V) -> Result<u32, FullError> {
let mut pos = self.free_head;
let mut prev = PrevPos::First(self.free_head);
while pos!= INVALID_POS && pos >= self.alloc_limit {
while pos != INVALID_POS && pos >= self.alloc_limit {
let bucket = &mut self.buckets[pos as usize];
prev = PrevPos::Chained(pos);
pos = bucket.next;
@@ -204,16 +195,23 @@ where
match prev {
PrevPos::First(_) => {
let next_pos = self.buckets[pos as usize].next;
self.free_head = next_pos;
self.buckets[next_pos as usize].prev = INVALID_POS;
self.free_head = next_pos;
// HACK(quantumish): Really, the INVALID_POS should be the position within the dictionary.
// This isn't passed into this function, though, and so for now rather than changing that
// we can just check it from `alloc_bucket`. Not a great solution.
if next_pos != INVALID_POS {
self.buckets[next_pos as usize].prev = PrevPos::First(INVALID_POS);
}
}
PrevPos::Chained(p) => if p != INVALID_POS {
let next_pos = self.buckets[pos as usize].next;
self.buckets[p as usize].next = next_pos;
self.buckets[next_pos as usize].prev = p;
if next_pos != INVALID_POS {
self.buckets[next_pos as usize].prev = PrevPos::Chained(p);
}
},
}
let bucket = &mut self.buckets[pos as usize];
self.buckets_in_use += 1;
bucket.next = INVALID_POS;

View File

@@ -10,6 +10,7 @@ pub enum Entry<'a, 'b, K, V> {
Vacant(VacantEntry<'a, 'b, K, V>),
}
#[derive(Clone, Copy)]
pub(crate) enum PrevPos {
First(u32),
Chained(u32),
@@ -58,10 +59,14 @@ impl<'a, 'b, K, V> OccupiedEntry<'a, 'b, K, V> {
}
}
// and add it to the freelist
// and add it to the freelist
if self.map.free_head != INVALID_POS {
self.map.buckets[self.map.free_head as usize].prev = PrevPos::Chained(self.bucket_pos);
}
let bucket = &mut self.map.buckets[self.bucket_pos as usize];
let old_value = bucket.inner.take();
bucket.next = self.map.free_head;
bucket.next = self.map.free_head;
bucket.prev = PrevPos::First(INVALID_POS);
self.map.free_head = self.bucket_pos;
self.map.buckets_in_use -= 1;
@@ -82,6 +87,9 @@ impl<'a, 'b, K: Clone + Hash + Eq, V> VacantEntry<'a, 'b, K, V> {
return Err(FullError());
}
let bucket = &mut self.map.buckets[pos as usize];
if let PrevPos::First(INVALID_POS) = bucket.prev {
bucket.prev = PrevPos::First(self.dict_pos);
}
bucket.next = self.map.dictionary[self.dict_pos as usize];
self.map.dictionary[self.dict_pos as usize] = pos;

View File

@@ -1,6 +1,8 @@
use std::collections::BTreeMap;
use std::collections::HashSet;
use std::fmt::{Debug, Formatter};
use std::mem::uninitialized;
use std::mem::MaybeUninit;
use std::sync::atomic::{AtomicUsize, Ordering};
use crate::hash::HashMapAccess;
@@ -132,8 +134,6 @@ fn apply_op(
map: &mut HashMapAccess<TestKey, usize>,
shadow: &mut BTreeMap<TestKey, usize>,
) {
eprintln!("applying op: {op:?}");
// apply the change to the shadow tree first
let shadow_existing = if let Some(v) = op.1 {
shadow.insert(op.0, v)
@@ -161,16 +161,42 @@ fn apply_op(
assert_eq!(shadow_existing, hash_existing);
}
/// Applies `num_ops` random operations to both the real hash map and the
/// shadow `BTreeMap`, letting `apply_op` check that the two stay consistent.
///
/// Keys are drawn uniformly from `[0, size)`; the value stored on an insert
/// is the operation index, so repeated inserts of a key overwrite it.
///
/// NOTE(review): the parameter was originally named `del_prob`, but
/// `random_bool(p) == true` produces `Some(i)`, which `apply_op` treats as
/// an *insert* (shadow.insert) — so this is really the insertion
/// probability. Renamed for clarity; callers pass it positionally, so the
/// rename is backward-compatible and behavior is unchanged.
fn do_random_ops(
    num_ops: usize,
    size: u32,
    insert_prob: f64,
    writer: &mut HashMapAccess<TestKey, usize>,
    shadow: &mut BTreeMap<TestKey, usize>,
    rng: &mut rand::rngs::ThreadRng,
) {
    for i in 0..num_ops {
        let key: TestKey = ((rng.next_u32() % size) as u128).into();
        // Some(i) = insert/overwrite with the op index, None = delete.
        let op = TestOp(key, if rng.random_bool(insert_prob) { Some(i) } else { None });
        apply_op(&op, writer, shadow);
    }
}
/// Removes `num_ops` entries from the map, using the shadow tree as the
/// source of truth for which keys currently exist: the smallest remaining
/// key is popped from the shadow and deleted from the real map each round.
///
/// Panics (with a descriptive message) if the shadow holds fewer than
/// `num_ops` live entries.
fn do_deletes(
    num_ops: usize,
    writer: &mut HashMapAccess<TestKey, usize>,
    shadow: &mut BTreeMap<TestKey, usize>,
) {
    // `_` instead of the original unused `i` silences the compiler warning.
    for _ in 0..num_ops {
        let (k, _) = shadow
            .pop_first()
            .expect("do_deletes called with more ops than live entries");
        let hash = writer.get_hash_value(&k);
        writer.remove_with_hash(&k, hash);
    }
}
#[test]
fn random_ops() {
const MAX_MEM_SIZE: usize = 10000000;
let shmem = ShmemHandle::new("test_inserts", 0, MAX_MEM_SIZE).unwrap();
let shmem = ShmemHandle::new("test_inserts", 0, 10000000).unwrap();
let init_struct = HashMapInit::<TestKey, usize>::init_in_shmem(100000, shmem);
let mut writer = init_struct.attach_writer();
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
let distribution = Zipf::new(u128::MAX as f64, 1.1).unwrap();
let mut rng = rand::rng();
for i in 0..100000 {
@@ -190,88 +216,97 @@ fn random_ops() {
#[test]
fn test_grow() {
const MEM_SIZE: usize = 10000000;
let shmem = ShmemHandle::new("test_grow", 0, MEM_SIZE).unwrap();
let shmem = ShmemHandle::new("test_grow", 0, 10000000).unwrap();
let init_struct = HashMapInit::<TestKey, usize>::init_in_shmem(1000, shmem);
let mut writer = init_struct.attach_writer();
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
let mut rng = rand::rng();
for i in 0..10000 {
let key: TestKey = ((rng.next_u32() % 1000) as u128).into();
let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
apply_op(&op, &mut writer, &mut shadow);
if i % 1000 == 0 {
eprintln!("{i} ops processed");
//eprintln!("stats: {:?}", tree_writer.get_statistics());
//test_iter(&tree_writer, &shadow);
}
}
do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
writer.grow(1500).unwrap();
for i in 0..10000 {
let key: TestKey = ((rng.next_u32() % 1500) as u128).into();
let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
apply_op(&op, &mut writer, &mut shadow);
if i % 1000 == 0 {
eprintln!("{i} ops processed");
//eprintln!("stats: {:?}", tree_writer.get_statistics());
//test_iter(&tree_writer, &shadow);
}
}
do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
}
#[test]
fn test_shrink() {
const MEM_SIZE: usize = 10000000;
let shmem = ShmemHandle::new("test_shrink", 0, MEM_SIZE).unwrap();
let init_struct = HashMapInit::<TestKey, usize>::init_in_shmem(1500, shmem);
let mut writer = init_struct.attach_writer();
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
let mut rng = rand::rng();
for i in 0..100 {
let key: TestKey = ((rng.next_u32() % 1500) as u128).into();
let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
apply_op(&op, &mut writer, &mut shadow);
if i % 1000 == 0 {
eprintln!("{i} ops processed");
}
}
writer.begin_shrink(1000);
for i in 1000..1500 {
if let Some(entry) = writer.entry_at_bucket(i) {
fn do_shrink(
writer: &mut HashMapAccess<TestKey, usize>,
shadow: &mut BTreeMap<TestKey, usize>,
from: u32,
to: u32
) {
writer.begin_shrink(to);
for i in to..from {
if let Some(entry) = writer.entry_at_bucket(i as usize) {
shadow.remove(&entry._key);
entry.remove();
}
}
writer.finish_shrink().unwrap();
for i in 0..10000 {
let key: TestKey = ((rng.next_u32() % 1000) as u128).into();
let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
apply_op(&op, &mut writer, &mut shadow);
if i % 1000 == 0 {
eprintln!("{i} ops processed");
}
}
}
#[test]
fn test_shrink() {
    // Build a 1500-bucket map backed by shared memory and mirror every
    // operation in a BTreeMap so results can be cross-checked.
    let shmem = ShmemHandle::new("test_shrink", 0, 10000000).unwrap();
    let init_struct = HashMapInit::<TestKey, usize>::init_in_shmem(1500, shmem);
    let mut writer = init_struct.attach_writer();
    let mut shadow: BTreeMap<TestKey, usize> = BTreeMap::new();
    let mut rng = rand::rng();

    // Fill the table with random traffic, then shrink 1500 -> 1000 buckets.
    do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
    do_shrink(&mut writer, &mut shadow, 1500, 1000);

    // Drop 500 entries and keep exercising the smaller table; occupancy
    // must never exceed the post-shrink capacity.
    do_deletes(500, &mut writer, &mut shadow);
    do_random_ops(10000, 500, 0.75, &mut writer, &mut shadow, &mut rng);
    assert!(writer.get_num_buckets_in_use() <= 1000);
}
#[test]
fn test_shrink_grow_seq() {
    // Interleaves random traffic with shrink/grow cycles to stress the
    // freelist bookkeeping across resizes.
    //
    // BUGFIX(review): this test previously opened its shared memory as
    // "test_shrink", the same name used by test_shrink() (and others). The
    // Rust test harness runs tests in parallel by default, so two live
    // segments could collide on the same name; use a unique name instead.
    let shmem = ShmemHandle::new("test_shrink_grow_seq", 0, 10000000).unwrap();
    let init_struct = HashMapInit::<TestKey, usize>::init_in_shmem(1500, shmem);
    let mut writer = init_struct.attach_writer();
    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
    let mut rng = rand::rng();

    do_random_ops(500, 1000, 0.1, &mut writer, &mut shadow, &mut rng);
    eprintln!("Shrinking to 750");
    do_shrink(&mut writer, &mut shadow, 1000, 750);
    do_random_ops(200, 1000, 0.5, &mut writer, &mut shadow, &mut rng);
    eprintln!("Growing to 1500");
    writer.grow(1500).unwrap();
    do_random_ops(600, 1500, 0.1, &mut writer, &mut shadow, &mut rng);
    eprintln!("Shrinking to 200");
    do_shrink(&mut writer, &mut shadow, 1500, 200);
    do_deletes(100, &mut writer, &mut shadow);
    do_random_ops(50, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
    eprintln!("Growing to 10k");
    writer.grow(10000).unwrap();
    do_random_ops(10000, 5000, 0.25, &mut writer, &mut shadow, &mut rng);
}
#[test]
#[should_panic]
fn test_shrink_bigger() {
    // Shrinking to MORE buckets than the table has must panic
    // ("shrink called with a larger number of buckets").
    //
    // BUGFIX(review): use a unique shmem name — "test_shrink" was shared
    // with other tests, which can collide when tests run in parallel.
    let shmem = ShmemHandle::new("test_shrink_bigger", 0, 10000000).unwrap();
    let init_struct = HashMapInit::<TestKey, usize>::init_in_shmem(1500, shmem);
    let mut writer = init_struct.attach_writer();
    writer.begin_shrink(2000);
}
#[test]
#[should_panic]
fn test_shrink_early_finish() {
    // Calling finish_shrink without a preceding begin_shrink must fail;
    // the unwrap turns that failure into the expected panic.
    //
    // BUGFIX(review): use a unique shmem name — "test_shrink" was shared
    // with other tests, which can collide when tests run in parallel.
    let shmem = ShmemHandle::new("test_shrink_early_finish", 0, 10000000).unwrap();
    let init_struct = HashMapInit::<TestKey, usize>::init_in_shmem(1500, shmem);
    let mut writer = init_struct.attach_writer();
    writer.finish_shrink().unwrap();
}
#[test]
#[should_panic]
fn test_shrink_fixed_size() {
    // A map initialized over a caller-provided fixed area has no shared
    // memory handle to resize, so begin_shrink must panic (it `expect`s the
    // handle with "shrink called on a fixed-size hash table").
    let mut area = [MaybeUninit::uninit(); 10000];
    let init_struct = HashMapInit::<TestKey, usize>::init_in_fixed_area(3, &mut area);
    let mut writer = init_struct.attach_writer();
    writer.begin_shrink(1);
}