Heavily WIP rewrite of hashmap for concurrency + perf

2026-01-08 22:12:56 +00:00 · 2025-07-10 17:34:30 -07:00
parent da6f4dbafe
commit 7f655ddb9a
8 changed files with 1026 additions and 790 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -247,6 +247,15 @@ dependencies = [
 "syn 2.0.100",
 ]

+[[package]]
+name = "atomic"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340"
+dependencies = [
+ "bytemuck",
+]
+
 [[package]]
 name = "atomic-take"
 version = "1.1.0"
@@ -1087,9 +1096,23 @@ checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"

 [[package]]
 name = "bytemuck"
-version = "1.16.3"
+version = "1.23.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "102087e286b4677862ea56cf8fc58bb2cdfa8725c40ffb80fe3a008eb7f2fc83"
+checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422"
+dependencies = [
+ "bytemuck_derive",
+]
+
+[[package]]
+name = "bytemuck_derive"
+version = "1.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7ecc273b49b3205b83d648f0690daa588925572cc5063745bfe547fe7ec8e1a1"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.100",
+]

 [[package]]
 name = "byteorder"
@@ -3951,6 +3974,8 @@ name = "neon-shmem"
 version = "0.1.0"
 dependencies = [
 "ahash",
+ "atomic",
+ "bytemuck",
 "criterion",
 "foldhash",
 "hashbrown 0.15.4",
--- a/libs/neon-shmem/Cargo.toml
+++ b/libs/neon-shmem/Cargo.toml
@@ -12,6 +12,8 @@ rustc-hash = { version = "2.1.1" }
 rand = "0.9.1"
 libc.workspace = true
 lock_api = "0.4.13"
+atomic = "0.6.1"
+bytemuck = { version = "1.23.1", features = ["derive"] }

 [dev-dependencies]
 criterion = { workspace = true, features = ["html_reports"] }
--- a/libs/neon-shmem/src/hash.rs
+++ b/libs/neon-shmem/src/hash.rs
@@ -1,51 +1,43 @@
-//! Resizable hash table implementation on top of byte-level storage (either a [`ShmemHandle`] or a
-//! fixed byte array).
-//!
-
-//! This hash table has two major components: the bucket array and the dictionary. Each bucket
-//! within the bucket array contains a `Option<(K, V)>` and an index of another bucket. In this way
-//! there is both an implicit freelist within the bucket array (`None` buckets point to other `None`
-//! entries) and various hash chains within the bucket array (a Some bucket will point to other Some
-//! buckets that had the same hash).
-//!
-//! Buckets are never moved unless they are within a region that is being shrunk, and so the actual
-//! hash- dependent component is done with the dictionary. When a new key is inserted into the map,
-//! a position within the dictionary is decided based on its hash, the data is inserted into an
-//! empty bucket based off of the freelist, and then the index of said bucket is placed in the
-//! dictionary.
-//!
-//! This map is resizable (if initialized on top of a [`ShmemHandle`]). Both growing and shrinking
-//! happen in-place and are at a high level achieved by expanding/reducing the bucket array and
-//! rebuilding the dictionary by rehashing all keys.
-
 use std::hash::{BuildHasher, Hash};
 use std::mem::MaybeUninit;
+use std::ptr::NonNull;
+use std::sync::atomic::Ordering;

-use crate::{shmem, sync::{
-	PthreadRwLock, RwLock, RwLockReadGuard, RwLockWriteGuard, ValueReadGuard
-}};
 use crate::shmem::ShmemHandle;
 use crate::{shmem, sync::*};

 mod core;
+mod bucket;
 pub mod entry;

 #[cfg(test)]
 mod tests;

-use core::{Bucket, CoreHashMap, INVALID_POS};
-use entry::{Entry, OccupiedEntry, PrevPos, VacantEntry};
+use core::{
+	CoreHashMap, DictShard, EntryKey, EntryType,
+	FullError, MaybeUninitDictShard
+};
+use bucket::{Bucket, BucketIdx};
+use entry::Entry;
+
+/// Wrapper struct around multiple [`ShmemHandle`]s.
+struct HashMapHandles {
+	keys_shmem: ShmemHandle,
+	idxs_shmem: ShmemHandle,
+	vals_shmem: ShmemHandle,
+}

 /// This represents a hash table that (possibly) lives in shared memory.
 /// If a new process is launched with fork(), the child process inherits
 /// this struct.
 #[must_use]
 pub struct HashMapInit<'a, K, V, S = rustc_hash::FxBuildHasher> {
-    shmem_handle: Option<ShmemHandle>,
+    shmem_handles: Option<HashMapHandles>,
    shared_ptr: *mut HashMapShared<'a, K, V>,
-    shared_size: usize,
    hasher: S,
-    num_buckets: u32,
+    num_buckets: usize,
+	num_shards: usize,
+	resize_lock: Mutex<()>,
 }

 /// This is a per-process handle to a hash table that (possibly) lives in shared memory.
@@ -55,9 +47,10 @@ pub struct HashMapInit<'a, K, V, S = rustc_hash::FxBuildHasher> {
 /// XXX: We're not making use of it at the moment, but this struct could
 /// hold process-local information in the future.
 pub struct HashMapAccess<'a, K, V, S = rustc_hash::FxBuildHasher> {
-    shmem_handle: Option<ShmemHandle>,
+    shmem_handles: Option<HashMapHandles>,
    shared_ptr: *mut HashMapShared<'a, K, V>,
    hasher: S,
+	resize_lock: Mutex<()>,
 }

 unsafe impl<K: Sync, V: Sync, S> Sync for HashMapAccess<'_, K, V, S> {}
@@ -70,79 +63,104 @@ impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> {
    /// before inserting any entries and before calling attach_writer/reader.
    /// Otherwise different accessors could be using different hash function,
    /// with confusing results.
+	///
+	/// TODO(quantumish): consider splitting out into a separate builder type?
    pub fn with_hasher<T: BuildHasher>(self, hasher: T) -> HashMapInit<'a, K, V, T> {
        HashMapInit {
            hasher,
-            shmem_handle: self.shmem_handle,
+            shmem_handles: self.shmem_handles,
            shared_ptr: self.shared_ptr,
-            shared_size: self.shared_size,
            num_buckets: self.num_buckets,
+			num_shards: self.num_shards,
+			resize_lock: self.resize_lock,
        }
    }

    /// Loosely (over)estimate the size needed to store a hash table with `num_buckets` buckets.
-    pub fn estimate_size(num_buckets: u32) -> usize {
-        // add some margin to cover alignment etc.
-        CoreHashMap::<K, V>::estimate_size(num_buckets) + size_of::<HashMapShared<K, V>>() + 1000
-    }
+    pub fn estimate_sizes(num_buckets: usize, num_shards: usize) -> (usize, usize, usize) {
+		(
+			(size_of::<EntryKey<K>>() * num_buckets)
+				+ (size_of::<libc::pthread_rwlock_t>() * num_shards)
+				+ (size_of::<RwLock<DictShard<'_, K>>>() * num_shards)
+				+ size_of::<HashMapShared<K, V>>()
+				+ 1000,
+			(size_of::<BucketIdx>() * num_buckets)+ 1000,
+			(size_of::<Bucket<V>>() * num_buckets) + 1000
+		)
+	}

+	fn carve_space<T>(ptr: &mut *mut u8, amount: usize) -> *mut T {
+		*ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<T>())) };
+        let out = ptr.cast();
+        *ptr = unsafe { ptr.add(size_of::<T>() * amount) };
+		out
+	}
+	
    fn new(
-        num_buckets: u32,
-        shmem_handle: Option<ShmemHandle>,
-        area_ptr: *mut u8,
-        area_size: usize,
+        num_buckets: usize,
+		num_shards: usize,
+        mut keys_ptr: *mut u8,
+		mut idxs_ptr: *mut u8,
+		mut vals_ptr: *mut u8,
+        shmem_handles: Option<HashMapHandles>,
        hasher: S,
    ) -> Self {
-        let mut ptr: *mut u8 = area_ptr;
-        let end_ptr: *mut u8 = unsafe { ptr.add(area_size) };
-
-        // carve out area for the One Big Lock (TM) and the HashMapShared.
-        ptr = unsafe { ptr.add(ptr.align_offset(align_of::<libc::pthread_rwlock_t>())) };
-        let raw_lock_ptr = ptr;
-        ptr = unsafe { ptr.add(size_of::<libc::pthread_rwlock_t>()) };
-        ptr = unsafe { ptr.add(ptr.align_offset(align_of::<HashMapShared<K, V>>())) };
-        let shared_ptr: *mut HashMapShared<K, V> = ptr.cast();
-        ptr = unsafe { ptr.add(size_of::<HashMapShared<K, V>>()) };
-
-        // carve out the buckets
-        ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<core::Bucket<K, V>>())) };
-        let buckets_ptr = ptr;
-        ptr = unsafe { ptr.add(size_of::<core::Bucket<K, V>>() * num_buckets as usize) };
-
-        // use remaining space for the dictionary
-        ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<u32>())) };
-        assert!(ptr.addr() < end_ptr.addr());
-        let dictionary_ptr = ptr;
-        let dictionary_size = unsafe { end_ptr.byte_offset_from(ptr) / size_of::<u32>() as isize };
-        assert!(dictionary_size > 0);
+		// Set up the main area: hashmap info at front, keys at back
+		let mutex_ptr = Self::carve_space::<libc::pthread_mutex_t>(&mut keys_ptr, 1);
+		let shared_ptr = Self::carve_space::<HashMapShared<K, V>>(&mut keys_ptr, 1);
+		let shards_ptr = Self::carve_space::<RwLock<DictShard<'_, K>>>(&mut keys_ptr, num_shards);
+		let locks_ptr = Self::carve_space::<libc::pthread_rwlock_t>(&mut keys_ptr, num_shards);
+		let keys_ptr = Self::carve_space::<EntryKey<K>>(&mut keys_ptr, num_buckets);
+		
+		// Set up the area of bucket idxs and the area of buckets. Not much to do!
+		let idxs_ptr = Self::carve_space::<BucketIdx>(&mut idxs_ptr, num_buckets);
+		let vals_ptr = Self::carve_space::<Bucket<V>>(&mut vals_ptr, num_buckets);

+		// Initialize the shards.
+		let shards_uninit: &mut [MaybeUninit<RwLock<MaybeUninitDictShard<'_, K>>>] =
+            unsafe { std::slice::from_raw_parts_mut(shards_ptr.cast(), num_shards) };
+		let shard_size = num_buckets / num_shards;
+		for i in 0..num_shards {
+			let size = ((i + 1) * shard_size).min(num_buckets) - (i * shard_size);
+			unsafe {
+				shards_uninit[i].write(RwLock::from_raw(
+					PthreadRwLock::new(NonNull::new_unchecked(locks_ptr.add(i))),
+					MaybeUninitDictShard {
+						keys: std::slice::from_raw_parts_mut(keys_ptr.add(i * shard_size).cast(), size),
+						idxs: std::slice::from_raw_parts_mut(idxs_ptr.add(i * shard_size).cast(), size)
+					}
+				));
+			};
+		}
+		let shards: &mut [RwLock<MaybeUninitDictShard<'_, K>>] =
+            unsafe { std::slice::from_raw_parts_mut(shards_ptr.cast(), num_shards) };
        let buckets =
-            unsafe { std::slice::from_raw_parts_mut(buckets_ptr.cast(), num_buckets as usize) };
-        let dictionary = unsafe {
-            std::slice::from_raw_parts_mut(dictionary_ptr.cast(), dictionary_size as usize)
-        };
+            unsafe { std::slice::from_raw_parts_mut(vals_ptr.cast(), num_buckets) };

-        let hashmap = CoreHashMap::new(buckets, dictionary);
-        let lock = RwLock::from_raw(PthreadRwLock::new(raw_lock_ptr.cast()), hashmap);
-        unsafe {
-            std::ptr::write(shared_ptr, lock);
-        }
+        let hashmap = CoreHashMap::new(buckets, shards);
+        unsafe { std::ptr::write(shared_ptr, hashmap); }

+		let resize_lock = Mutex::from_raw(
+			unsafe { PthreadMutex::new(NonNull::new_unchecked(mutex_ptr)) }, ()
+		);
+		
        Self {
+			num_shards,
            num_buckets,
-            shmem_handle,
+            shmem_handles,
            shared_ptr,
-            shared_size: area_size,
            hasher,
+			resize_lock, 
        }
    }

    /// Attach to a hash table for writing.
    pub fn attach_writer(self) -> HashMapAccess<'a, K, V, S> {
        HashMapAccess {
-            shmem_handle: self.shmem_handle,
+            shmem_handles: self.shmem_handles,
            shared_ptr: self.shared_ptr,
            hasher: self.hasher,
+			resize_lock: self.resize_lock,
        }
    }

@@ -152,31 +170,27 @@ impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> {
    }
 }

-/// Hash table data that is actually stored in the shared memory area.
-///
-/// NOTE: We carve out the parts from a contiguous chunk. Growing and shrinking the hash table
-/// relies on the memory layout! The data structures are laid out in the contiguous shared memory
-/// area as follows:
-///
-/// [`libc::pthread_rwlock_t`]
-/// [`HashMapShared`]
-/// [buckets]
-/// [dictionary]
-///
-/// In between the above parts, there can be padding bytes to align the parts correctly.
-type HashMapShared<'a, K, V> = RwLock<CoreHashMap<'a, K, V>>;
+type HashMapShared<'a, K, V> = CoreHashMap<'a, K, V>;

 impl<'a, K, V> HashMapInit<'a, K, V, rustc_hash::FxBuildHasher>
 where
    K: Clone + Hash + Eq,
 {
    /// Place the hash table within a user-supplied fixed memory area.
-    pub fn with_fixed(num_buckets: u32, area: &'a mut [MaybeUninit<u8>]) -> Self {
+    pub fn with_fixed(
+		num_buckets: usize,
+		num_shards: usize,
+		area: &'a mut [MaybeUninit<u8>]
+	) -> Self {
+		let (keys_size, idxs_size, _) = Self::estimate_sizes(num_buckets, num_shards);
+		let ptr = area.as_mut_ptr().cast();
        Self::new(
            num_buckets,
+			num_shards,
+            ptr,
+			unsafe { ptr.add(keys_size) },
+			unsafe { ptr.add(keys_size).add(idxs_size) },
            None,
-            area.as_mut_ptr().cast(),
-            area.len(),
            rustc_hash::FxBuildHasher,
        )
    }
@@ -185,45 +199,65 @@ where
    ///
    /// # Panics
    /// Will panic on failure to resize area to expected map size.
-    pub fn with_shmem(num_buckets: u32, shmem: ShmemHandle) -> Self {
-        let size = Self::estimate_size(num_buckets);
-        shmem
-            .set_size(size)
-            .expect("could not resize shared memory area");
-        let ptr = shmem.data_ptr.as_ptr().cast();
+    pub fn with_shmems(
+		num_buckets: usize,
+		num_shards: usize,
+		keys_shmem: ShmemHandle,
+		idxs_shmem: ShmemHandle,
+		vals_shmem: ShmemHandle,
+	) -> Self {
+		let (keys_size, idxs_size, vals_size) = Self::estimate_sizes(num_buckets, num_shards);
+        keys_shmem.set_size(keys_size).expect("could not resize shared memory area");
+        idxs_shmem.set_size(idxs_size).expect("could not resize shared memory area");
+        vals_shmem.set_size(vals_size).expect("could not resize shared memory area");
        Self::new(
            num_buckets,
-            Some(shmem),
-            ptr,
-            size,
+			num_shards,
+            keys_shmem.data_ptr.as_ptr().cast(),
+			idxs_shmem.data_ptr.as_ptr().cast(),
+			vals_shmem.data_ptr.as_ptr().cast(),
+            Some(HashMapHandles { keys_shmem, idxs_shmem, vals_shmem }),
            rustc_hash::FxBuildHasher,
        )
    }

    /// Make a resizable hash map within a new shared memory area with the given name.
-    pub fn new_resizeable_named(num_buckets: u32, max_buckets: u32, name: &str) -> Self {
-        let size = Self::estimate_size(num_buckets);
-        let max_size = Self::estimate_size(max_buckets);
-        let shmem =
-            ShmemHandle::new(name, size, max_size).expect("failed to make shared memory area");
-        let ptr = shmem.data_ptr.as_ptr().cast();
-
+    pub fn new_resizeable_named(
+		num_buckets: usize,
+		max_buckets: usize,
+		num_shards: usize,
+		name: &str
+	) -> Self {
+		let (keys_size, idxs_size, vals_size) = Self::estimate_sizes(num_buckets, num_shards);
+		let (keys_max, idxs_max, vals_max) = Self::estimate_sizes(max_buckets, num_shards);
+        let keys_shmem = ShmemHandle::new(&format!("{name}_keys"), keys_size, keys_max)
+			.expect("failed to make shared memory area");
+		let idxs_shmem = ShmemHandle::new(&format!("{name}_idxs"), idxs_size, idxs_max)
+			.expect("failed to make shared memory area");
+		let vals_shmem = ShmemHandle::new(&format!("{name}_vals"), vals_size, vals_max)
+			.expect("failed to make shared memory area");
        Self::new(
            num_buckets,
-            Some(shmem),
-            ptr,
-            size,
+			num_shards,
+            keys_shmem.data_ptr.as_ptr().cast(),
+			idxs_shmem.data_ptr.as_ptr().cast(),
+			vals_shmem.data_ptr.as_ptr().cast(),
+            Some(HashMapHandles { keys_shmem, idxs_shmem, vals_shmem }),
            rustc_hash::FxBuildHasher,
        )
    }

    /// Make a resizable hash map within a new anonymous shared memory area.
-    pub fn new_resizeable(num_buckets: u32, max_buckets: u32) -> Self {
+    pub fn new_resizeable(
+		num_buckets: usize,
+		max_buckets: usize,
+		num_shards: usize,
+	) -> Self {
        use std::sync::atomic::{AtomicUsize, Ordering};
        static COUNTER: AtomicUsize = AtomicUsize::new(0);
        let val = COUNTER.fetch_add(1, Ordering::Relaxed);
        let name = format!("neon_shmem_hmap{val}");
-        Self::new_resizeable_named(num_buckets, max_buckets, &name)
+        Self::new_resizeable_named(num_buckets, max_buckets, num_shards, &name)
    }
 }

@@ -237,66 +271,27 @@ where
        self.hasher.hash_one(key)
    }

-    fn entry_with_hash(&self, key: K, hash: u64) -> Entry<'a, '_, K, V> {
-        let mut map = unsafe { self.shared_ptr.as_ref() }.unwrap().write();
-        let dict_pos = hash as usize % map.dictionary.len();
-        let first = map.dictionary[dict_pos];
-        if first == INVALID_POS {
-            // no existing entry
-            return Entry::Vacant(VacantEntry {
-                map,
-                key,
-                dict_pos: dict_pos as u32,
-            });
-        }
-
-        let mut prev_pos = PrevPos::First(dict_pos as u32);
-        let mut next = first;
-        loop {
-            let bucket = &mut map.buckets[next as usize];
-            let (bucket_key, _bucket_value) = bucket.inner.as_mut().expect("entry is in use");
-            if *bucket_key == key {
-                // found existing entry
-                return Entry::Occupied(OccupiedEntry {
-                    map,
-                    _key: key,
-                    prev_pos,
-                    bucket_pos: next,
-                });
-            }
-
-            if bucket.next == INVALID_POS {
-                // No existing entry
-                return Entry::Vacant(VacantEntry {
-                    map,
-                    key,
-                    dict_pos: dict_pos as u32,
-                });
-            }
-            prev_pos = PrevPos::Chained(next);
-            next = bucket.next;
-        }
-    }
-
    /// Get a reference to the corresponding value for a key.
    pub fn get<'e>(&'e self, key: &K) -> Option<ValueReadGuard<'e, V>> {
        let hash = self.get_hash_value(key);
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
-        RwLockReadGuard::try_map(map, |m| m.get_with_hash(key, hash)).ok()
+        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
+		map.get_with_hash(key, hash)
    }

    /// Get a reference to the entry containing a key.
-    pub fn entry(&self, key: K) -> Entry<'a, '_, K, V> {
+    pub fn entry(&self, key: K) -> Result<Entry<'a, K, V>, FullError> {
        let hash = self.get_hash_value(&key);
-        self.entry_with_hash(key, hash)
+		let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
+        map.entry_with_hash(key, hash)
    }

    /// Remove a key given its hash. Returns the associated value if it existed.
    pub fn remove(&self, key: &K) -> Option<V> {
 		let hash = self.get_hash_value(key);
-        match self.entry_with_hash(key.clone(), hash) {
-            Entry::Occupied(e) => Some(e.remove()),
-            Entry::Vacant(_) => None,
+		let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
+        match map.entry_with_hash(key.clone(), hash) {
+            Ok(Entry::Occupied(mut e)) => Some(e.remove()),
+            _ => None,
        }
    }

@@ -306,154 +301,110 @@ where
    /// Will return [`core::FullError`] if there is no more space left in the map.
    pub fn insert(&self, key: K, value: V) -> Result<Option<V>, core::FullError> {
        let hash = self.get_hash_value(&key);
-        match self.entry_with_hash(key.clone(), hash) {
+		let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
+        match map.entry_with_hash(key.clone(), hash)? {
            Entry::Occupied(mut e) => Ok(Some(e.insert(value))),
            Entry::Vacant(e) => {
-                _ = e.insert(value)?;
+                _ = e.insert(value);
                Ok(None)
            }
        }
    }

-    /// Optionally return the entry for a bucket at a given index if it exists.
-    ///
-    /// Has more overhead than one would intuitively expect: performs both a clone of the key
-    /// due to the [`OccupiedEntry`] type owning the key and also a hash of the key in order
-    /// to enable repairing the hash chain if the entry is removed.
-    pub fn entry_at_bucket(&self, pos: usize) -> Option<OccupiedEntry<'a, '_, K, V>> {
-        let map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
-        if pos >= map.buckets.len() {
+    /// Optionally return reference to a bucket at a given index if it exists.
+    pub fn get_at_bucket(&self, pos: usize) -> Option<&V> {
+        let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
+        if pos >= map.bucket_arr.buckets.len() {
            return None;
        }

-		let entry = map.buckets[pos].inner.as_ref();
-		match entry {
-			Some((key, _)) => Some(OccupiedEntry {
-				_key: key.clone(),
-				bucket_pos: pos as u32,
-				prev_pos: entry::PrevPos::Unknown(
-					self.get_hash_value(key)
-				),
-				map,
-			}),
-			_ => None,
-		}
+		todo!("safely check if a given bucket is empty? always mark?");
    }

    /// Returns the number of buckets in the table.
    pub fn get_num_buckets(&self) -> usize {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
+        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
        map.get_num_buckets()
    }

-    /// Return the key and value stored in bucket with given index. This can be used to
-    /// iterate through the hash map.
-    // TODO: An Iterator might be nicer. The communicator's clock algorithm needs to
-    // _slowly_ iterate through all buckets with its clock hand,  without holding a lock.
-    // If we switch to an Iterator, it must not hold the lock.
-    pub fn get_at_bucket(&self, pos: usize) -> Option<ValueReadGuard<(K, V)>> {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
-        if pos >= map.buckets.len() {
-            return None;
-        }
-        RwLockReadGuard::try_map(map, |m| m.buckets[pos].inner.as_ref()).ok()
-    }
-
    /// Returns the index of the bucket a given value corresponds to.
    pub fn get_bucket_for_value(&self, val_ptr: *const V) -> usize {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
+        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();

-        let origin = map.buckets.as_ptr();
-        let idx = (val_ptr as usize - origin as usize) / size_of::<Bucket<K, V>>();
-        assert!(idx < map.buckets.len());
+        let origin = map.bucket_arr.buckets.as_ptr();
+        let idx = (val_ptr as usize - origin as usize) / size_of::<Bucket<V>>();
+        assert!(idx < map.bucket_arr.buckets.len());

        idx
    }

    /// Returns the number of occupied buckets in the table.
    pub fn get_num_buckets_in_use(&self) -> usize {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
-        map.buckets_in_use as usize
+        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
+        map.bucket_arr.buckets_in_use.load(Ordering::Relaxed)
    }

    /// Clears all entries in a table. Does not reset any shrinking operations.
    pub fn clear(&self) {
-        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
+        let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
        map.clear();
 	}
 	
-	/// Perform an in-place rehash of some region (0..`rehash_buckets`) of the table and reset
-	/// the `buckets` and `dictionary` slices to be as long as `num_buckets`. Resets the freelist
-	/// in the process.
-	fn rehash_dict(
+    pub fn rehash(
 		&self,
-		inner: &mut RwLockWriteGuard<'_, CoreHashMap<'a, K, V>>,
-		buckets_ptr: *mut core::Bucket<K, V>,
-		end_ptr: *mut u8,
-		num_buckets: u32,
-		rehash_buckets: u32,
+		shards: &mut Vec<RwLockWriteGuard<'_, DictShard<'_, K>>>,
+		rehash_buckets: usize
 	) {
-		inner.free_head = INVALID_POS;
+        let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
+		assert!(rehash_buckets <= map.get_num_buckets(), "rehashing subset of buckets");
+		shards.iter_mut().for_each(|x| x.keys.iter_mut().for_each(|key| {
+			if let EntryType::Occupied = key.tag {
+				key.tag = EntryType::Rehash;
+			}
+		}));
 		
-        let buckets;
-        let dictionary;
-        unsafe {
-            let buckets_end_ptr = buckets_ptr.add(num_buckets as usize);
-            let dictionary_ptr: *mut u32 = buckets_end_ptr
-                .byte_add(buckets_end_ptr.align_offset(align_of::<u32>()))
-                .cast();
-            let dictionary_size: usize =
-                end_ptr.byte_offset_from(buckets_end_ptr) as usize / size_of::<u32>();
-
-            buckets = std::slice::from_raw_parts_mut(buckets_ptr, num_buckets as usize);
-            dictionary = std::slice::from_raw_parts_mut(dictionary_ptr, dictionary_size);
-        }
-        for e in dictionary.iter_mut() {
-            *e = INVALID_POS;
-        }
-
-        for (i, bucket) in buckets.iter_mut().enumerate().take(rehash_buckets as usize) {
-            if bucket.inner.is_none() {
-                bucket.next = inner.free_head;
-                inner.free_head = i as u32;
-                continue;
-            }
-
-            let hash = self.hasher.hash_one(&bucket.inner.as_ref().unwrap().0);
-            let pos: usize = (hash % dictionary.len() as u64) as usize;
-            bucket.next = dictionary[pos];
-            dictionary[pos] = i as u32;
-        }
-
-        inner.dictionary = dictionary;
-        inner.buckets = buckets;
+		todo!("solution with no memory allocation: split out metadata?")
    }

-    /// Rehash the map without growing or shrinking.
-    pub fn shuffle(&self) {
-        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
-        let num_buckets = map.get_num_buckets() as u32;
-        let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);
-        let end_ptr: *mut u8 = unsafe { self.shared_ptr.byte_add(size_bytes).cast() };
-        let buckets_ptr = map.buckets.as_mut_ptr();
-        self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, num_buckets);
+	pub fn shuffle(&self) {
+        let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
+		let mut shards: Vec<_> = map.dict_shards.iter().map(|x| x.write()).collect();
+		self.rehash(&mut shards, map.get_num_buckets());
    }
+	
+	fn reshard(&self, shards: &mut Vec<RwLockWriteGuard<'_, DictShard<'_, K>>>, num_buckets: usize) {
+		let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
+		let shard_size = num_buckets / map.dict_shards.len();
+		for i in 0..map.dict_shards.len() {
+			let size = ((i + 1) * shard_size).min(num_buckets) - (i * shard_size);
+			unsafe {
+				shards[i].keys = std::slice::from_raw_parts_mut(shards[i].keys.as_mut_ptr(), size);
+				shards[i].idxs = std::slice::from_raw_parts_mut(shards[i].idxs.as_mut_ptr(), size);
+			}
+		}
+	}

-    /// Grow the number of buckets within the table.
-    ///
-    /// 1. Grows the underlying shared memory area
-    /// 2. Initializes new buckets and overwrites the current dictionary
-    /// 3. Rehashes the dictionary
-    ///
-    /// # Panics
-    /// Panics if called on a map initialized with [`HashMapInit::with_fixed`].
-    ///
-    /// # Errors
-    /// Returns an [`shmem::Error`] if any errors occur resizing the memory region.
-    pub fn grow(&self, num_buckets: u32) -> Result<(), shmem::Error> {
-        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
-        let old_num_buckets = map.buckets.len() as u32;
+	fn resize_shmem(&self, num_buckets: usize) -> Result<(), shmem::Error> {
+		let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
+		let shmem_handles = self
+            .shmem_handles
+            .as_ref()
+            .expect("grow called on a fixed-size hash table");

+		let (keys_size, idxs_size, vals_size) =
+			HashMapInit::<K, V, S>::estimate_sizes(num_buckets, map.dict_shards.len());
+        shmem_handles.keys_shmem.set_size(keys_size)?;
+		shmem_handles.idxs_shmem.set_size(idxs_size)?;
+		shmem_handles.vals_shmem.set_size(vals_size)?;
+		Ok(())
+	}
+
+    pub fn grow(&self, num_buckets: usize) -> Result<(), shmem::Error> {
+        let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
+		let _resize_guard = self.resize_lock.lock();
+		let mut shards: Vec<_> = map.dict_shards.iter().map(|x| x.write()).collect();
+
+		let old_num_buckets = map.bucket_arr.buckets.len();
        assert!(
            num_buckets >= old_num_buckets,
            "grow called with a smaller number of buckets"
@@ -461,128 +412,114 @@ where
        if num_buckets == old_num_buckets {
            return Ok(());
        }
-        let shmem_handle = self
-            .shmem_handle
-            .as_ref()
-            .expect("grow called on a fixed-size hash table");

-        let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);
-        shmem_handle.set_size(size_bytes)?;
-        let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) };
-
-        // Initialize new buckets. The new buckets are linked to the free list.
-        // NB: This overwrites the dictionary!
-        let buckets_ptr = map.buckets.as_mut_ptr();
+		// Grow memory areas and initialize each of them.
+		self.resize_shmem(num_buckets)?;                
        unsafe {
+			let buckets_ptr = map.bucket_arr.buckets.as_mut_ptr();
            for i in old_num_buckets..num_buckets {
-                let bucket = buckets_ptr.add(i as usize);
-                bucket.write(core::Bucket {
-                    next: if i < num_buckets - 1 {
-                        i + 1
+                let bucket = buckets_ptr.add(i);
+                bucket.write(Bucket::empty(
+                    if i < num_buckets - 1 {
+                        BucketIdx::new(i + 1)
                    } else {
-                        map.free_head
-                    },
-                    inner: None,
-                });
+                        map.bucket_arr.free_head.load(Ordering::Relaxed)
+                    }
+                ));
+            }
+
+			// TODO(quantumish) a bit questionable to use pointers here
+			let first_shard = &mut shards[0];
+			let keys_ptr = first_shard.keys.as_mut_ptr();			
+			for i in old_num_buckets..num_buckets {
+                let key = keys_ptr.add(i);
+                key.write(EntryKey {
+					tag: EntryType::Empty,
+					val: MaybeUninit::uninit(),
+				});
+            }
+			
+			let idxs_ptr = first_shard.idxs.as_mut_ptr();
+			for i in old_num_buckets..num_buckets {
+                let idx = idxs_ptr.add(i);
+                idx.write(BucketIdx::invalid());
            }
        }

-        self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, old_num_buckets);
-        map.free_head = old_num_buckets;
-
+		self.reshard(&mut shards, num_buckets);
+        self.rehash(&mut shards, old_num_buckets);
+        map.bucket_arr.free_head.store(
+			BucketIdx::new(old_num_buckets), Ordering::Relaxed
+		);
        Ok(())
    }

-    /// Begin a shrink, limiting all new allocations to be in buckets with index below `num_buckets`.
-    ///
-    /// # Panics
-    /// Panics if called on a map initialized with [`HashMapInit::with_fixed`] or if `num_buckets` is
-    /// greater than the number of buckets in the map.
-    pub fn begin_shrink(&mut self, num_buckets: u32) {
-        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
+    pub fn begin_shrink(&mut self, num_buckets: usize) {
+        let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
+		let _resize_guard = self.resize_lock.lock();
        assert!(
-            num_buckets <= map.get_num_buckets() as u32,
+            num_buckets <= map.get_num_buckets(),
            "shrink called with a larger number of buckets"
        );
        _ = self
-            .shmem_handle
+            .shmem_handles
            .as_ref()
            .expect("shrink called on a fixed-size hash table");
-        map.alloc_limit = num_buckets;
+        map.bucket_arr.alloc_limit.store(
+			BucketIdx::new(num_buckets), Ordering::SeqCst
+		);
    }

-    /// If a shrink operation is underway, returns the target size of the map. Otherwise, returns None.
+	// TODO(quantumish): Safety? Maybe replace this with expanded version of finish_shrink?
    pub fn shrink_goal(&self) -> Option<usize> {
-        let map = unsafe { self.shared_ptr.as_mut() }.unwrap().read();
-        let goal = map.alloc_limit;
-		if goal == INVALID_POS { None } else { Some(goal as usize) }
+        let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
+        let goal = map.bucket_arr.alloc_limit.load(Ordering::Relaxed);
+		goal.pos_checked()
 	}
-	
-	/// Complete a shrink after caller has evicted entries, removing the unused buckets and rehashing.
-	///
-	/// # Panics
-	/// The following two cases result in a panic: 
-	/// - Calling this function on a map initialized with [`HashMapInit::with_fixed`].
-	/// - Calling this function on a map when no shrink operation is in progress.
-	///   there are more buckets in use than the value returned by [`HashMapAccess::shrink_goal`].
-	///
-	/// # Errors
-	/// Returns an [`shmem::Error`] if any errors occur resizing the memory region.
-	pub fn finish_shrink(&self) -> Result<(), shmem::Error> {
-		let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
-		assert!(
-			map.alloc_limit != INVALID_POS,
-			"called finish_shrink when no shrink is in progress"
-		);

-    /// Complete a shrink after caller has evicted entries, removing the unused buckets and rehashing.
-    ///
-    /// # Panics
-    /// The following cases result in a panic:
-    /// - Calling this function on a map initialized with [`HashMapInit::with_fixed`].
-    /// - Calling this function on a map when no shrink operation is in progress.
-    /// - Calling this function on a map with `shrink_mode` set to [`HashMapShrinkMode::Remap`] and
-    ///   there are more buckets in use than the value returned by [`HashMapAccess::shrink_goal`].
-    ///
-    /// # Errors
-    /// Returns an [`shmem::Error`] if any errors occur resizing the memory region.
    pub fn finish_shrink(&self) -> Result<(), shmem::Error> {
-        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
-        assert!(
-            map.alloc_limit != INVALID_POS,
-            "called finish_shrink when no shrink is in progress"
-        );
-
-        let num_buckets = map.alloc_limit;
-
-        if map.get_num_buckets() == num_buckets as usize {
+        let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
+		let _resize_guard = self.resize_lock.lock();
+		let mut shards: Vec<_> = map.dict_shards.iter().map(|x| x.write()).collect();
+		
+        let num_buckets = map.bucket_arr.alloc_limit
+			.load(Ordering::Relaxed)
+			.pos_checked()
+			.expect("called finish_shrink when no shrink is in progress");
+        
+        if map.get_num_buckets() == num_buckets {
            return Ok(());
        }

        assert!(
-            map.buckets_in_use <= num_buckets,
+            map.bucket_arr.buckets_in_use.load(Ordering::Relaxed) <= num_buckets,
            "called finish_shrink before enough entries were removed"
        );

-        for i in (num_buckets as usize)..map.buckets.len() {
-            if let Some((k, v)) = map.buckets[i].inner.take() {
-                // alloc_bucket increases count, so need to decrease since we're just moving
-                map.buckets_in_use -= 1;
-                map.alloc_bucket(k, v).unwrap();
-            }
-        }
+		// let shard_size = shards[0].len();
+        // for i in (num_buckets as usize)..map.buckets.len() {
+		// 	let shard_start = num_buckets / shard_size;
+		// 	let shard = shards[shard_start];
+		// 	let entry_start = num_buckets % shard_size;
+		// 	for entry_idx in entry_start..shard.len() {
+				
+		// 	}
+			
+		// 	if let EntryKey::Occupied(v) = map.[i].inner.take() {
+        //         // alloc_bucket increases count, so need to decrease since we're just moving
+        //         map.buckets_in_use.fetch_sub(1, Ordering::Relaxed);
+        //         map.alloc_bucket(k, v).unwrap();
+        //     }
+        // }

-        let shmem_handle = self
-            .shmem_handle
-            .as_ref()
-            .expect("shrink called on a fixed-size hash table");
+		todo!("dry way to handle reinsertion");

-        let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);
-        shmem_handle.set_size(size_bytes)?;
-        let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) };
-        let buckets_ptr = map.buckets.as_mut_ptr();
-        self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, num_buckets);
-        map.alloc_limit = INVALID_POS;
+		self.resize_shmem(num_buckets)?;
+
+		self.reshard(&mut shards, num_buckets);
+		
+        self.rehash(&mut shards, num_buckets);
+        map.bucket_arr.alloc_limit.store(BucketIdx::invalid(), Ordering::Relaxed);

        Ok(())
    }
--- a/libs/neon-shmem/src/hash/bucket.rs
+++ b/libs/neon-shmem/src/hash/bucket.rs
@@ -0,0 +1,232 @@
+
+use std::{mem::MaybeUninit, sync::atomic::{AtomicUsize, Ordering}};
+
+use atomic::Atomic;
+
+#[derive(bytemuck::NoUninit, Clone, Copy, PartialEq, Eq)]
+#[repr(transparent)]
+pub(crate) struct BucketIdx(pub(super) u32);
+
+impl BucketIdx {
+	pub const INVALID: u32 = 0x7FFFFFFF;
+	pub const MARK_TAG: u32 = 0x80000000;
+
+	pub(super) fn is_marked(&self) -> bool {
+		self.0 & Self::MARK_TAG != 0
+	}
+
+	pub(super) fn is_invalid(self) -> bool {
+		self.0 & Self::INVALID == Self::INVALID
+	}
+	
+	pub(super) fn as_marked(self) -> Self {
+		Self(self.0 | Self::MARK_TAG)
+	}
+
+	pub(super) fn get_unmarked(self) -> Self {
+		Self(self.0 & Self::INVALID)
+	}
+	
+	pub fn new(val: usize) -> Self {
+		Self(val as u32)
+	}
+
+	pub fn invalid() -> Self {
+		Self(Self::INVALID)
+	}
+
+	pub fn pos_checked(&self) -> Option<usize> {
+		if self.0 == Self::INVALID || self.is_marked() {
+			None
+		} else {
+			Some(self.0 as usize)
+		}
+	}
+}
+
+/// Fundamental storage unit within the hash table. Either empty or contains a key-value pair.
+/// Always part of a chain of some kind (either a freelist if empty or a hash chain if full).
+pub(crate) struct Bucket<V> {
+    pub val: MaybeUninit<V>,
+	pub next: Atomic<BucketIdx>,
+}
+
+impl<V> Bucket<V> {
+	pub fn empty(next: BucketIdx) -> Self {		
+		Self {
+			val: MaybeUninit::uninit(),
+			next: Atomic::new(next)
+		}
+	}
+
+	pub fn full(val: V) -> Self {
+		Self {
+			val: MaybeUninit::new(val),
+			next: Atomic::new(BucketIdx::invalid())
+		}
+	}
+
+	// pub is_full
+	
+	pub fn as_ref(&self) -> &V {
+		unsafe { self.val.assume_init_ref() }
+	}
+
+	pub fn as_mut(&mut self) -> &mut V {
+		unsafe { self.val.assume_init_mut() }
+	}
+
+	pub fn replace(&mut self, new_val: V) -> V {
+		unsafe { std::mem::replace(self.val.assume_init_mut(), new_val) }
+	}
+}
+
+pub(crate) struct BucketArray<'a, V> {
+	/// Buckets containing values.
+    pub(crate) buckets: &'a mut [Bucket<V>],
+    /// Head of the freelist.
+    pub(crate) free_head: Atomic<BucketIdx>,
+    /// Maximum index of a bucket allowed to be allocated.
+    pub(crate) alloc_limit: Atomic<BucketIdx>,
+    /// The number of currently occupied buckets.
+    pub(crate) buckets_in_use: AtomicUsize,
+    // Unclear what the purpose of this is.
+    pub(crate) _user_list_head: Atomic<BucketIdx>,
+}
+
+impl<'a, V> BucketArray<'a, V> {
+	pub fn new(buckets: &'a mut [Bucket<V>]) -> Self {
+		debug_assert!(Atomic::<BucketIdx>::is_lock_free());
+		Self {
+			buckets,
+			free_head: Atomic::new(BucketIdx(0)),
+			_user_list_head: Atomic::new(BucketIdx(0)),
+			alloc_limit: Atomic::new(BucketIdx::invalid()),
+			buckets_in_use: 0.into(),
+		}
+	}
+	
+	pub fn dealloc_bucket(&mut self, pos: usize) -> V {
+		let bucket = &mut self.buckets[pos];
+		let pos = BucketIdx::new(pos);
+		loop {
+			let free = self.free_head.load(Ordering::Relaxed);
+			bucket.next = Atomic::new(free);
+			if self.free_head.compare_exchange_weak(
+				free, pos, Ordering::Relaxed, Ordering::Relaxed
+			).is_ok() {
+				self.buckets_in_use.fetch_sub(1, Ordering::Relaxed);
+				return unsafe { bucket.val.assume_init_read() };
+			}
+		}
+	}
+
+	#[allow(unused_assignments)]
+	fn find_bucket(&self) -> (BucketIdx, BucketIdx) {
+		let mut left_node = BucketIdx::invalid();
+		let mut right_node = BucketIdx::invalid();
+		let mut left_node_next = BucketIdx::invalid();
+		
+		loop { 
+			let mut t = BucketIdx::invalid();
+			let mut t_next = self.free_head.load(Ordering::Relaxed);
+			let alloc_limit = self.alloc_limit.load(Ordering::Relaxed).pos_checked();
+			while t_next.is_marked() || t.pos_checked()
+				.map_or(true, |v| alloc_limit.map_or(false, |l| v > l))
+			{
+				if t_next.is_marked() {
+					left_node = t;
+					left_node_next = t_next;
+				}
+				t = t_next.get_unmarked();
+				if t.is_invalid() { break }
+				t_next = self.buckets[t.0 as usize].next.load(Ordering::Relaxed);
+			}
+			right_node = t;
+
+			if left_node_next == right_node {
+				if !right_node.is_invalid() && self.buckets[right_node.0 as usize]
+					.next.load(Ordering::Relaxed).is_marked()
+				{
+					continue;
+				} else {
+					return (left_node, right_node);
+				}
+			}
+
+			let left_ref = if !left_node.is_invalid() {
+				&self.buckets[left_node.0 as usize].next					
+			} else { &self.free_head };
+			
+			if left_ref.compare_exchange_weak(
+				left_node_next, right_node, Ordering::Relaxed, Ordering::Relaxed
+			).is_ok() {
+				if !right_node.is_invalid() && self.buckets[right_node.0 as usize]
+					.next.load(Ordering::Relaxed).is_marked()
+				{
+					continue;
+				} else {
+					return (left_node, right_node);
+				}
+			}
+		}
+	}
+
+	#[allow(unused_assignments)]
+    pub(crate) fn alloc_bucket(&mut self, value: V) -> Option<BucketIdx> {
+		let mut right_node_next = BucketIdx::invalid();
+		let mut left_idx = BucketIdx::invalid();
+		let mut right_idx = BucketIdx::invalid();
+		
+		loop {
+			(left_idx, right_idx) = self.find_bucket();
+			if right_idx.is_invalid() {
+				return None;
+			}
+			
+			let right = &self.buckets[right_idx.0 as usize];
+			right_node_next = right.next.load(Ordering::Relaxed);
+			if !right_node_next.is_marked() {
+				if right.next.compare_exchange_weak(
+					right_node_next, right_node_next.as_marked(),
+					Ordering::Relaxed, Ordering::Relaxed
+				).is_ok() {
+					break;
+				}
+			}
+		}
+
+		let left_ref = if !left_idx.is_invalid() {
+			&self.buckets[left_idx.0 as usize].next
+		} else {
+			&self.free_head
+		};
+		
+		if left_ref.compare_exchange_weak(
+			right_idx, right_node_next,
+			Ordering::Relaxed, Ordering::Relaxed
+		).is_err() {
+			todo!()
+		}
+
+        self.buckets_in_use.fetch_add(1, Ordering::Relaxed);
+		self.buckets[right_idx.0 as usize].val.write(value);
+		Some(right_idx)
+    }
+
+	pub fn clear(&mut self) {
+		for i in 0..self.buckets.len() {
+			self.buckets[i] = Bucket::empty(
+				if i < self.buckets.len() - 1 {
+					BucketIdx::new(i + 1)
+				} else {
+					BucketIdx::invalid()
+				}				
+			);
+        }
+
+		self.free_head.store(BucketIdx(0), Ordering::Relaxed);
+        self.buckets_in_use.store(0, Ordering::Relaxed);
+	}
+}
+
--- a/libs/neon-shmem/src/hash/core.rs
+++ b/libs/neon-shmem/src/hash/core.rs
@@ -3,35 +3,47 @@
 use std::hash::Hash;
 use std::mem::MaybeUninit;

-use crate::hash::entry::*;
+use crate::sync::*;
+use crate::hash::{
+	entry::*,
+	bucket::{BucketArray, Bucket, BucketIdx}
+};

-/// Invalid position within the map (either within the dictionary or bucket array).
-pub(crate) const INVALID_POS: u32 = u32::MAX;
+#[derive(PartialEq, Eq)]
+pub(crate) enum EntryType {
+	Occupied,
+	Rehash,
+	Tombstone,
+	RehashTombstone,
+	Empty,
+}

-/// Fundamental storage unit within the hash table. Either empty or contains a key-value pair.
-/// Always part of a chain of some kind (either a freelist if empty or a hash chain if full).
-pub(crate) struct Bucket<K, V> {
-    /// Index of next bucket in the chain.
-    pub(crate) next: u32,
-    /// Key-value pair contained within bucket.
-    pub(crate) inner: Option<(K, V)>,
+pub(crate) struct EntryKey<K> {
+	pub(crate) tag: EntryType,
+	pub(crate) val: MaybeUninit<K>,
+}
+
+pub(crate) struct DictShard<'a, K> {
+	pub(crate) keys: &'a mut [EntryKey<K>],
+	pub(crate) idxs: &'a mut [BucketIdx],
+}
+
+impl<'a, K> DictShard<'a, K> {
+	fn len(&self) -> usize {
+		self.keys.len()
+	}
+}
+
+pub(crate) struct MaybeUninitDictShard<'a, K> {
+	pub(crate) keys: &'a mut [MaybeUninit<EntryKey<K>>],
+	pub(crate) idxs: &'a mut [MaybeUninit<BucketIdx>],
 }

 /// Core hash table implementation.
 pub(crate) struct CoreHashMap<'a, K, V> {
-    /// Dictionary used to map hashes to bucket indices.
-    pub(crate) dictionary: &'a mut [u32],
-    /// Buckets containing key-value pairs.
-    pub(crate) buckets: &'a mut [Bucket<K, V>],
-    /// Head of the freelist.
-    pub(crate) free_head: u32,
-    /// Maximum index of a bucket allowed to be allocated. [`INVALID_POS`] if no limit.
-    pub(crate) alloc_limit: u32,
-    /// The number of currently occupied buckets.
-    pub(crate) buckets_in_use: u32,
-    // pub(crate) lock: libc::pthread_mutex_t,
-    // Unclear what the purpose of this is.
-    pub(crate) _user_list_head: u32,
+	/// Dictionary used to map hashes to bucket indices.
+    pub(crate) dict_shards: &'a mut [RwLock<DictShard<'a, K>>],
+	pub(crate) bucket_arr: BucketArray<'a, V>,
 }

 /// Error for when there are no empty buckets left but one is needed.
@@ -39,140 +51,170 @@ pub(crate) struct CoreHashMap<'a, K, V> {
 pub struct FullError();

 impl<'a, K: Clone + Hash + Eq, V> CoreHashMap<'a, K, V> {
-    const FILL_FACTOR: f32 = 0.60;
-
-    /// Estimate the size of data contained within the the hash map.
-    pub fn estimate_size(num_buckets: u32) -> usize {
-        let mut size = 0;
-
-        // buckets
-        size += size_of::<Bucket<K, V>>() * num_buckets as usize;
-
-        // dictionary
-        size += (f32::ceil((size_of::<u32>() * num_buckets as usize) as f32 / Self::FILL_FACTOR))
-            as usize;
-
-        size
-    }
-
    pub fn new(
-        buckets: &'a mut [MaybeUninit<Bucket<K, V>>],
-        dictionary: &'a mut [MaybeUninit<u32>],
+        buckets: &'a mut [MaybeUninit<Bucket<V>>],
+        dict_shards: &'a mut [RwLock<MaybeUninitDictShard<'a, K>>],
    ) -> Self {
        // Initialize the buckets
-        for i in 0..buckets.len() {
-            buckets[i].write(Bucket {
-                next: if i < buckets.len() - 1 {
-                    i as u32 + 1
-                } else {
-                    INVALID_POS
-                },
-                inner: None,
-            });
+		for i in 0..buckets.len() {
+			buckets[i].write(Bucket::empty(
+				if i < buckets.len() - 1 {
+					BucketIdx::new(i + 1)
+				} else {
+					BucketIdx::invalid()
+				})
+			);
        }

        // Initialize the dictionary
-        for e in dictionary.iter_mut() {
-            e.write(INVALID_POS);
-        }
+		for shard in dict_shards.iter_mut() {
+			let mut dicts = shard.write();
+			for e in dicts.keys.iter_mut() {
+				e.write(EntryKey {
+					tag: EntryType::Empty,
+					val: MaybeUninit::uninit(),
+				});
+			}
+			for e in dicts.idxs.iter_mut() {
+				e.write(BucketIdx::invalid());
+			}
+		}

        // TODO: use std::slice::assume_init_mut() once it stabilizes
        let buckets =
-            unsafe { std::slice::from_raw_parts_mut(buckets.as_mut_ptr().cast(), buckets.len()) };
-        let dictionary = unsafe {
-            std::slice::from_raw_parts_mut(dictionary.as_mut_ptr().cast(), dictionary.len())
+            unsafe { std::slice::from_raw_parts_mut(buckets.as_mut_ptr().cast(),
+													buckets.len()) };
+        let dict_shards = unsafe {
+            std::slice::from_raw_parts_mut(dict_shards.as_mut_ptr().cast(),
+										   dict_shards.len())
        };

        Self {
-            dictionary,
-            buckets,
-            free_head: 0,
-            buckets_in_use: 0,
-            _user_list_head: INVALID_POS,
-            alloc_limit: INVALID_POS,
+            dict_shards,
+			bucket_arr: BucketArray::new(buckets),
        }
    }
-
+	
    /// Get the value associated with a key (if it exists) given its hash.
-    pub fn get_with_hash(&self, key: &K, hash: u64) -> Option<&V> {
-        let mut next = self.dictionary[hash as usize % self.dictionary.len()];
-        loop {
-            if next == INVALID_POS {
-                return None;
-            }
+    pub fn get_with_hash(&'a self, key: &K, hash: u64) -> Option<ValueReadGuard<'a, V>> {
+		let num_buckets = self.get_num_buckets();
+		let shard_size = num_buckets / self.dict_shards.len();
+		let bucket_pos = hash as usize % num_buckets;
+		let shard_start = bucket_pos / shard_size;
+		for off in 0..self.dict_shards.len() {
+			let shard_idx = (shard_start + off) % self.dict_shards.len();
+			let shard = self.dict_shards[shard_idx].read();
+			let entry_start = if off == 0 { bucket_pos % shard_size } else { 0 };
+			for entry_idx in entry_start..shard.len() {
+				match shard.keys[entry_idx].tag {
+					EntryType::Empty => return None,
+					EntryType::Tombstone => continue, 
+					EntryType::Occupied => {
+						let cand_key = unsafe { shard.keys[entry_idx].val.assume_init_ref() };
+						if cand_key == key {
+							let bucket_idx = shard.idxs[entry_idx].pos_checked().expect("position is valid");
+							return Some(RwLockReadGuard::map(
+								shard, |_| self.bucket_arr.buckets[bucket_idx].as_ref()
+							));
+						}
+					},
+					_ => unreachable!(),
+				}
+			}
+		}
+		None
+	}

-            let bucket = &self.buckets[next as usize];
-            let (bucket_key, bucket_value) = bucket.inner.as_ref().expect("entry is in use");
-            if bucket_key == key {
-                return Some(bucket_value);
-            }
-            next = bucket.next;
-        }
-    }
+    pub fn entry_with_hash(&'a mut self, key: K, hash: u64) -> Result<Entry<'a, K, V>, FullError> {
+		// We need to keep holding on the locks for each shard we process since if we don't find the
+		// key anywhere, we want to insert it at the earliest possible position (which may be several
+		// shards away). Ideally cross-shard chains are quite rare, so this shouldn't be a big deal.
+		let mut shards = Vec::new();
+		let mut insert_pos = None;
+		let mut insert_shard = None;

+		let num_buckets = self.get_num_buckets();
+		let shard_size = num_buckets / self.dict_shards.len();
+		let bucket_pos = hash as usize % num_buckets;
+		let shard_start = bucket_pos / shard_size;
+		for off in 0..self.dict_shards.len() {
+			let shard_idx = (shard_start + off) % self.dict_shards.len();			
+			let shard = self.dict_shards[shard_idx].write();
+			let mut inserted = false;
+			let entry_start = if off == 0 { bucket_pos % shard_size } else { 0 };
+			for entry_idx in entry_start..shard.len() {
+				match shard.keys[entry_idx].tag {
+					EntryType::Empty => {
+						let (shard, shard_pos) = match (insert_shard, insert_pos) {
+							(Some(s), Some(p)) => (s, p),
+							(None, Some(p)) => (shard, p),
+							(None, None) => (shard, entry_idx),
+							_ => unreachable!()
+						};
+						return Ok(Entry::Vacant(VacantEntry {
+							_key: key,
+							shard,
+							shard_pos,
+							bucket_arr: &mut self.bucket_arr,
+						}))
+					},
+					EntryType::Tombstone => {
+						if insert_pos.is_none() {
+							insert_pos = Some(entry_idx);
+							inserted = true;
+						}
+					},
+					EntryType::Occupied => {
+						let cand_key = unsafe { shard.keys[entry_idx].val.assume_init_ref() };
+						if *cand_key == key {
+							let bucket_pos = shard.idxs[entry_idx].pos_checked().unwrap();
+							return Ok(Entry::Occupied(OccupiedEntry {
+								_key: key,
+								shard,
+								shard_pos: entry_idx,
+								bucket_pos,
+								bucket_arr: &mut self.bucket_arr,
+							}));
+						}	
+					}
+					_ => unreachable!(),
+				} 
+			}
+			if inserted {
+				insert_shard = Some(shard)
+			} else {
+				shards.push(shard);
+			}
+		}
+		
+		if let (Some(shard), Some(shard_pos)) = (insert_shard, insert_pos) {
+			Ok(Entry::Vacant(VacantEntry {
+				_key: key,
+				shard,
+				shard_pos,
+				bucket_arr: &mut self.bucket_arr,
+			}))
+		} else {
+			Err(FullError{})
+		}
+	}
+	
    /// Get number of buckets in map.
    pub fn get_num_buckets(&self) -> usize {
-        self.buckets.len()
+        self.bucket_arr.buckets.len()
    }

-    /// Clears all entries from the hashmap.
-    ///
-    /// Does not reset any allocation limits, but does clear any entries beyond them.
    pub fn clear(&mut self) {
-        for i in 0..self.buckets.len() {
-            self.buckets[i] = Bucket {
-                next: if i < self.buckets.len() - 1 {
-                    i as u32 + 1
-                } else {
-                    INVALID_POS
-                },
-                inner: None,
-            }
-        }
-        for i in 0..self.dictionary.len() {
-            self.dictionary[i] = INVALID_POS;
-        }
+		let mut shards: Vec<_> = self.dict_shards.iter().map(|x| x.write()).collect();
+        for shard in shards.iter_mut() {
+			for e in shard.keys.iter_mut() {
+				e.tag = EntryType::Empty;
+			}
+			for e in shard.idxs.iter_mut() {
+				*e = BucketIdx::invalid();
+			}
+		}

-        self.free_head = 0;
-        self.buckets_in_use = 0;
-    }
-
-    /// Find the position of an unused bucket via the freelist and initialize it.
-    pub(crate) fn alloc_bucket(&mut self, key: K, value: V) -> Result<u32, FullError> {
-        let mut pos = self.free_head;
-
-        // Find the first bucket we're *allowed* to use.
-        let mut prev = PrevPos::First(self.free_head);
-        while pos != INVALID_POS && pos >= self.alloc_limit {
-            let bucket = &mut self.buckets[pos as usize];
-            prev = PrevPos::Chained(pos);
-            pos = bucket.next;
-        }
-        if pos == INVALID_POS {
-            return Err(FullError());
-        }
-
-        // Repair the freelist.
-        match prev {
-            PrevPos::First(_) => {
-                let next_pos = self.buckets[pos as usize].next;
-                self.free_head = next_pos;
-            }
-            PrevPos::Chained(p) => {
-                if p != INVALID_POS {
-                    let next_pos = self.buckets[pos as usize].next;
-                    self.buckets[p as usize].next = next_pos;
-                }
-            }
-            _ => unreachable!(),
-        }
-
-        // Initialize the bucket.
-        let bucket = &mut self.buckets[pos as usize];
-        self.buckets_in_use += 1;
-        bucket.next = INVALID_POS;
-        bucket.inner = Some((key, value));
-
-        Ok(pos)
+        self.bucket_arr.clear();
    }
 }
--- a/libs/neon-shmem/src/hash/entry.rs
+++ b/libs/neon-shmem/src/hash/entry.rs
@@ -1,138 +1,76 @@
 //! Equivalent of [`std::collections::hash_map::Entry`] for this hashmap.

-use crate::hash::core::{CoreHashMap, FullError, INVALID_POS};
+use crate::hash::{
+	core::{DictShard, EntryType},
+	bucket::{BucketArray, BucketIdx}
+};
 use crate::sync::{RwLockWriteGuard, ValueWriteGuard};

 use std::hash::Hash;
-use std::mem;

-pub enum Entry<'a, 'b, K, V> {
-    Occupied(OccupiedEntry<'a, 'b, K, V>),
-    Vacant(VacantEntry<'a, 'b, K, V>),
+pub enum Entry<'a, K, V> {
+    Occupied(OccupiedEntry<'a, K, V>),
+    Vacant(VacantEntry<'a, K, V>),
 }

-/// Enum representing the previous position within a chain.
-#[derive(Clone, Copy)]
-pub(crate) enum PrevPos {
-    /// Starting index within the dictionary.  
-    First(u32),
-    /// Regular index within the buckets.
-    Chained(u32),
-    /// Unknown - e.g. the associated entry was retrieved by index instead of chain.
-    Unknown(u64),
-}
-
-pub struct OccupiedEntry<'a, 'b, K, V> {
-    /// Mutable reference to the map containing this entry.
-    pub(crate) map: RwLockWriteGuard<'b, CoreHashMap<'a, K, V>>,
+pub struct OccupiedEntry<'a, K, V> {
    /// The key of the occupied entry
    pub(crate) _key: K,
-    /// The index of the previous entry in the chain.
-    pub(crate) prev_pos: PrevPos,
+    /// Mutable reference to the shard of the map the entry is in.
+    pub(crate) shard: RwLockWriteGuard<'a, DictShard<'a, K>>,
+	/// The position of the entry in the map.
+    pub(crate) shard_pos: usize,
+	/// Mutable reference to the bucket array containing entry.
+	pub(crate) bucket_arr: &'a mut BucketArray<'a, V>,
    /// The position of the bucket in the [`CoreHashMap`] bucket array.
-    pub(crate) bucket_pos: u32,
+    pub(crate) bucket_pos: usize,
 }

-impl<K, V> OccupiedEntry<'_, '_, K, V> {
+impl<K, V> OccupiedEntry<'_, K, V> {
    pub fn get(&self) -> &V {
-        &self.map.buckets[self.bucket_pos as usize]
-            .inner
-            .as_ref()
-            .unwrap()
-            .1
+		self.bucket_arr.buckets[self.bucket_pos].as_ref()
    }

    pub fn get_mut(&mut self) -> &mut V {
-        &mut self.map.buckets[self.bucket_pos as usize]
-            .inner
-            .as_mut()
-            .unwrap()
-            .1
+		self.bucket_arr.buckets[self.bucket_pos].as_mut()
    }

    /// Inserts a value into the entry, replacing (and returning) the existing value.
    pub fn insert(&mut self, value: V) -> V {
-        let bucket = &mut self.map.buckets[self.bucket_pos as usize];
-        // This assumes inner is Some, which it must be for an OccupiedEntry
-        mem::replace(&mut bucket.inner.as_mut().unwrap().1, value)
+        self.bucket_arr.buckets[self.bucket_pos].replace(value)
    }

    /// Removes the entry from the hash map, returning the value originally stored within it.
-    ///
-    /// This may result in multiple bucket accesses if the entry was obtained by index as the
-    /// previous chain entry needs to be discovered in this case.
-    ///
-    /// # Panics
-    /// Panics if the `prev_pos` field is equal to [`PrevPos::Unknown`]. In practice, this means
-    /// the entry was obtained via calling something like [`CoreHashMap::entry_at_bucket`].
-    pub fn remove(mut self) -> V {
-        // If this bucket was queried by index, go ahead and follow its chain from the start.
-        let prev = if let PrevPos::Unknown(hash) = self.prev_pos {
-            let dict_idx = hash as usize % self.map.dictionary.len();
-            let mut prev = PrevPos::First(dict_idx as u32);
-            let mut curr = self.map.dictionary[dict_idx];
-            while curr != self.bucket_pos {
-                assert!(curr != INVALID_POS);
-                prev = PrevPos::Chained(curr);
-                curr = self.map.buckets[curr as usize].next;
-            }
-            prev
-        } else {
-            self.prev_pos
-        };
-
-        // CoreHashMap::remove returns Option<(K, V)>. We know it's Some for an OccupiedEntry.
-        let bucket = &mut self.map.buckets[self.bucket_pos as usize];
-
-        // unlink it from the chain
-        match prev {
-            PrevPos::First(dict_pos) => {
-                self.map.dictionary[dict_pos as usize] = bucket.next;
-            }
-            PrevPos::Chained(bucket_pos) => {
-                // println!("we think prev of {} is {bucket_pos}", self.bucket_pos);
-                self.map.buckets[bucket_pos as usize].next = bucket.next;
-            }
-            _ => unreachable!(),
-        }
-
-        // and add it to the freelist
-        let free = self.map.free_head;
-        let bucket = &mut self.map.buckets[self.bucket_pos as usize];
-        let old_value = bucket.inner.take();
-        bucket.next = free;
-        self.map.free_head = self.bucket_pos;
-        self.map.buckets_in_use -= 1;
-
-        old_value.unwrap().1
+    pub fn remove(&mut self) -> V {
+		self.shard.idxs[self.shard_pos] = BucketIdx::invalid();
+		self.shard.keys[self.shard_pos].tag = EntryType::Tombstone;
+        self.bucket_arr.dealloc_bucket(self.bucket_pos)
    }
 }

 /// An abstract view into a vacant entry within the map.
-pub struct VacantEntry<'a, 'b, K, V> {
-    /// Mutable reference to the map containing this entry.
-    pub(crate) map: RwLockWriteGuard<'b, CoreHashMap<'a, K, V>>,
-    /// The key to be inserted into this entry.
-    pub(crate) key: K,
-    /// The position within the dictionary corresponding to the key's hash.
-    pub(crate) dict_pos: u32,
+pub struct VacantEntry<'a, K, V> {
+    /// The key of the occupied entry
+    pub(crate) _key: K,
+    /// Mutable reference to the shard of the map the entry is in.
+    pub(crate) shard: RwLockWriteGuard<'a, DictShard<'a, K>>,
+	/// The position of the entry in the map.
+    pub(crate) shard_pos: usize,
+	/// Mutable reference to the bucket array containing entry.
+	pub(crate) bucket_arr: &'a mut BucketArray<'a, V>,
 }

-impl<'b, K: Clone + Hash + Eq, V> VacantEntry<'_, 'b, K, V> {
+impl<'a, K: Clone + Hash + Eq, V> VacantEntry<'a, K, V> {
    /// Insert a value into the vacant entry, finding and populating an empty bucket in the process.
-    ///
-    /// # Errors
-    /// Will return [`FullError`] if there are no unoccupied buckets in the map.
-    pub fn insert(mut self, value: V) -> Result<ValueWriteGuard<'b, V>, FullError> {
-        let pos = self.map.alloc_bucket(self.key, value)?;
-        if pos == INVALID_POS {
-            return Err(FullError());
-        }
-        self.map.buckets[pos as usize].next = self.map.dictionary[self.dict_pos as usize];
-        self.map.dictionary[self.dict_pos as usize] = pos;
+    pub fn insert(mut self, value: V) -> ValueWriteGuard<'a, V> {
+		let pos = self.bucket_arr.alloc_bucket(value).expect("bucket is available if entry is");
+		self.shard.keys[self.shard_pos].tag = EntryType::Occupied;
+		self.shard.keys[self.shard_pos].val.write(self._key);
+		let idx = pos.pos_checked().expect("position is valid");
+		self.shard.idxs[self.shard_pos] = pos;

-        Ok(RwLockWriteGuard::map(self.map, |m| {
-            &mut m.buckets[pos as usize].inner.as_mut().unwrap().1
-        }))
+        RwLockWriteGuard::map(self.shard, |_| {
+            self.bucket_arr.buckets[idx].as_mut()
+        })
    }
 }
--- a/libs/neon-shmem/src/hash/tests.rs
+++ b/libs/neon-shmem/src/hash/tests.rs
@@ -36,18 +36,17 @@ impl<'a> From<&'a [u8]> for TestKey {
 }

 fn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {
-    let w = HashMapInit::<TestKey, usize>::new_resizeable_named(100000, 120000, "test_inserts")
+    let w = HashMapInit::<TestKey, usize>::new_resizeable_named(100000, 120000, 100, "test_inserts")
        .attach_writer();

    for (idx, k) in keys.iter().enumerate() {
        let res = w.entry((*k).into());
-        match res {
+        match res.unwrap() {
            Entry::Occupied(mut e) => {
                e.insert(idx);
            }
            Entry::Vacant(e) => {
-                let res = e.insert(idx);
-                assert!(res.is_ok());
+                _ = e.insert(idx);
            }
        };
    }
@@ -112,15 +111,15 @@ fn apply_op(

    let entry = map.entry(op.0);
    let hash_existing = match op.1 {
-        Some(new) => match entry {
+        Some(new) => match entry.unwrap() {
            Entry::Occupied(mut e) => Some(e.insert(new)),
            Entry::Vacant(e) => {
-                _ = e.insert(new).unwrap();
+                _ = e.insert(new);
                None
            }
        },
-        None => match entry {
-            Entry::Occupied(e) => Some(e.remove()),
+        None => match entry.unwrap() {
+            Entry::Occupied(mut e) => Some(e.remove()),
            Entry::Vacant(_) => None,
        },
    };
@@ -164,15 +163,15 @@ fn do_deletes(
 fn do_shrink(
    writer: &mut HashMapAccess<TestKey, usize>,
    shadow: &mut BTreeMap<TestKey, usize>,
-    to: u32,
+    to: usize,
 ) {
    assert!(writer.shrink_goal().is_none());
    writer.begin_shrink(to);
    assert_eq!(writer.shrink_goal(), Some(to as usize));
    while writer.get_num_buckets_in_use() > to as usize {
        let (k, _) = shadow.pop_first().unwrap();
-        let entry = writer.entry(k);
-        if let Entry::Occupied(e) = entry {
+        let entry = writer.entry(k).unwrap();
+        if let Entry::Occupied(mut e) = entry {
            e.remove();
        }
    }
@@ -185,7 +184,7 @@ fn do_shrink(
 #[test]
 fn random_ops() {
    let mut writer =
-        HashMapInit::<TestKey, usize>::new_resizeable_named(100000, 120000, "test_random")
+        HashMapInit::<TestKey, usize>::new_resizeable_named(100000, 120000, 10, "test_random")
            .attach_writer();
    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();

@@ -200,153 +199,153 @@ fn random_ops() {
    }
 }

-#[test]
-fn test_shuffle() {
-    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 1200, "test_shuf")
-        .attach_writer();
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-    let mut rng = rand::rng();
+// #[test]
+// fn test_shuffle() {
+//     let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 1200, 10, "test_shuf")
+//         .attach_writer();
+//     let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
+//     let mut rng = rand::rng();

-    do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
-    writer.shuffle();
-    do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
-}
+//     do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
+//     writer.shuffle();
+//     do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
+// }

-#[test]
-fn test_grow() {
-    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 2000, "test_grow")
-        .attach_writer();
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-    let mut rng = rand::rng();
+// #[test]
+// fn test_grow() {
+//     let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 2000, 10, "test_grow")
+//         .attach_writer();
+//     let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
+//     let mut rng = rand::rng();

-    do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
-    let old_usage = writer.get_num_buckets_in_use();
-    writer.grow(1500).unwrap();
-    assert_eq!(writer.get_num_buckets_in_use(), old_usage);
-    assert_eq!(writer.get_num_buckets(), 1500);
-    do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
-}
+//     do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
+//     let old_usage = writer.get_num_buckets_in_use();
+//     writer.grow(1500).unwrap();
+//     assert_eq!(writer.get_num_buckets_in_use(), old_usage);
+//     assert_eq!(writer.get_num_buckets(), 1500);
+//     do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
+// }

 #[test]
 fn test_clear() {
-    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_clear")
+    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, 10, "test_clear")
        .attach_writer();
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-    let mut rng = rand::rng();
-    do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
-    writer.clear();
-    assert_eq!(writer.get_num_buckets_in_use(), 0);
-    assert_eq!(writer.get_num_buckets(), 1500);
-    while let Some((key, _)) = shadow.pop_first() {
-        assert!(writer.get(&key).is_none());
-    }
-    do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
-    for i in 0..(1500 - writer.get_num_buckets_in_use()) {
-        writer.insert((1500 + i as u128).into(), 0).unwrap();
-    }
-    assert_eq!(writer.insert(5000.into(), 0), Err(FullError {}));
-    writer.clear();
-    assert!(writer.insert(5000.into(), 0).is_ok());
+    // let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
+    // let mut rng = rand::rng();
+    // do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
+    // writer.clear();
+    // assert_eq!(writer.get_num_buckets_in_use(), 0);
+    // assert_eq!(writer.get_num_buckets(), 1500);
+    // while let Some((key, _)) = shadow.pop_first() {
+    //     assert!(writer.get(&key).is_none());
+    // }
+    // do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
+    // for i in 0..(1500 - writer.get_num_buckets_in_use()) {
+    //     writer.insert((1500 + i as u128).into(), 0).unwrap();
+    // }
+    // assert_eq!(writer.insert(5000.into(), 0), Err(FullError {}));
+    // writer.clear();
+    // assert!(writer.insert(5000.into(), 0).is_ok());
 }

-#[test]
-fn test_idx_remove() {
-    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_clear")
-        .attach_writer();
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-    let mut rng = rand::rng();
-    do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
-    for _ in 0..100 {
-        let idx = (rng.next_u32() % 1500) as usize;
-        if let Some(e) = writer.entry_at_bucket(idx) {
-            shadow.remove(&e._key);
-            e.remove();
-        }
-    }
-    while let Some((key, val)) = shadow.pop_first() {
-        assert_eq!(*writer.get(&key).unwrap(), val);
-    }
-}
+// #[test]
+// fn test_idx_remove() {
+//     let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, 10, "test_clear")
+//         .attach_writer();
+//     let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
+//     let mut rng = rand::rng();
+//     do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
+//     for _ in 0..100 {
+//         let idx = (rng.next_u32() % 1500) as usize;
+//         if let Some(e) = writer.entry_at_bucket(idx) {
+//             shadow.remove(&e._key);
+//             e.remove();
+//         }
+//     }
+//     while let Some((key, val)) = shadow.pop_first() {
+//         assert_eq!(*writer.get(&key).unwrap(), val);
+//     }
+// }

-#[test]
-fn test_idx_get() {
-    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_clear")
-        .attach_writer();
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-    let mut rng = rand::rng();
-    do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
-    for _ in 0..100 {
-        let idx = (rng.next_u32() % 1500) as usize;
-        if let Some(pair) = writer.get_at_bucket(idx) {
-            {
-                let v: *const usize = &pair.1;
-                assert_eq!(writer.get_bucket_for_value(v), idx);
-            }
-            {
-                let v: *const usize = &pair.1;
-                assert_eq!(writer.get_bucket_for_value(v), idx);
-            }
-        }
-    }
-}
+// #[test]
+// fn test_idx_get() {
+//     let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_clear")
+//         .attach_writer();
+//     let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
+//     let mut rng = rand::rng();
+//     do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
+//     for _ in 0..100 {
+//         let idx = (rng.next_u32() % 1500) as usize;
+//         if let Some(pair) = writer.get_at_bucket(idx) {
+//             {
+//                 let v: *const usize = &pair.1;
+//                 assert_eq!(writer.get_bucket_for_value(v), idx);
+//             }
+//             {
+//                 let v: *const usize = &pair.1;
+//                 assert_eq!(writer.get_bucket_for_value(v), idx);
+//             }
+//         }
+//     }
+// }

-#[test]
-fn test_shrink() {
-    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_shrink")
-        .attach_writer();
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-    let mut rng = rand::rng();
+// #[test]
+// fn test_shrink() {
+//     let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_shrink")
+//         .attach_writer();
+//     let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
+//     let mut rng = rand::rng();

-    do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
-    do_shrink(&mut writer, &mut shadow, 1000);
-    assert_eq!(writer.get_num_buckets(), 1000);
-    do_deletes(500, &mut writer, &mut shadow);
-    do_random_ops(10000, 500, 0.75, &mut writer, &mut shadow, &mut rng);
-    assert!(writer.get_num_buckets_in_use() <= 1000);
-}
+//     do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
+//     do_shrink(&mut writer, &mut shadow, 1000);
+//     assert_eq!(writer.get_num_buckets(), 1000);
+//     do_deletes(500, &mut writer, &mut shadow);
+//     do_random_ops(10000, 500, 0.75, &mut writer, &mut shadow, &mut rng);
+//     assert!(writer.get_num_buckets_in_use() <= 1000);
+// }

-#[test]
-fn test_shrink_grow_seq() {
-    let mut writer =
-        HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 20000, "test_grow_seq")
-            .attach_writer();
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-    let mut rng = rand::rng();
+// #[test]
+// fn test_shrink_grow_seq() {
+//     let mut writer =
+//         HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 20000, "test_grow_seq")
+//             .attach_writer();
+//     let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
+//     let mut rng = rand::rng();

-    do_random_ops(500, 1000, 0.1, &mut writer, &mut shadow, &mut rng);
-    eprintln!("Shrinking to 750");
-    do_shrink(&mut writer, &mut shadow, 750);
-    do_random_ops(200, 1000, 0.5, &mut writer, &mut shadow, &mut rng);
-    eprintln!("Growing to 1500");
-    writer.grow(1500).unwrap();
-    do_random_ops(600, 1500, 0.1, &mut writer, &mut shadow, &mut rng);
-    eprintln!("Shrinking to 200");
-    while shadow.len() > 100 {
-        do_deletes(1, &mut writer, &mut shadow);
-    }
-    do_shrink(&mut writer, &mut shadow, 200);
-    do_random_ops(50, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
-    eprintln!("Growing to 10k");
-    writer.grow(10000).unwrap();
-    do_random_ops(10000, 5000, 0.25, &mut writer, &mut shadow, &mut rng);
-}
+//     do_random_ops(500, 1000, 0.1, &mut writer, &mut shadow, &mut rng);
+//     eprintln!("Shrinking to 750");
+//     do_shrink(&mut writer, &mut shadow, 750);
+//     do_random_ops(200, 1000, 0.5, &mut writer, &mut shadow, &mut rng);
+//     eprintln!("Growing to 1500");
+//     writer.grow(1500).unwrap();
+//     do_random_ops(600, 1500, 0.1, &mut writer, &mut shadow, &mut rng);
+//     eprintln!("Shrinking to 200");
+//     while shadow.len() > 100 {
+//         do_deletes(1, &mut writer, &mut shadow);
+//     }
+//     do_shrink(&mut writer, &mut shadow, 200);
+//     do_random_ops(50, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
+//     eprintln!("Growing to 10k");
+//     writer.grow(10000).unwrap();
+//     do_random_ops(10000, 5000, 0.25, &mut writer, &mut shadow, &mut rng);
+// }

 #[test]
 fn test_bucket_ops() {
-    let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 1200, "test_bucket_ops")
+    let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 1200, 10, "test_bucket_ops")
        .attach_writer();
-    match writer.entry(1.into()) {
+    match writer.entry(1.into()).unwrap() {
        Entry::Occupied(mut e) => {
            e.insert(2);
        }
        Entry::Vacant(e) => {
-            _ = e.insert(2).unwrap();
-        }
+            _ = e.insert(2);
+        },
    }
    assert_eq!(writer.get_num_buckets_in_use(), 1);
    assert_eq!(writer.get_num_buckets(), 1000);
    assert_eq!(*writer.get(&1.into()).unwrap(), 2);
-    let pos = match writer.entry(1.into()) {
+    let pos = match writer.entry(1.into()).unwrap() {
        Entry::Occupied(e) => {
            assert_eq!(e._key, 1.into());
            let pos = e.bucket_pos as usize;
@@ -356,8 +355,7 @@ fn test_bucket_ops() {
            panic!("Insert didn't affect entry");
        }
    };
-    assert_eq!(writer.entry_at_bucket(pos).unwrap()._key, 1.into());
-    assert_eq!(*writer.get_at_bucket(pos).unwrap(), (1.into(), 2));
+    assert_eq!(writer.get_at_bucket(pos).unwrap(), &2);
    {
        let ptr: *const usize = &*writer.get(&1.into()).unwrap();
        assert_eq!(writer.get_bucket_for_value(ptr), pos);
@@ -366,64 +364,64 @@ fn test_bucket_ops() {
    assert!(writer.get(&1.into()).is_none());
 }

-#[test]
-fn test_shrink_zero() {
-    let mut writer =
-        HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_shrink_zero")
-            .attach_writer();
-    writer.begin_shrink(0);
-    for i in 0..1500 {
-        writer.entry_at_bucket(i).map(|x| x.remove());
-    }
-    writer.finish_shrink().unwrap();
-    assert_eq!(writer.get_num_buckets_in_use(), 0);
-    let entry = writer.entry(1.into());
-    if let Entry::Vacant(v) = entry {
-        assert!(v.insert(2).is_err());
-    } else {
-        panic!("Somehow got non-vacant entry in empty map.")
-    }
-    writer.grow(50).unwrap();
-    let entry = writer.entry(1.into());
-    if let Entry::Vacant(v) = entry {
-        assert!(v.insert(2).is_ok());
-    } else {
-        panic!("Somehow got non-vacant entry in empty map.")
-    }
-    assert_eq!(writer.get_num_buckets_in_use(), 1);
-}
+// #[test]
+// fn test_shrink_zero() {
+//     let mut writer =
+//         HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_shrink_zero")
+//             .attach_writer();
+//     writer.begin_shrink(0);
+//     for i in 0..1500 {
+//         writer.entry_at_bucket(i).map(|x| x.remove());
+//     }
+//     writer.finish_shrink().unwrap();
+//     assert_eq!(writer.get_num_buckets_in_use(), 0);
+//     let entry = writer.entry(1.into());
+//     if let Entry::Vacant(v) = entry {
+//         assert!(v.insert(2).is_err());
+//     } else {
+//         panic!("Somehow got non-vacant entry in empty map.")
+//     }
+//     writer.grow(50).unwrap();
+//     let entry = writer.entry(1.into());
+//     if let Entry::Vacant(v) = entry {
+//         assert!(v.insert(2).is_ok());
+//     } else {
+//         panic!("Somehow got non-vacant entry in empty map.")
+//     }
+//     assert_eq!(writer.get_num_buckets_in_use(), 1);
+// }

-#[test]
-#[should_panic]
-fn test_grow_oom() {
-    let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_grow_oom")
-        .attach_writer();
-    writer.grow(20000).unwrap();
-}
+// #[test]
+// #[should_panic]
+// fn test_grow_oom() {
+//     let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_grow_oom")
+//         .attach_writer();
+//     writer.grow(20000).unwrap();
+// }

-#[test]
-#[should_panic]
-fn test_shrink_bigger() {
-    let mut writer =
-        HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2500, "test_shrink_bigger")
-            .attach_writer();
-    writer.begin_shrink(2000);
-}
+// #[test]
+// #[should_panic]
+// fn test_shrink_bigger() {
+//     let mut writer =
+//         HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2500, "test_shrink_bigger")
+//             .attach_writer();
+//     writer.begin_shrink(2000);
+// }

-#[test]
-#[should_panic]
-fn test_shrink_early_finish() {
-    let writer =
-        HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2500, "test_shrink_early_finish")
-            .attach_writer();
-    writer.finish_shrink().unwrap();
-}
+// #[test]
+// #[should_panic]
+// fn test_shrink_early_finish() {
+//     let writer =
+//         HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2500, "test_shrink_early_finish")
+//             .attach_writer();
+//     writer.finish_shrink().unwrap();
+// }

-#[test]
-#[should_panic]
-fn test_shrink_fixed_size() {
-    let mut area = [MaybeUninit::uninit(); 10000];
-    let init_struct = HashMapInit::<TestKey, usize>::with_fixed(3, &mut area);
-    let mut writer = init_struct.attach_writer();
-    writer.begin_shrink(1);
-}
+// #[test]
+// #[should_panic]
+// fn test_shrink_fixed_size() {
+//     let mut area = [MaybeUninit::uninit(); 10000];
+//     let init_struct = HashMapInit::<TestKey, usize>::with_fixed(3, &mut area);
+//     let mut writer = init_struct.attach_writer();
+//     writer.begin_shrink(1);
+// }
--- a/libs/neon-shmem/src/sync.rs
+++ b/libs/neon-shmem/src/sync.rs
@@ -6,6 +6,7 @@ use std::ptr::NonNull;
 use nix::errno::Errno;

 pub type RwLock<T> = lock_api::RwLock<PthreadRwLock, T>;
+pub type Mutex<T> = lock_api::Mutex<PthreadMutex, T>;
 pub(crate) type RwLockReadGuard<'a, T> = lock_api::RwLockReadGuard<'a, PthreadRwLock, T>;
 pub type RwLockWriteGuard<'a, T> = lock_api::RwLockWriteGuard<'a, PthreadRwLock, T>;
 pub type ValueReadGuard<'a, T> = lock_api::MappedRwLockReadGuard<'a, PthreadRwLock, T>;
@@ -43,7 +44,7 @@ impl PthreadRwLock {
 	
 	fn inner(&self) -> NonNull<libc::pthread_rwlock_t> {
 		self.0.unwrap_or_else(
-			|| panic!("PthreadRwLock constructed badly - something likely used RawMutex::INIT")
+			|| panic!("PthreadRwLock constructed badly - something likely used RawRwLock::INIT")
 		)
 	}

@@ -105,3 +106,64 @@ unsafe impl lock_api::RawRwLock for PthreadRwLock {
 		self.unlock();
 	}
 }
+
+pub struct PthreadMutex(Option<NonNull<libc::pthread_mutex_t>>);
+
+impl PthreadMutex {
+	pub fn new(lock: NonNull<libc::pthread_mutex_t>) -> Self {
+		unsafe {
+			let mut attrs = MaybeUninit::uninit();
+			// Ignoring return value here - only possible error is OOM.
+			libc::pthread_mutexattr_init(attrs.as_mut_ptr());
+			libc::pthread_mutexattr_setpshared(
+				attrs.as_mut_ptr(),
+				libc::PTHREAD_PROCESS_SHARED
+			);
+			libc::pthread_mutex_init(lock.as_ptr(), attrs.as_mut_ptr());
+			// Safety: POSIX specifies that "any function affecting the attributes
+			// object (including destruction) shall not affect any previously
+			// initialized read-write locks". 
+			libc::pthread_mutexattr_destroy(attrs.as_mut_ptr());
+			Self(Some(lock))
+		}
+	}
+
+	fn inner(&self) -> NonNull<libc::pthread_mutex_t> {
+		self.0.unwrap_or_else(
+			|| panic!("PthreadMutex constructed badly - something likely used RawMutex::INIT")
+		)
+	}
+
+}
+
+unsafe impl lock_api::RawMutex for PthreadMutex {
+	type GuardMarker = lock_api::GuardSend;
+
+	/// *DO NOT USE THIS.* See [`PthreadRwLock`] for the full explanation.
+	const INIT: Self = Self(None);	
+
+	fn lock(&self) {
+		unsafe {
+			let res = libc::pthread_mutex_lock(self.inner().as_ptr());
+			assert!(res == 0, "lock failed with {}", Errno::from_raw(res));
+		}
+	}
+
+	fn try_lock(&self) -> bool {
+		unsafe {
+			let res = libc::pthread_mutex_trylock(self.inner().as_ptr());
+			match res {
+				0 => true,
+				libc::EAGAIN => false,
+				o => panic!("try_rdlock failed with {}", Errno::from_raw(o)),
+			}
+		}
+	}
+
+	unsafe fn unlock(&self) {
+		unsafe {
+			let res = libc::pthread_mutex_unlock(self.inner().as_ptr());
+			assert!(res == 0, "unlock failed with {}", Errno::from_raw(res));
+		}
+	}
+}