Merge branch 'quantumish/comm-lfc-integration' into communicator-rewrite

2026-06-03 13:30:38 +00:00 · 2025-07-03 10:52:29 -07:00
parent 42e4e5a418 86fb7b966a
commit 794bb7a9e8
11 changed files with 3260 additions and 1962 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/libs/neon-shmem/Cargo.toml
+++ b/libs/neon-shmem/Cargo.toml
@@ -7,12 +7,26 @@ license.workspace = true
 [dependencies]
 thiserror.workspace = true
 nix.workspace = true
-spin.workspace = true
 workspace_hack = { version = "0.1", path = "../../workspace_hack" }
+rustc-hash = { version = "2.1.1" }
+rand = "0.9.1"
+libc.workspace = true
+lock_api = "0.4.13"

 [dev-dependencies]
-rand = "0.9.1"
+criterion = { workspace = true, features = ["html_reports"] }
 rand_distr = "0.5.1"
+xxhash-rust = { version = "0.8.15", features = ["xxh3"] }
+ahash.workspace = true
+twox-hash = { version = "2.1.1" }
+seahash = "4.1.0"
+hashbrown = { git = "https://github.com/quantumish/hashbrown.git", rev = "6610e6d" }
+foldhash = "0.1.5"
+

 [target.'cfg(target_os = "macos")'.dependencies]
 tempfile = "3.14.0"
+
+[[bench]]
+name = "hmap_resize"
+harness = false
--- a/libs/neon-shmem/benches/hmap_resize.rs
+++ b/libs/neon-shmem/benches/hmap_resize.rs
@@ -0,0 +1,282 @@
+use criterion::{criterion_group, criterion_main, BatchSize, Criterion, BenchmarkId};
+use neon_shmem::hash::HashMapAccess;
+use neon_shmem::hash::HashMapInit;
+use neon_shmem::hash::entry::Entry;
+use rand::prelude::*;
+use rand::distr::{Distribution, StandardUniform};
+use std::hash::BuildHasher;
+use std::default::Default;
+	
+// Taken from bindings to C code
+
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+#[repr(C)]
+pub struct FileCacheKey {
+    pub _spc_id: u32,
+    pub _db_id: u32,
+    pub _rel_number: u32,
+    pub _fork_num: u32,
+    pub _block_num: u32,
+}
+
+impl Distribution<FileCacheKey> for StandardUniform {
+	// questionable, but doesn't need to be good randomness
+	fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> FileCacheKey {
+		FileCacheKey {
+			_spc_id: rng.random(),
+			_db_id: rng.random(),
+			_rel_number: rng.random(),
+			_fork_num: rng.random(),
+			_block_num: rng.random()
+		}
+    }
+}
+
+#[derive(Clone, Debug)]
+#[repr(C)]
+pub struct FileCacheEntry {
+    pub _offset: u32,
+    pub _access_count: u32,
+    pub _prev: *mut FileCacheEntry,
+    pub _next: *mut FileCacheEntry,
+    pub _state: [u32; 8],
+}
+
+impl FileCacheEntry {
+	fn dummy() -> Self {
+		Self {
+			_offset: 0,
+			_access_count: 0,
+			_prev: std::ptr::null_mut(),
+			_next: std::ptr::null_mut(),
+			_state: [0; 8]				
+		}
+	}
+}
+
+// Utilities for applying operations.
+
+#[derive(Clone, Debug)]
+struct TestOp<K,V>(K, Option<V>);
+
+fn apply_op<K: Clone + std::hash::Hash + Eq, V, S: std::hash::BuildHasher>(
+    op: TestOp<K,V>,
+    map: &mut HashMapAccess<K,V,S>,
+) {
+	let entry = map.entry(op.0);
+
+    match op.1 {
+		Some(new) => {
+			match entry {
+				Entry::Occupied(mut e) => Some(e.insert(new)),
+				Entry::Vacant(e) => { _ = e.insert(new).unwrap(); None },
+			}
+		},
+		None => {
+			match entry {
+				Entry::Occupied(e) => Some(e.remove()),
+				Entry::Vacant(_) => None,
+			}
+		},
+	};
+}
+
+// Hash utilities
+
+struct SeaRandomState {
+	k1: u64,
+	k2: u64,
+	k3: u64,
+	k4: u64 
+}
+
+impl std::hash::BuildHasher for SeaRandomState {
+	type Hasher = seahash::SeaHasher;
+	
+	fn build_hasher(&self) -> Self::Hasher {
+		seahash::SeaHasher::with_seeds(self.k1, self.k2, self.k3, self.k4)
+	}
+}
+
+impl SeaRandomState {
+	fn new() -> Self {
+		let mut rng = rand::rng();
+		Self { k1: rng.random(), k2: rng.random(), k3: rng.random(), k4: rng.random() }
+	}
+}
+
+fn small_benchs(c: &mut Criterion) {
+	let mut group = c.benchmark_group("Small maps");
+    group.sample_size(10);
+        
+	group.bench_function("small_rehash", |b| {
+		let ideal_filled = 4_000_000;
+		let size = 5_000_000;
+		let mut writer = HashMapInit::new_resizeable(size, size * 2).attach_writer();
+		let mut rng = rand::rng();		
+		while writer.get_num_buckets_in_use() < ideal_filled as usize {
+			let key: FileCacheKey = rng.random();
+			let val = FileCacheEntry::dummy();
+			apply_op(TestOp(key, Some(val)), &mut writer);
+		}
+		b.iter(|| writer.shuffle());
+	});
+	
+
+	group.bench_function("small_rehash_xxhash", |b| {
+		let ideal_filled = 4_000_000;
+		let size = 5_000_000;
+		let mut writer = HashMapInit::new_resizeable(size, size * 2)
+			.with_hasher(twox_hash::xxhash64::RandomState::default())
+			.attach_writer();
+		let mut rng = rand::rng();		
+		while writer.get_num_buckets_in_use() < ideal_filled as usize {
+			let key: FileCacheKey = rng.random();
+			let val = FileCacheEntry::dummy();
+			apply_op(TestOp(key, Some(val)), &mut writer);
+		}
+		b.iter(|| writer.shuffle());
+	});
+
+	
+	group.bench_function("small_rehash_ahash", |b| {
+		let ideal_filled = 4_000_000;
+		let size = 5_000_000;
+		let mut writer = HashMapInit::new_resizeable(size, size * 2)
+			.with_hasher(ahash::RandomState::default())
+			.attach_writer();
+		let mut rng = rand::rng();		
+		while writer.get_num_buckets_in_use() < ideal_filled as usize {
+			let key: FileCacheKey = rng.random();
+			let val = FileCacheEntry::dummy();
+			apply_op(TestOp(key, Some(val)), &mut writer);
+		}
+		b.iter(|| writer.shuffle());
+	});
+
+	group.bench_function("small_rehash_seahash", |b| {
+		let ideal_filled = 4_000_000;
+		let size = 5_000_000;
+		let mut writer = HashMapInit::new_resizeable(size, size * 2)
+			.with_hasher(SeaRandomState::new())
+			.attach_writer();
+		let mut rng = rand::rng();
+		while writer.get_num_buckets_in_use() < ideal_filled as usize {
+			let key: FileCacheKey = rng.random();
+			let val = FileCacheEntry::dummy();
+			apply_op(TestOp(key, Some(val)), &mut writer);
+		}
+		b.iter(|| writer.shuffle());
+	});
+
+	group.finish();	
+}
+
+fn real_benchs(c: &mut Criterion) {
+	let mut group = c.benchmark_group("Realistic workloads");
+	group.sample_size(10);	
+    group.bench_function("real_bulk_insert", |b| {
+		let size = 125_000_000;
+		let ideal_filled = 100_000_000;		
+		let mut rng = rand::rng();		
+		b.iter_batched(
+			|| HashMapInit::new_resizeable(size, size * 2).attach_writer(),
+			|writer| {
+				for _ in 0..ideal_filled {
+					let key: FileCacheKey = rng.random();
+					let val = FileCacheEntry::dummy();
+					let entry = writer.entry(key);
+					std::hint::black_box(match entry {
+						Entry::Occupied(mut e) => { e.insert(val); },
+						Entry::Vacant(e) => { _ = e.insert(val).unwrap(); },
+					})
+				}	
+			},
+			BatchSize::SmallInput,
+		)
+	});
+
+	group.bench_function("real_rehash", |b| {
+		let size = 125_000_000;
+		let ideal_filled = 100_000_000;		
+		let mut writer = HashMapInit::new_resizeable(size, size).attach_writer();
+		let mut rng = rand::rng();		
+		while writer.get_num_buckets_in_use() < ideal_filled {
+			let key: FileCacheKey = rng.random();
+			let val = FileCacheEntry::dummy();
+			apply_op(TestOp(key, Some(val)), &mut writer);
+		}
+		b.iter(|| writer.shuffle());
+	});
+	
+	group.bench_function("real_rehash_hashbrown", |b| {
+		let size = 125_000_000;
+		let ideal_filled = 100_000_000;
+		let mut writer = hashbrown::raw::RawTable::new();
+		let mut rng = rand::rng();
+		let hasher = rustc_hash::FxBuildHasher::default();
+		unsafe {
+			writer.resize(size, |(k,_)| hasher.hash_one(&k),
+						  hashbrown::raw::Fallibility::Infallible).unwrap();
+		}
+		while writer.len() < ideal_filled as usize {
+			let key: FileCacheKey = rng.random();
+			let val = FileCacheEntry::dummy();
+			writer.insert(hasher.hash_one(&key), (key, val), |(k,_)| hasher.hash_one(&k));
+		}
+		b.iter(|| unsafe { writer.table.rehash_in_place(
+			&|table, index| hasher.hash_one(&table.bucket::<(FileCacheKey, FileCacheEntry)>(index).as_ref().0),
+			std::mem::size_of::<(FileCacheKey, FileCacheEntry)>(),
+            if std::mem::needs_drop::<(FileCacheKey, FileCacheEntry)>() {
+                Some(|ptr| std::ptr::drop_in_place(ptr as *mut (FileCacheKey, FileCacheEntry)))
+            } else {
+                None
+            },
+		) });
+	});
+
+	for elems in [2, 4, 8, 16, 32, 64, 96, 112] {
+		group.bench_with_input(BenchmarkId::new("real_rehash_varied", elems), &elems, |b, &size| {
+			let ideal_filled = size * 1_000_000;
+			let size = 125_000_000;
+			let mut writer = HashMapInit::new_resizeable(size, size).attach_writer();
+			let mut rng = rand::rng();		
+			while writer.get_num_buckets_in_use() < ideal_filled as usize {
+				let key: FileCacheKey = rng.random();
+				let val = FileCacheEntry::dummy();
+				apply_op(TestOp(key, Some(val)), &mut writer);
+			}
+			b.iter(|| writer.shuffle());
+		});
+		group.bench_with_input(BenchmarkId::new("real_rehash_varied_hashbrown", elems), &elems, |b, &size| {
+			let ideal_filled = size * 1_000_000;
+			let size = 125_000_000;
+			let mut writer = hashbrown::raw::RawTable::new();
+			let mut rng = rand::rng();
+			let hasher = rustc_hash::FxBuildHasher::default();
+			unsafe {
+				writer.resize(size, |(k,_)| hasher.hash_one(&k),
+							  hashbrown::raw::Fallibility::Infallible).unwrap();
+			}
+			while writer.len() < ideal_filled as usize {
+				let key: FileCacheKey = rng.random();
+				let val = FileCacheEntry::dummy();
+				writer.insert(hasher.hash_one(&key), (key, val), |(k,_)| hasher.hash_one(&k));
+			}
+			b.iter(|| unsafe { writer.table.rehash_in_place(
+				&|table, index| hasher.hash_one(&table.bucket::<(FileCacheKey, FileCacheEntry)>(index).as_ref().0),
+				std::mem::size_of::<(FileCacheKey, FileCacheEntry)>(),
+				if std::mem::needs_drop::<(FileCacheKey, FileCacheEntry)>() {
+					Some(|ptr| std::ptr::drop_in_place(ptr as *mut (FileCacheKey, FileCacheEntry)))
+				} else {
+					None
+				},
+			) });
+		});
+	}
+	
+	group.finish();
+}
+	
+criterion_group!(benches, small_benchs, real_benchs);
+criterion_main!(benches);
--- a/libs/neon-shmem/src/hash.rs
+++ b/libs/neon-shmem/src/hash.rs
@@ -1,309 +1,365 @@
-//! Hash table implementation on top of 'shmem'
+//! Resizable hash table implementation on top of byte-level storage (either a [`ShmemHandle`] or a fixed byte array).
 //!
-//! Features required in the long run by the communicator project:
+//! This hash table has two major components: the bucket array and the dictionary. Each bucket within the
+//! bucket array contains a `Option<(K, V)>` and an index of another bucket. In this way there is both an 
+//! implicit freelist within the bucket array (`None` buckets point to other `None` entries) and various hash
+//! chains within the bucket array (a Some bucket will point to other Some buckets that had the same hash).
 //!
-//! [X] Accessible from both Postgres processes and rust threads in the communicator process
-//! [X] Low latency
-//! [ ] Scalable to lots of concurrent accesses (currently uses a single spinlock)
-//! [ ] Resizable
+//! Buckets are never moved unless they are within a region that is being shrunk, and so the actual hash-
+//! dependent component is done with the dictionary. When a new key is inserted into the map, a position
+//! within the dictionary is decided based on its hash, the data is inserted into an empty bucket based
+//! off of the freelist, and then the index of said bucket is placed in the dictionary.
+//!
+//! This map is resizable (if initialized on top of a [`ShmemHandle`]). Both growing and shrinking happen
+//! in-place and are at a high level achieved by expanding/reducing the bucket array and rebuilding the
+//! dictionary by rehashing all keys.

-use std::fmt::Debug;
-use std::hash::Hash;
+use std::hash::{Hash, BuildHasher};
 use std::mem::MaybeUninit;
-use std::ops::Deref;

+use crate::{shmem, sync::*};
 use crate::shmem::ShmemHandle;

-use spin;
-
 mod core;
+pub mod entry;

 #[cfg(test)]
 mod tests;

-use core::CoreHashMap;
+use core::{Bucket, CoreHashMap, INVALID_POS};
+use entry::{Entry, OccupiedEntry, VacantEntry, PrevPos};

-pub enum UpdateAction<V> {
-    Nothing,
-    Insert(V),
-    Remove,
+/// Builder for a [`HashMapAccess`].
+#[must_use]
+pub struct HashMapInit<'a, K, V, S = rustc_hash::FxBuildHasher> {
+    shmem_handle: Option<ShmemHandle>,
+    shared_ptr: *mut RwLock<HashMapShared<'a, K, V>>,
+	shared_size: usize,
+	hasher: S,
+	num_buckets: u32,
 }

-#[derive(Debug)]
-pub struct OutOfMemoryError();
-
-pub struct HashMapInit<'a, K, V> {
-    // Hash table can be allocated in a fixed memory area, or in a resizeable ShmemHandle.
+/// Accessor for a hash table. 
+pub struct HashMapAccess<'a, K, V, S = rustc_hash::FxBuildHasher> {
    shmem_handle: Option<ShmemHandle>,
    shared_ptr: *mut HashMapShared<'a, K, V>,
+	hasher: S,
 }

-pub struct HashMapAccess<'a, K, V> {
-    shmem_handle: Option<ShmemHandle>,
-    shared_ptr: *mut HashMapShared<'a, K, V>,
-}
+unsafe impl<K: Sync, V: Sync, S> Sync for HashMapAccess<'_, K, V, S> {}
+unsafe impl<K: Send, V: Send, S> Send for HashMapAccess<'_, K, V, S> {}

-unsafe impl<'a, K: Sync, V: Sync> Sync for HashMapAccess<'a, K, V> {}
-unsafe impl<'a, K: Send, V: Send> Send for HashMapAccess<'a, K, V> {}
+impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> {
+	pub fn with_hasher<T: BuildHasher>(self, hasher: T) -> HashMapInit<'a, K, V, T> {
+		HashMapInit {
+			hasher,
+			shmem_handle: self.shmem_handle,
+			shared_ptr: self.shared_ptr,
+			shared_size: self.shared_size,
+			num_buckets: self.num_buckets,
+		}
+	}

-impl<'a, K, V> HashMapInit<'a, K, V> {
-    pub fn attach_writer(self) -> HashMapAccess<'a, K, V> {
-        HashMapAccess {
-            shmem_handle: self.shmem_handle,
-            shared_ptr: self.shared_ptr,
-        }
-    }
-
-    pub fn attach_reader(self) -> HashMapAccess<'a, K, V> {
-        // no difference to attach_writer currently
-        self.attach_writer()
-    }
-}
-
-// This is stored in the shared memory area
-struct HashMapShared<'a, K, V> {
-    inner: spin::RwLock<CoreHashMap<'a, K, V>>,
-}
-
-impl<'a, K, V> HashMapInit<'a, K, V>
-where
-    K: Clone + Hash + Eq,
-{
-    pub fn estimate_size(num_buckets: u32) -> usize {
+	/// Loosely (over)estimate the size needed to store a hash table with `num_buckets` buckets.
+	pub fn estimate_size(num_buckets: u32) -> usize {
        // add some margin to cover alignment etc.
        CoreHashMap::<K, V>::estimate_size(num_buckets) + size_of::<HashMapShared<K, V>>() + 1000
    }

-    pub fn init_in_fixed_area(
-        num_buckets: u32,
-        area: &'a mut [MaybeUninit<u8>],
-    ) -> HashMapInit<'a, K, V> {
-        Self::init_common(num_buckets, None, area.as_mut_ptr().cast(), area.len())
-    }
+	/// Initialize a table for writing.
+    pub fn attach_writer(self) -> HashMapAccess<'a, K, V, S> {
+        let mut ptr: *mut u8 = self.shared_ptr.cast();
+        let end_ptr: *mut u8 = unsafe { ptr.add(self.shared_size) };

-    /// Initialize a new hash map in the given shared memory area
-    pub fn init_in_shmem(num_buckets: u32, mut shmem: ShmemHandle) -> HashMapInit<'a, K, V> {
-        let size = Self::estimate_size(num_buckets);
-        shmem
-            .set_size(size)
-            .expect("could not resize shared memory area");
-
-        let ptr = unsafe { shmem.data_ptr.as_mut() };
-        Self::init_common(num_buckets, Some(shmem), ptr, size)
-    }
-
-    fn init_common(
-        num_buckets: u32,
-        shmem_handle: Option<ShmemHandle>,
-        area_ptr: *mut u8,
-        area_len: usize,
-    ) -> HashMapInit<'a, K, V> {
-        // carve out HashMapShared from the area. This does not include the hashmap's dictionary
-        // and buckets.
-        let mut ptr: *mut u8 = area_ptr;
-        ptr = unsafe { ptr.add(ptr.align_offset(align_of::<HashMapShared<K, V>>())) };
-        let shared_ptr: *mut HashMapShared<K, V> = ptr.cast();
+		// carve out area for the One Big Lock (TM) and the HashMapShared.
+		ptr = unsafe { ptr.add(ptr.align_offset(align_of::<libc::pthread_rwlock_t>())) };
+		let raw_lock_ptr = ptr;
+		ptr = unsafe { ptr.add(size_of::<libc::pthread_rwlock_t>()) };
+		ptr = unsafe { ptr.add(ptr.align_offset(align_of::<HashMapShared<K, V>>())) };
+		let shared_ptr: *mut HashMapShared<K, V> = ptr.cast();
        ptr = unsafe { ptr.add(size_of::<HashMapShared<K, V>>()) };
+						
+        // carve out the buckets
+        ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<core::Bucket<K, V>>())) };
+        let buckets_ptr = ptr;
+        ptr = unsafe { ptr.add(size_of::<core::Bucket<K, V>>() * self.num_buckets as usize) };

-        // the rest of the space is given to the hash map's dictionary and buckets
-        let remaining_area = unsafe {
-            std::slice::from_raw_parts_mut(ptr, area_len - ptr.offset_from(area_ptr) as usize)
+        // use remaining space for the dictionary
+        ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<u32>())) };
+        assert!(ptr.addr() < end_ptr.addr());
+        let dictionary_ptr = ptr;
+        let dictionary_size = unsafe { end_ptr.byte_offset_from(ptr) / size_of::<u32>() as isize };
+        assert!(dictionary_size > 0);
+
+        let buckets =
+            unsafe { std::slice::from_raw_parts_mut(buckets_ptr.cast(), self.num_buckets as usize) };
+        let dictionary = unsafe {
+            std::slice::from_raw_parts_mut(dictionary_ptr.cast(), dictionary_size as usize)
        };

-        let hashmap = CoreHashMap::new(num_buckets, remaining_area);
-        unsafe {
-            std::ptr::write(
-                shared_ptr,
-                HashMapShared {
-                    inner: spin::RwLock::new(hashmap),
-                },
-            );
-        }
-
-        HashMapInit {
-            shmem_handle,
+        let hashmap = CoreHashMap::new(buckets, dictionary);
+		let lock = RwLock::from_raw(PthreadRwLock::new(raw_lock_ptr.cast()), hashmap);
+		unsafe {
+			std::ptr::write(shared_ptr, lock);
+		}
+		
+        HashMapAccess {
+            shmem_handle: self.shmem_handle,
            shared_ptr,
+			hasher: self.hasher,
        }
    }
+
+	/// Initialize a table for reading. Currently identical to [`HashMapInit::attach_writer`].
+    pub fn attach_reader(self) -> HashMapAccess<'a, K, V, S> {
+        self.attach_writer()
+    }
 }

-impl<'a, K, V> HashMapAccess<'a, K, V>
+/// Hash table data that is actually stored in the shared memory area.
+///
+/// NOTE: We carve out the parts from a contiguous chunk. Growing and shrinking the hash table
+/// relies on the memory layout! The data structures are laid out in the contiguous shared memory
+/// area as follows:
+///
+/// [`libc::pthread_rwlock_t`]
+/// [`HashMapShared`]
+/// [buckets]
+/// [dictionary]
+///
+/// In between the above parts, there can be padding bytes to align the parts correctly.
+type HashMapShared<'a, K, V> = RwLock<CoreHashMap<'a, K, V>>;
+
+impl<'a, K, V> HashMapInit<'a, K, V, rustc_hash::FxBuildHasher>
+where
+	K: Clone + Hash + Eq
+{
+	/// Place the hash table within a user-supplied fixed memory area.
+	pub fn with_fixed(
+		num_buckets: u32,
+        area: &'a mut [MaybeUninit<u8>],
+    ) -> Self {
+		Self {
+			num_buckets,
+			shmem_handle: None,
+			shared_ptr: area.as_mut_ptr().cast(),
+			shared_size: area.len(),
+			hasher: rustc_hash::FxBuildHasher,
+		}		
+    }
+
+    /// Place a new hash map in the given shared memory area
+	///
+	/// # Panics
+	/// Will panic on failure to resize area to expected map size.
+    pub fn with_shmem(num_buckets: u32, shmem: ShmemHandle) -> Self {
+		let size = Self::estimate_size(num_buckets);
+		shmem
+            .set_size(size)
+            .expect("could not resize shared memory area");
+		Self {
+			num_buckets,
+			shared_ptr: shmem.data_ptr.as_ptr().cast(),
+			shmem_handle: Some(shmem),
+			shared_size: size,
+			hasher: rustc_hash::FxBuildHasher
+		}
+    }
+
+	/// Make a resizable hash map within a new shared memory area with the given name.
+	pub fn new_resizeable_named(num_buckets: u32, max_buckets: u32, name: &str) -> Self {
+		let size = Self::estimate_size(num_buckets);
+		let max_size = Self::estimate_size(max_buckets);
+		let shmem = ShmemHandle::new(name, size, max_size)
+			.expect("failed to make shared memory area");
+		
+		Self {
+			num_buckets,
+			shared_ptr: shmem.data_ptr.as_ptr().cast(),
+			shmem_handle: Some(shmem),
+			shared_size: size,
+			hasher: rustc_hash::FxBuildHasher
+		}
+	}
+
+	/// Make a resizable hash map within a new anonymous shared memory area.
+	pub fn new_resizeable(num_buckets: u32, max_buckets: u32) -> Self {
+		use std::sync::atomic::{AtomicUsize, Ordering};
+		static COUNTER: AtomicUsize = AtomicUsize::new(0);
+		let val = COUNTER.fetch_add(1, Ordering::Relaxed);
+		let name = format!("neon_shmem_hmap{val}");
+		Self::new_resizeable_named(num_buckets, max_buckets, &name)
+	}
+}
+
+impl<'a, K, V, S: BuildHasher> HashMapAccess<'a, K, V, S>
 where
    K: Clone + Hash + Eq,
 {
-    pub fn get<'e>(&'e self, key: &K) -> Option<ValueReadGuard<'e, K, V>> {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        let lock_guard = map.inner.read();
+	/// Hash a key using the map's hasher.
+	#[inline]
+    fn get_hash_value(&self, key: &K) -> u64 {
+		self.hasher.hash_one(key)        
+    }

-        match lock_guard.get(key) {
-            None => None,
-            Some(val_ref) => {
-                let val_ptr = std::ptr::from_ref(val_ref);
-                Some(ValueReadGuard {
-                    _lock_guard: lock_guard,
-                    value: val_ptr,
-                })
+	fn entry_with_hash(&self, key: K, hash: u64) -> Entry<'a, '_, K, V> {
+		let mut map = unsafe { self.shared_ptr.as_ref() }.unwrap().write();
+        let dict_pos = hash as usize % map.dictionary.len();
+        let first = map.dictionary[dict_pos];
+        if first == INVALID_POS {
+            // no existing entry
+            return Entry::Vacant(VacantEntry {
+                map,
+                key,
+                dict_pos: dict_pos as u32,
+            });
+        }
+
+        let mut prev_pos = PrevPos::First(dict_pos as u32);
+        let mut next = first;
+        loop {
+            let bucket = &mut map.buckets[next as usize];
+            let (bucket_key, _bucket_value) = bucket.inner.as_mut().expect("entry is in use");
+            if *bucket_key == key {
+                // found existing entry
+                return Entry::Occupied(OccupiedEntry {
+                    map,
+                    _key: key,
+                    prev_pos,
+                    bucket_pos: next,
+                });
            }
+
+            if bucket.next == INVALID_POS {
+                // No existing entry
+                return Entry::Vacant(VacantEntry {
+                    map,
+                    key,
+                    dict_pos: dict_pos as u32,
+                });
+            }
+            prev_pos = PrevPos::Chained(next);
+            next = bucket.next;
+        }
+	}
+	
+	/// Get a reference to the corresponding value for a key.
+    pub fn get<'e>(&'e self, key: &K) -> Option<ValueReadGuard<'e, V>> {
+		let hash = self.get_hash_value(key);
+        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
+		RwLockReadGuard::try_map(map, |m| m.get_with_hash(key, hash)).ok()
+    }
+
+	/// Get a reference to the entry containing a key.
+    pub fn entry(&self, key: K) -> Entry<'a, '_, K, V> {
+		let hash = self.get_hash_value(&key);
+		self.entry_with_hash(key, hash)
+    }
+
+	/// Remove a key given its hash. Returns the associated value if it existed.
+    pub fn remove(&self, key: &K) -> Option<V> {
+		let hash = self.get_hash_value(&key);
+        match self.entry_with_hash(key.clone(), hash) {
+            Entry::Occupied(e) => Some(e.remove()),
+            Entry::Vacant(_) => None
        }
    }

-    /// Insert a value
-    pub fn insert(&self, key: &K, value: V) -> Result<bool, OutOfMemoryError> {
-        let mut success = None;
-
-        self.update_with_fn(key, |existing| {
-            if existing.is_some() {
-                success = Some(false);
-                UpdateAction::Nothing
-            } else {
-                success = Some(true);
-                UpdateAction::Insert(value)
-            }
-        })?;
-        Ok(success.expect("value_fn not called"))
-    }
-
-    /// Remove value. Returns true if it existed
-    pub fn remove(&self, key: &K) -> bool {
-        let mut result = false;
-        self.update_with_fn(key, |existing| match existing {
-            Some(_) => {
-                result = true;
-                UpdateAction::Remove
-            }
-            None => UpdateAction::Nothing,
-        })
-        .expect("out of memory while removing");
-        result
-    }
-
-    /// Update key using the given function. All the other modifying operations are based on this.
-    pub fn update_with_fn<F>(&self, key: &K, value_fn: F) -> Result<(), OutOfMemoryError>
-    where
-        F: FnOnce(Option<&V>) -> UpdateAction<V>,
-    {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        let mut lock_guard = map.inner.write();
-
-        let old_val = lock_guard.get(key);
-        let action = value_fn(old_val);
-        match (old_val, action) {
-            (_, UpdateAction::Nothing) => {}
-            (_, UpdateAction::Insert(new_val)) => {
-                let _ = lock_guard.insert(key, new_val);
-            }
-            (None, UpdateAction::Remove) => panic!("Remove action with no old value"),
-            (Some(_), UpdateAction::Remove) => {
-                let _ = lock_guard.remove(key);
-            }
+	/// Insert/update a key. Returns the previous associated value if it existed.
+	///
+	/// # Errors
+	/// Will return [`core::FullError`] if there is no more space left in the map.
+    pub fn insert(&self, key: K, value: V) -> Result<Option<V>, core::FullError> {
+		let hash = self.get_hash_value(&key);
+        match self.entry_with_hash(key.clone(), hash) {
+            Entry::Occupied(mut e) => Ok(Some(e.insert(value))),
+            Entry::Vacant(e) => {
+				_ = e.insert(value)?;
+				Ok(None)
+			}
        }
+    }
+	
+	/// Optionally return the entry for a bucket at a given index if it exists.
+	///
+	/// Has more overhead than one would intuitively expect: performs both a clone of the key
+	/// due to the [`OccupiedEntry`] type owning the key and also a hash of the key in order
+	/// to enable repairing the hash chain if the entry is removed.
+    pub fn entry_at_bucket(&self, pos: usize) -> Option<OccupiedEntry<'a, '_, K, V>> {
+        let map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
+		if pos >= map.buckets.len() {
+			return None;
+		}

-        Ok(())
-    }
-
-    /// Update key using the given function. All the other modifying operations are based on this.
-    pub fn update_with_fn_at_bucket<F>(
-        &self,
-        pos: usize,
-        value_fn: F,
-    ) -> Result<(), OutOfMemoryError>
-    where
-        F: FnOnce(Option<&V>) -> UpdateAction<V>,
-    {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        let mut lock_guard = map.inner.write();
-
-        let old_val = lock_guard.get_bucket(pos);
-        let action = value_fn(old_val.map(|(_k, v)| v));
-        match (old_val, action) {
-            (_, UpdateAction::Nothing) => {}
-            (_, UpdateAction::Insert(_new_val)) => panic!("cannot insert without key"),
-            (None, UpdateAction::Remove) => panic!("Remove action with no old value"),
-            (Some((key, _value)), UpdateAction::Remove) => {
-                let key = key.clone();
-                let _ = lock_guard.remove(&key);
-            }
-        }
-
-        Ok(())
+		let entry = map.buckets[pos].inner.as_ref();
+		match entry {
+			Some((key, _)) => Some(OccupiedEntry {
+				_key: key.clone(),
+				bucket_pos: pos as u32,
+				prev_pos: entry::PrevPos::Unknown(
+					self.get_hash_value(&key)
+				),
+				map,
+			}),
+			_ => None,
+		}
    }

+	/// Returns the number of buckets in the table.
    pub fn get_num_buckets(&self) -> usize {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        map.inner.read().get_num_buckets()
+        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
+        map.get_num_buckets()
    }

    /// Return the key and value stored in bucket with given index. This can be used to
-    /// iterate through the hash map. (An Iterator might be nicer. The communicator's
-    /// clock algorithm needs to _slowly_ iterate through all buckets with its clock hand,
-    /// without holding a lock. If we switch to an Iterator, it must not hold the lock.)
-    pub fn get_bucket<'e>(&'e self, pos: usize) -> Option<ValueReadGuard<'e, K, V>> {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        let lock_guard = map.inner.read();
-
-        match lock_guard.get_bucket(pos) {
-            None => None,
-            Some((_key, val_ref)) => {
-                let val_ptr = std::ptr::from_ref(val_ref);
-                Some(ValueReadGuard {
-                    _lock_guard: lock_guard,
-                    value: val_ptr,
-                })
-            }
+    /// iterate through the hash map.
+	// TODO: An Iterator might be nicer. The communicator's clock algorithm needs to
+	// _slowly_ iterate through all buckets with its clock hand,  without holding a lock.
+	// If we switch to an Iterator, it must not hold the lock.
+    pub fn get_at_bucket(&self, pos: usize) -> Option<ValueReadGuard<(K, V)>> {
+        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
+        if pos >= map.buckets.len() {
+            return None;
        }
+		RwLockReadGuard::try_map(map, |m| m.buckets[pos].inner.as_ref()).ok()
    }

-    // for metrics
+	/// Returns the index of the bucket a given value corresponds to.
+    pub fn get_bucket_for_value(&self, val_ptr: *const V) -> usize {
+        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
+
+        let origin = map.buckets.as_ptr();
+        let idx = (val_ptr as usize - origin as usize) / size_of::<Bucket<K, V>>();
+        assert!(idx < map.buckets.len());
+
+        idx
+    }
+
+    /// Returns the number of occupied buckets in the table.
    pub fn get_num_buckets_in_use(&self) -> usize {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        map.inner.read().buckets_in_use as usize
+        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
+        map.buckets_in_use as usize
    }

-    /// Grow
-    ///
-    /// 1. grow the underlying shared memory area
-    /// 2. Initialize new buckets. This overwrites the current dictionary
-    /// 3. Recalculate the dictionary
-    pub fn grow(&self, num_buckets: u32) -> Result<(), crate::shmem::Error> {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        let mut lock_guard = map.inner.write();
-        let inner = &mut *lock_guard;
-        let old_num_buckets = inner.buckets.len() as u32;
-
-        if num_buckets < old_num_buckets {
-            panic!("grow called with a smaller number of buckets");
-        }
-        if num_buckets == old_num_buckets {
-            return Ok(());
-        }
-        let shmem_handle = self
-            .shmem_handle
-            .as_ref()
-            .expect("grow called on a fixed-size hash table");
-
-        let size_bytes = HashMapInit::<K, V>::estimate_size(num_buckets);
-        shmem_handle.set_size(size_bytes)?;
-        let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) };
-
-        // Initialize new buckets. The new buckets are linked to the free list. NB: This overwrites
-        // the dictionary!
-        let buckets_ptr = inner.buckets.as_mut_ptr();
-        unsafe {
-            for i in old_num_buckets..num_buckets {
-                let bucket_ptr = buckets_ptr.add(i as usize);
-                bucket_ptr.write(core::Bucket {
-                    hash: 0,
-                    next: if i < num_buckets {
-                        i + 1
-                    } else {
-                        inner.free_head
-                    },
-                    inner: None,
-                });
-            }
-        }
-
-        // Recalculate the dictionary
+	/// Clears all entries in a table. Does not reset any shrinking operations.
+	pub fn clear(&self) {
+		let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
+        map.clear();
+	}
+	
+	/// Perform an in-place rehash of some region (0..`rehash_buckets`) of the table and reset
+	/// the `buckets` and `dictionary` slices to be as long as `num_buckets`. Resets the freelist
+	/// in the process.
+	fn rehash_dict(
+		&self,
+		inner: &mut CoreHashMap<'a, K, V>,
+		buckets_ptr: *mut core::Bucket<K, V>,
+		end_ptr: *mut u8,
+		num_buckets: u32,
+		rehash_buckets: u32,
+	) {
+		inner.free_head = INVALID_POS;
+		
        let buckets;
        let dictionary;
        unsafe {
@@ -316,52 +372,163 @@ where

            buckets = std::slice::from_raw_parts_mut(buckets_ptr, num_buckets as usize);
            dictionary = std::slice::from_raw_parts_mut(dictionary_ptr, dictionary_size);
+        }		
+        for e in dictionary.iter_mut() {
+            *e = INVALID_POS;
        }
-        for item in dictionary.iter_mut() {
-            *item = core::INVALID_POS;
-        }
-
-        #[allow(clippy::needless_range_loop)]
-        for i in 0..old_num_buckets as usize {
-            if buckets[i].inner.is_none() {
-                continue;
+		
+        for (i, bucket) in buckets.iter_mut().enumerate().take(rehash_buckets as usize) {
+            if bucket.inner.is_none() {
+				bucket.next = inner.free_head;
+                inner.free_head = i as u32;
+				continue;
            }
-            let pos: usize = (buckets[i].hash % dictionary.len() as u64) as usize;
-            buckets[i].next = dictionary[pos];
+
+			let hash = self.hasher.hash_one(&bucket.inner.as_ref().unwrap().0);
+            let pos: usize = (hash % dictionary.len() as u64) as usize;
+            bucket.next = dictionary[pos];
            dictionary[pos] = i as u32;
        }

-        // Finally, update the CoreHashMap struct
        inner.dictionary = dictionary;
        inner.buckets = buckets;
-        inner.free_head = old_num_buckets;
+	}
+
+	/// Rehash the map without growing or shrinking. 
+	pub fn shuffle(&self) {
+		let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
+		let num_buckets = map.get_num_buckets() as u32;
+		let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);
+		let end_ptr: *mut u8 = unsafe { self.shared_ptr.byte_add(size_bytes).cast() };
+        let buckets_ptr = map.buckets.as_mut_ptr();
+		self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, num_buckets);
+	}
+
+    /// Grow the number of buckets within the table. 
+    ///
+    /// 1. Grows the underlying shared memory area
+    /// 2. Initializes new buckets and overwrites the current dictionary
+    /// 3. Rehashes the dictionary
+	///
+	/// # Panics 
+	/// Panics if called on a map initialized with [`HashMapInit::with_fixed`].
+	///
+	/// # Errors
+	/// Returns an [`shmem::Error`] if any errors occur resizing the memory region.
+    pub fn grow(&self, num_buckets: u32) -> Result<(), shmem::Error> {
+        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
+        let old_num_buckets = map.buckets.len() as u32;
+
+        assert!(num_buckets >= old_num_buckets, "grow called with a smaller number of buckets");
+        if num_buckets == old_num_buckets {
+            return Ok(());
+        }
+        let shmem_handle = self
+            .shmem_handle
+            .as_ref()
+            .expect("grow called on a fixed-size hash table");
+
+        let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);
+        shmem_handle.set_size(size_bytes)?;
+        let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) };
+
+        // Initialize new buckets. The new buckets are linked to the free list.
+		// NB: This overwrites the dictionary!
+        let buckets_ptr = map.buckets.as_mut_ptr();
+        unsafe {
+            for i in old_num_buckets..num_buckets {
+                let bucket = buckets_ptr.add(i as usize);
+                bucket.write(core::Bucket {
+                    next: if i < num_buckets-1 {
+                        i + 1
+                    } else {
+                        map.free_head
+                    },
+                    inner: None,
+                });
+            }
+        }
+
+		self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, old_num_buckets);
+        map.free_head = old_num_buckets;

        Ok(())
    }

-    // TODO: Shrinking is a multi-step process that requires co-operation from the caller
-    //
-    // 1. The caller must first call begin_shrink(). That forbids allocation of higher-numbered
-    // buckets.
-    //
-    // 2. Next, the caller must evict all entries in higher-numbered buckets.
-    //
-    // 3. Finally, call finish_shrink(). This recomputes the dictionary and shrinks the underlying
-    //    shmem area
-}
+	/// Begin a shrink, limiting all new allocations to be in buckets with index below `num_buckets`.
+	///
+	/// # Panics
+	/// Panics if called on a map initialized with [`HashMapInit::with_fixed`] or if `num_buckets` is
+	/// greater than the number of buckets in the map.
+	pub fn begin_shrink(&mut self, num_buckets: u32) {
+		let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
+		assert!(
+			num_buckets <= map.get_num_buckets() as u32,
+            "shrink called with a larger number of buckets"
+        );
+		_ = self
+            .shmem_handle
+            .as_ref()
+            .expect("shrink called on a fixed-size hash table");
+		map.alloc_limit = num_buckets;
+	}

-pub struct ValueReadGuard<'a, K, V> {
-    _lock_guard: spin::RwLockReadGuard<'a, CoreHashMap<'a, K, V>>,
-    value: *const V,
-}
+	/// If a shrink operation is underway, returns the target size of the map. Otherwise, returns None.
+	pub fn shrink_goal(&self) -> Option<usize> {
+		let map = unsafe { self.shared_ptr.as_mut() }.unwrap().read();
+        let goal = map.alloc_limit;
+		if goal == INVALID_POS { None } else { Some(goal as usize) }
+	}
+	
+	/// Complete a shrink after caller has evicted entries, removing the unused buckets and rehashing.
+	///
+	/// # Panics
+	/// The following cases result in a panic: 
+	/// - Calling this function on a map initialized with [`HashMapInit::with_fixed`].
+	/// - Calling this function on a map when no shrink operation is in progress.
+	/// - Calling this function on a map with `shrink_mode` set to [`HashMapShrinkMode::Remap`] and
+	///   there are more buckets in use than the value returned by [`HashMapAccess::shrink_goal`].
+	///
+	/// # Errors
+	/// Returns an [`shmem::Error`] if any errors occur resizing the memory region.
+	pub fn finish_shrink(&self) -> Result<(), shmem::Error> {
+		let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
+		assert!(
+			map.alloc_limit != INVALID_POS,
+			"called finish_shrink when no shrink is in progress"
+		);

-impl<'a, K, V> Deref for ValueReadGuard<'a, K, V> {
-    type Target = V;
+		let num_buckets = map.alloc_limit; 

-    fn deref(&self) -> &Self::Target {
-        // SAFETY: The `lock_guard` ensures that the underlying map (and thus the value pointed to
-        // by `value`) remains valid for the lifetime `'a`. The `value` has been obtained from a
-        // valid reference within the map.
-        unsafe { &*self.value }
-    }
+		if map.get_num_buckets() == num_buckets as usize {
+            return Ok(());
+        }
+
+		assert!(
+			map.buckets_in_use <= num_buckets,
+			"called finish_shrink before enough entries were removed"
+		);
+		
+		for i in (num_buckets as usize)..map.buckets.len() {
+			if let Some((k, v)) = map.buckets[i].inner.take() {
+				// alloc_bucket increases count, so need to decrease since we're just moving
+				map.buckets_in_use -= 1;
+				map.alloc_bucket(k, v).unwrap();
+			}
+		}
+
+        let shmem_handle = self
+            .shmem_handle
+            .as_ref()
+            .expect("shrink called on a fixed-size hash table");
+
+		let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);
+        shmem_handle.set_size(size_bytes)?;
+        let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) };
+		let buckets_ptr = map.buckets.as_mut_ptr();
+		self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, num_buckets);
+		map.alloc_limit = INVALID_POS;
+		
+		Ok(())
+	}
 }
--- a/libs/neon-shmem/src/hash/core.rs
+++ b/libs/neon-shmem/src/hash/core.rs
@@ -1,37 +1,47 @@
-//! Simple hash table with chaining
-//!
-//! # Resizing
-//!
+//! Simple hash table with chaining.

-use std::hash::{DefaultHasher, Hash, Hasher};
+use std::hash::Hash;
 use std::mem::MaybeUninit;

+use crate::hash::entry::*;
+
+/// Invalid position within the map (either within the dictionary or bucket array).
 pub(crate) const INVALID_POS: u32 = u32::MAX;

-// Bucket
+/// Fundamental storage unit within the hash table. Either empty or contains a key-value pair.
+/// Always part of a chain of some kind (either a freelist if empty or a hash chain if full).
 pub(crate) struct Bucket<K, V> {
-    pub(crate) hash: u64,
-    pub(crate) next: u32,
+	/// Index of next bucket in the chain.
+	pub(crate) next: u32,
+	/// Key-value pair contained within bucket.
    pub(crate) inner: Option<(K, V)>,
 }

+/// Core hash table implementation.
 pub(crate) struct CoreHashMap<'a, K, V> {
+	/// Dictionary used to map hashes to bucket indices.
    pub(crate) dictionary: &'a mut [u32],
+	/// Buckets containing key-value pairs.
    pub(crate) buckets: &'a mut [Bucket<K, V>],
+	/// Head of the freelist.
    pub(crate) free_head: u32,
-
-    // metrics
+	/// Maximum index of a bucket allowed to be allocated. [`INVALID_POS`] if no limit.
+	pub(crate) alloc_limit: u32,
+    /// The number of currently occupied buckets.
    pub(crate) buckets_in_use: u32,
+	// pub(crate) lock: libc::pthread_mutex_t,
+	// Unclear what the purpose of this is.
+    pub(crate) _user_list_head: u32,
 }

+/// Error for when there are no empty buckets left but one is needed.
+#[derive(Debug, PartialEq)]
 pub struct FullError();

-impl<'a, K, V> CoreHashMap<'a, K, V>
-where
-    K: Clone + Hash + Eq,
-{
+impl<'a, K: Clone + Hash + Eq, V> CoreHashMap<'a, K, V> {
    const FILL_FACTOR: f32 = 0.60;

+	/// Estimate the size of data contained within the the hash map.
    pub fn estimate_size(num_buckets: u32) -> usize {
        let mut size = 0;

@@ -43,75 +53,48 @@ where
            as usize;

        size
-    }
-
-    pub fn new(num_buckets: u32, area: &'a mut [u8]) -> CoreHashMap<'a, K, V> {
-        let len = area.len();
-
-        let mut ptr: *mut u8 = area.as_mut_ptr();
-        let end_ptr: *mut u8 = unsafe { area.as_mut_ptr().add(len) };
-
-        // carve out the buckets
-        ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<Bucket<K, V>>())) };
-        let buckets_ptr = ptr;
-        ptr = unsafe { ptr.add(size_of::<Bucket<K, V>>() * num_buckets as usize) };
-
-        // use remaining space for the dictionary
-        ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<u32>())) };
-        let dictionary_ptr = ptr;
-
-        assert!(ptr.addr() < end_ptr.addr());
-        let dictionary_size = unsafe { end_ptr.byte_offset_from(ptr) / size_of::<u32>() as isize };
-        assert!(dictionary_size > 0);
+    }	

+    pub fn new(
+        buckets: &'a mut [MaybeUninit<Bucket<K, V>>],
+        dictionary: &'a mut [MaybeUninit<u32>],
+    ) -> Self {
        // Initialize the buckets
-        let buckets = {
-            let buckets_ptr: *mut MaybeUninit<Bucket<K, V>> = buckets_ptr.cast();
-            let buckets =
-                unsafe { std::slice::from_raw_parts_mut(buckets_ptr, num_buckets as usize) };
-            for i in 0..buckets.len() {
-                buckets[i].write(Bucket {
-                    hash: 0,
-                    next: if i < buckets.len() - 1 {
-                        i as u32 + 1
-                    } else {
-                        INVALID_POS
-                    },
-                    inner: None,
-                });
-            }
-            // TODO: use std::slice::assume_init_mut() once it stabilizes
-            unsafe { std::slice::from_raw_parts_mut(buckets_ptr.cast(), num_buckets as usize) }
-        };
+        for i in 0..buckets.len() {
+            buckets[i].write(Bucket {
+                next: if i < buckets.len() - 1 {
+                    i as u32 + 1
+                } else {
+                    INVALID_POS
+                },				
+                inner: None,
+            });
+        }

        // Initialize the dictionary
-        let dictionary = {
-            let dictionary_ptr: *mut MaybeUninit<u32> = dictionary_ptr.cast();
-            let dictionary =
-                unsafe { std::slice::from_raw_parts_mut(dictionary_ptr, dictionary_size as usize) };
+        for e in dictionary.iter_mut() {
+            e.write(INVALID_POS);
+        }

-            for item in dictionary.iter_mut() {
-                item.write(INVALID_POS);
-            }
-            // TODO: use std::slice::assume_init_mut() once it stabilizes
-            unsafe {
-                std::slice::from_raw_parts_mut(dictionary_ptr.cast(), dictionary_size as usize)
-            }
+        // TODO: use std::slice::assume_init_mut() once it stabilizes
+        let buckets =
+            unsafe { std::slice::from_raw_parts_mut(buckets.as_mut_ptr().cast(), buckets.len()) };
+        let dictionary = unsafe {
+            std::slice::from_raw_parts_mut(dictionary.as_mut_ptr().cast(), dictionary.len())
        };

-        CoreHashMap {
+        Self {
            dictionary,
            buckets,
            free_head: 0,
            buckets_in_use: 0,
+            _user_list_head: INVALID_POS,
+			alloc_limit: INVALID_POS,
        }
    }

-    pub fn get(&self, key: &K) -> Option<&V> {
-        let mut hasher = DefaultHasher::new();
-        key.hash(&mut hasher);
-        let hash = hasher.finish();
-
+	/// Get the value associated with a key (if it exists) given its hash.
+    pub fn get_with_hash(&self, key: &K, hash: u64) -> Option<&V> {
        let mut next = self.dictionary[hash as usize % self.dictionary.len()];
        loop {
            if next == INVALID_POS {
@@ -127,107 +110,68 @@ where
        }
    }

-    pub fn insert(&mut self, key: &K, value: V) -> Result<(), FullError> {
-        let mut hasher = DefaultHasher::new();
-        key.hash(&mut hasher);
-        let hash = hasher.finish();
-
-        let first = self.dictionary[hash as usize % self.dictionary.len()];
-        if first == INVALID_POS {
-            // no existing entry
-            let pos = self.alloc_bucket(key.clone(), value, hash)?;
-            if pos == INVALID_POS {
-                return Err(FullError());
-            }
-            self.dictionary[hash as usize % self.dictionary.len()] = pos;
-            return Ok(());
-        }
-
-        let mut next = first;
-        loop {
-            let bucket = &mut self.buckets[next as usize];
-            let (bucket_key, bucket_value) = bucket.inner.as_mut().expect("entry is in use");
-            if bucket_key == key {
-                // found existing entry, update its value
-                *bucket_value = value;
-                return Ok(());
-            }
-
-            if bucket.next == INVALID_POS {
-                // No existing entry found. Append to the chain
-                let pos = self.alloc_bucket(key.clone(), value, hash)?;
-                if pos == INVALID_POS {
-                    return Err(FullError());
-                }
-                self.buckets[next as usize].next = pos;
-                return Ok(());
-            }
-            next = bucket.next;
-        }
-    }
-
-    pub fn remove(&mut self, key: &K) -> Result<(), FullError> {
-        let mut hasher = DefaultHasher::new();
-        key.hash(&mut hasher);
-        let hash = hasher.finish();
-
-        let mut next = self.dictionary[hash as usize % self.dictionary.len()];
-        let mut prev_pos: u32 = INVALID_POS;
-        loop {
-            if next == INVALID_POS {
-                // no existing entry
-                return Ok(());
-            }
-            let bucket = &mut self.buckets[next as usize];
-            let (bucket_key, _) = bucket.inner.as_mut().expect("entry is in use");
-            if bucket_key == key {
-                // found existing entry, unlink it from the chain
-                if prev_pos == INVALID_POS {
-                    self.dictionary[hash as usize % self.dictionary.len()] = bucket.next;
-                } else {
-                    self.buckets[prev_pos as usize].next = bucket.next;
-                }
-
-                // and add it to the freelist
-                let bucket = &mut self.buckets[next as usize];
-                bucket.hash = 0;
-                bucket.inner = None;
-                bucket.next = self.free_head;
-                self.free_head = next;
-                self.buckets_in_use -= 1;
-                return Ok(());
-            }
-            prev_pos = next;
-            next = bucket.next;
-        }
-    }
-
+	/// Get number of buckets in map.
    pub fn get_num_buckets(&self) -> usize {
        self.buckets.len()
    }

-    pub fn get_bucket(&self, pos: usize) -> Option<&(K, V)> {
-        if pos >= self.buckets.len() {
-            return None;
+	/// Clears all entries from the hashmap.
+	///
+	/// Does not reset any allocation limits, but does clear any entries beyond them.
+	pub fn clear(&mut self) {
+		for i in 0..self.buckets.len() {
+            self.buckets[i] = Bucket {
+                next: if i < self.buckets.len() - 1 {
+                    i as u32 + 1
+                } else {
+                    INVALID_POS
+                },				
+                inner: None,
+            }
+        }
+        for i in 0..self.dictionary.len() {
+            self.dictionary[i] = INVALID_POS;
        }

-        self.buckets[pos].inner.as_ref()
-    }
+		self.free_head = 0;
+		self.buckets_in_use = 0;
+	}

-    fn alloc_bucket(&mut self, key: K, value: V, hash: u64) -> Result<u32, FullError> {
-        let pos = self.free_head;
-        if pos == INVALID_POS {
-            return Err(FullError());
-        }
+	/// Find the position of an unused bucket via the freelist and initialize it. 
+    pub(crate) fn alloc_bucket(&mut self, key: K, value: V) -> Result<u32, FullError> {
+        let mut pos = self.free_head;

-        let bucket = &mut self.buckets[pos as usize];
-        self.free_head = bucket.next;
-        self.buckets_in_use += 1;
+		// Find the first bucket we're *allowed* to use.
+		let mut prev = PrevPos::First(self.free_head);
+		while pos != INVALID_POS && pos >= self.alloc_limit {
+			let bucket = &mut self.buckets[pos as usize];
+			prev = PrevPos::Chained(pos);
+			pos = bucket.next;
+		}
+		if pos == INVALID_POS {
+			return Err(FullError());
+		}

-        bucket.hash = hash;
+		// Repair the freelist.
+		match prev {
+			PrevPos::First(_) => {
+				let next_pos = self.buckets[pos as usize].next;
+				self.free_head = next_pos;				
+			}
+			PrevPos::Chained(p) => if p != INVALID_POS {
+				let next_pos = self.buckets[pos as usize].next;
+				self.buckets[p as usize].next = next_pos;
+			},
+			_ => unreachable!()
+		}
+
+		// Initialize the bucket.
+		let bucket = &mut self.buckets[pos as usize];
+		self.buckets_in_use += 1;
        bucket.next = INVALID_POS;
        bucket.inner = Some((key, value));

        Ok(pos)
    }
 }
+
--- a/libs/neon-shmem/src/hash/entry.rs
+++ b/libs/neon-shmem/src/hash/entry.rs
@@ -0,0 +1,139 @@
+//! Equivalent of [`std::collections::hash_map::Entry`] for this hashmap.
+
+use crate::hash::core::{CoreHashMap, FullError, INVALID_POS};
+use crate::sync::{RwLockWriteGuard, ValueWriteGuard};
+
+use std::hash::Hash;
+use std::mem;
+
+
+pub enum Entry<'a, 'b, K, V> {
+	Occupied(OccupiedEntry<'a, 'b, K, V>),
+    Vacant(VacantEntry<'a, 'b, K, V>),
+}
+
+/// Enum representing the previous position within a chain.
+#[derive(Clone, Copy)]
+pub(crate) enum PrevPos {
+	/// Starting index within the dictionary.  
+    First(u32),
+	/// Regular index within the buckets.
+    Chained(u32),
+	/// Unknown - e.g. the associated entry was retrieved by index instead of chain.
+	Unknown(u64),
+}
+
+pub struct OccupiedEntry<'a, 'b, K, V> {
+	/// Mutable reference to the map containing this entry.
+	pub(crate) map: RwLockWriteGuard<'b, CoreHashMap<'a, K, V>>,
+	/// The key of the occupied entry
+    pub(crate) _key: K,
+	/// The index of the previous entry in the chain.
+    pub(crate) prev_pos: PrevPos,
+	/// The position of the bucket in the [`CoreHashMap`] bucket array.
+    pub(crate) bucket_pos: u32,
+}
+
+impl<K, V> OccupiedEntry<'_, '_, K, V> {
+    pub fn get(&self) -> &V {
+        &self.map.buckets[self.bucket_pos as usize]
+            .inner
+            .as_ref()
+            .unwrap()
+            .1
+    }
+
+    pub fn get_mut(&mut self) -> &mut V {
+        &mut self.map.buckets[self.bucket_pos as usize]
+            .inner
+            .as_mut()
+            .unwrap()
+            .1
+    }
+
+	/// Inserts a value into the entry, replacing (and returning) the existing value.
+    pub fn insert(&mut self, value: V) -> V {
+        let bucket = &mut self.map.buckets[self.bucket_pos as usize];
+        // This assumes inner is Some, which it must be for an OccupiedEntry
+        mem::replace(&mut bucket.inner.as_mut().unwrap().1, value)
+    }
+
+	/// Removes the entry from the hash map, returning the value originally stored within it.
+	///
+	/// This may result in multiple bucket accesses if the entry was obtained by index as the
+	/// previous chain entry needs to be discovered in this case.
+	///
+	/// # Panics
+	/// Panics if the `prev_pos` field is equal to [`PrevPos::Unknown`]. In practice, this means
+	/// the entry was obtained via calling something like [`CoreHashMap::entry_at_bucket`].
+    pub fn remove(mut self) -> V {
+		// If this bucket was queried by index, go ahead and follow its chain from the start.
+		let prev = if let PrevPos::Unknown(hash) = self.prev_pos {
+			let dict_idx = hash as usize % self.map.dictionary.len();
+			let mut prev = PrevPos::First(dict_idx as u32);
+			let mut curr = self.map.dictionary[dict_idx];
+			while curr != self.bucket_pos {
+				curr = self.map.buckets[curr as usize].next;
+				prev = PrevPos::Chained(curr);
+			}
+			prev 	
+		} else {
+			self.prev_pos
+		};
+		
+        // CoreHashMap::remove returns Option<(K, V)>. We know it's Some for an OccupiedEntry.
+        let bucket = &mut self.map.buckets[self.bucket_pos as usize];
+		
+        // unlink it from the chain
+        match prev {
+            PrevPos::First(dict_pos) => {
+				self.map.dictionary[dict_pos as usize] = bucket.next;
+			},
+            PrevPos::Chained(bucket_pos) => {
+				// println!("we think prev of {} is {bucket_pos}", self.bucket_pos);
+                self.map.buckets[bucket_pos as usize].next = bucket.next;
+            },
+			_ => unreachable!(),			
+        }
+
+        // and add it to the freelist
+		let free = self.map.free_head;
+        let bucket = &mut self.map.buckets[self.bucket_pos as usize];
+        let old_value = bucket.inner.take();
+		bucket.next = free;
+        self.map.free_head = self.bucket_pos;
+        self.map.buckets_in_use -= 1;
+
+        old_value.unwrap().1
+    }
+}
+
+/// An abstract view into a vacant entry within the map.
+pub struct VacantEntry<'a, 'b, K, V> {
+	/// Mutable reference to the map containing this entry.
+	pub(crate) map: RwLockWriteGuard<'b, CoreHashMap<'a, K, V>>,
+	/// The key to be inserted into this entry.
+    pub(crate) key: K,
+	/// The position within the dictionary corresponding to the key's hash.
+    pub(crate) dict_pos: u32,
+}
+
+impl<'b, K: Clone + Hash + Eq, V> VacantEntry<'_, 'b, K, V> {
+	/// Insert a value into the vacant entry, finding and populating an empty bucket in the process.
+	///
+	/// # Errors
+	/// Will return [`FullError`] if there are no unoccupied buckets in the map.
+    pub fn insert(mut self, value: V) -> Result<ValueWriteGuard<'b, V>, FullError> {
+        let pos = self.map.alloc_bucket(self.key, value)?;
+        if pos == INVALID_POS {
+            return Err(FullError());
+        }
+        self.map.buckets[pos as usize].next = self.map.dictionary[self.dict_pos as usize];
+        self.map.dictionary[self.dict_pos as usize] = pos;
+
+		Ok(RwLockWriteGuard::map(
+			self.map,
+			|m| &mut m.buckets[pos as usize].inner.as_mut().unwrap().1
+		))
+    }
+}
--- a/libs/neon-shmem/src/hash/tests.rs
+++ b/libs/neon-shmem/src/hash/tests.rs
@@ -1,12 +1,12 @@
 use std::collections::BTreeMap;
 use std::collections::HashSet;
-use std::fmt::{Debug, Formatter};
-use std::sync::atomic::{AtomicUsize, Ordering};
+use std::fmt::Debug;
+use std::mem::MaybeUninit;

 use crate::hash::HashMapAccess;
 use crate::hash::HashMapInit;
-use crate::hash::UpdateAction;
-use crate::shmem::ShmemHandle;
+use crate::hash::Entry;
+use crate::hash::core::FullError;

 use rand::seq::SliceRandom;
 use rand::{Rng, RngCore};
@@ -35,16 +35,20 @@ impl<'a> From<&'a [u8]> for TestKey {
    }
 }

-fn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {
-    const MAX_MEM_SIZE: usize = 10000000;
-    let shmem = ShmemHandle::new("test_inserts", 0, MAX_MEM_SIZE).unwrap();
-
-    let init_struct = HashMapInit::<TestKey, usize>::init_in_shmem(100000, shmem);
-    let w = init_struct.attach_writer();
+fn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {	
+    let w = HashMapInit::<TestKey, usize>::new_resizeable_named(
+		100000, 120000, "test_inserts"
+	).attach_writer();

    for (idx, k) in keys.iter().enumerate() {
-        let res = w.insert(&(*k).into(), idx);
-        assert!(res.is_ok());
+		let res = w.entry((*k).into());
+		match res {
+			Entry::Occupied(mut e) => { e.insert(idx); }
+			Entry::Vacant(e) => {
+				let res = e.insert(idx);
+				assert!(res.is_ok());
+			},
+		};
    }

    for (idx, k) in keys.iter().enumerate() {
@@ -52,8 +56,6 @@ fn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {
        let value = x.as_deref().copied();
        assert_eq!(value, Some(idx));
    }
-
-    //eprintln!("stats: {:?}", tree_writer.get_statistics());
 }

 #[test]
@@ -92,40 +94,14 @@ fn sparse() {
    test_inserts(&keys);
 }

-struct TestValue(AtomicUsize);
-
-impl TestValue {
-    fn new(val: usize) -> TestValue {
-        TestValue(AtomicUsize::new(val))
-    }
-
-    fn load(&self) -> usize {
-        self.0.load(Ordering::Relaxed)
-    }
-}
-
-impl Clone for TestValue {
-    fn clone(&self) -> TestValue {
-        TestValue::new(self.load())
-    }
-}
-
-impl Debug for TestValue {
-    fn fmt(&self, fmt: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
-        write!(fmt, "{:?}", self.load())
-    }
-}
-
 #[derive(Clone, Debug)]
 struct TestOp(TestKey, Option<usize>);

 fn apply_op(
    op: &TestOp,
-    sut: &HashMapAccess<TestKey, TestValue>,
+    map: &mut HashMapAccess<TestKey, usize>,
    shadow: &mut BTreeMap<TestKey, usize>,
 ) {
-    eprintln!("applying op: {op:?}");
-
    // apply the change to the shadow tree first
    let shadow_existing = if let Some(v) = op.1 {
        shadow.insert(op.0, v)
@@ -133,33 +109,79 @@ fn apply_op(
        shadow.remove(&op.0)
    };

-    // apply to Art tree
-    sut.update_with_fn(&op.0, |existing| {
-        assert_eq!(existing.map(TestValue::load), shadow_existing);
+	let entry = map.entry(op.0);
+    let hash_existing = match op.1 {
+		Some(new) => {
+			match entry {
+				Entry::Occupied(mut e) => Some(e.insert(new)),
+				Entry::Vacant(e) => { _ = e.insert(new).unwrap(); None },
+			}
+		},
+		None => {
+			match entry {
+				Entry::Occupied(e) => Some(e.remove()),
+				Entry::Vacant(_) => None,
+			}
+		},
+	};

-        match (existing, op.1) {
-            (None, None) => UpdateAction::Nothing,
-            (None, Some(new_val)) => UpdateAction::Insert(TestValue::new(new_val)),
-            (Some(_old_val), None) => UpdateAction::Remove,
-            (Some(old_val), Some(new_val)) => {
-                old_val.0.store(new_val, Ordering::Relaxed);
-                UpdateAction::Nothing
-            }
-        }
-    })
-    .expect("out of memory");
+	assert_eq!(shadow_existing, hash_existing);
+}
+
+fn do_random_ops(
+	num_ops: usize,
+	size: u32,
+	del_prob: f64,
+	writer: &mut HashMapAccess<TestKey, usize>,
+	shadow: &mut BTreeMap<TestKey, usize>,
+	rng: &mut rand::rngs::ThreadRng,
+) {
+	for i in 0..num_ops {
+        let key: TestKey = ((rng.next_u32() % size) as u128).into();
+        let op = TestOp(key, if rng.random_bool(del_prob) { Some(i) } else { None });
+        apply_op(&op, writer, shadow);
+    }
+}
+
+fn do_deletes(
+	num_ops: usize,
+	writer: &mut HashMapAccess<TestKey, usize>,
+	shadow: &mut BTreeMap<TestKey, usize>,
+) {
+	for _ in 0..num_ops {
+		let (k, _) = shadow.pop_first().unwrap();
+		writer.remove(&k);
+	}
+}
+
+fn do_shrink(
+	writer: &mut HashMapAccess<TestKey, usize>,
+	shadow: &mut BTreeMap<TestKey, usize>,
+	to: u32
+) {
+	assert!(writer.shrink_goal().is_none());
+	writer.begin_shrink(to);
+	assert_eq!(writer.shrink_goal(), Some(to as usize));
+	while writer.get_num_buckets_in_use() > to as usize {
+		let (k, _) = shadow.pop_first().unwrap();
+		let entry = writer.entry(k);
+		if let Entry::Occupied(e) = entry {
+			e.remove();
+		}
+	}
+	let old_usage = writer.get_num_buckets_in_use();
+	writer.finish_shrink().unwrap();
+	assert!(writer.shrink_goal().is_none());
+	assert_eq!(writer.get_num_buckets_in_use(), old_usage);
 }

 #[test]
 fn random_ops() {
-    const MAX_MEM_SIZE: usize = 10000000;
-    let shmem = ShmemHandle::new("test_inserts", 0, MAX_MEM_SIZE).unwrap();
-
-    let init_struct = HashMapInit::<TestKey, TestValue>::init_in_shmem(100000, shmem);
-    let writer = init_struct.attach_writer();
-
+	let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
+		100000, 120000, "test_random"
+	).attach_writer();
    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-
+	
    let distribution = Zipf::new(u128::MAX as f64, 1.1).unwrap();
    let mut rng = rand::rng();
    for i in 0..100000 {
@@ -167,54 +189,238 @@ fn random_ops() {

        let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });

-        apply_op(&op, &writer, &mut shadow);
-
-        if i % 1000 == 0 {
-            eprintln!("{i} ops processed");
-            //eprintln!("stats: {:?}", tree_writer.get_statistics());
-            //test_iter(&tree_writer, &shadow);
-        }
+        apply_op(&op, &mut writer, &mut shadow);
    }
 }

+
+#[test]
+fn test_shuffle() {
+    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
+		1000, 1200, "test_shuf"
+	).attach_writer();
+    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
+    let mut rng = rand::rng();
+
+    do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
+    writer.shuffle();
+	do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
+}
+
 #[test]
 fn test_grow() {
-    const MEM_SIZE: usize = 10000000;
-    let shmem = ShmemHandle::new("test_grow", 0, MEM_SIZE).unwrap();
-
-    let init_struct = HashMapInit::<TestKey, TestValue>::init_in_shmem(1000, shmem);
-    let writer = init_struct.attach_writer();
-
+    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
+		1000, 2000, "test_grow"
+	).attach_writer();
    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-
    let mut rng = rand::rng();
-    for i in 0..10000 {
-        let key: TestKey = ((rng.next_u32() % 1000) as u128).into();
-
-        let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
-
-        apply_op(&op, &writer, &mut shadow);
-
-        if i % 1000 == 0 {
-            eprintln!("{i} ops processed");
-            //eprintln!("stats: {:?}", tree_writer.get_statistics());
-            //test_iter(&tree_writer, &shadow);
-        }
-    }

+    do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
+	let old_usage = writer.get_num_buckets_in_use();
    writer.grow(1500).unwrap();
-
-    for i in 0..10000 {
-        let key: TestKey = ((rng.next_u32() % 1500) as u128).into();
-
-        let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
-
-        apply_op(&op, &writer, &mut shadow);
-
-        if i % 1000 == 0 {
-            eprintln!("{i} ops processed");
-            //eprintln!("stats: {:?}", tree_writer.get_statistics());
-            //test_iter(&tree_writer, &shadow);
-        }
-    }
+	assert_eq!(writer.get_num_buckets_in_use(), old_usage);
+	assert_eq!(writer.get_num_buckets(), 1500);
+	do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
+}
+
+#[test]
+fn test_clear() {
+    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
+		1500, 2000, "test_clear"
+	).attach_writer();
+    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
+    let mut rng = rand::rng();
+    do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
+	writer.clear();
+	assert_eq!(writer.get_num_buckets_in_use(), 0);
+	assert_eq!(writer.get_num_buckets(), 1500);
+	while let Some((key, _)) = shadow.pop_first() {
+		assert!(writer.get(&key).is_none());
+	}
+	do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
+	for i in 0..(1500 - writer.get_num_buckets_in_use()) {
+		writer.insert((1500 + i as u128).into(), 0).unwrap();
+	}
+	assert_eq!(writer.insert(5000.into(), 0), Err(FullError {}));
+	writer.clear();
+	assert!(writer.insert(5000.into(), 0).is_ok());
+}
+
+#[test]
+fn test_idx_remove() {
+	let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
+		1500, 2000, "test_clear"
+	).attach_writer();
+    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
+    let mut rng = rand::rng();
+    do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
+	for _ in 0..100 {
+		let idx = (rng.next_u32() % 1500) as usize;
+		if let Some(e) = writer.entry_at_bucket(idx) {
+			shadow.remove(&e._key);
+			e.remove();
+		}
+		
+	}
+	while let Some((key, val)) = shadow.pop_first() {
+		assert_eq!(*writer.get(&key).unwrap(), val);
+	}
+}
+
+#[test]
+fn test_idx_get() {
+	let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
+		1500, 2000, "test_clear"
+	).attach_writer();
+    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
+    let mut rng = rand::rng();
+    do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
+	for _ in 0..100 {
+		let idx = (rng.next_u32() % 1500) as usize;
+		if let Some(pair) = writer.get_at_bucket(idx) {
+			{ 
+				let v: *const usize = &pair.1;
+				assert_eq!(writer.get_bucket_for_value(v), idx);
+			}
+			{
+				let v: *const usize = &pair.1;
+				assert_eq!(writer.get_bucket_for_value(v), idx);
+			}
+		}
+	}
+}
+
+#[test]
+fn test_shrink() {
+    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
+		1500, 2000, "test_shrink"
+	).attach_writer();
+    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
+    let mut rng = rand::rng();
+	
+    do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);	
+	do_shrink(&mut writer, &mut shadow, 1000);	
+	assert_eq!(writer.get_num_buckets(), 1000);
+	do_deletes(500, &mut writer, &mut shadow);
+	do_random_ops(10000, 500, 0.75, &mut writer, &mut shadow, &mut rng);
+	assert!(writer.get_num_buckets_in_use() <= 1000);
+}
+
+#[test]
+fn test_shrink_grow_seq() {
+    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
+		1000, 20000, "test_grow_seq"
+	).attach_writer();
+    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
+    let mut rng = rand::rng();
+
+    do_random_ops(500, 1000, 0.1, &mut writer, &mut shadow, &mut rng);
+	eprintln!("Shrinking to 750");
+    do_shrink(&mut writer, &mut shadow, 750);
+	do_random_ops(200, 1000, 0.5, &mut writer, &mut shadow, &mut rng);
+	eprintln!("Growing to 1500");
+	writer.grow(1500).unwrap();
+	do_random_ops(600, 1500, 0.1, &mut writer, &mut shadow, &mut rng);
+	eprintln!("Shrinking to 200");
+	while shadow.len() > 100 {
+		do_deletes(1, &mut writer, &mut shadow);
+	}
+	do_shrink(&mut writer, &mut shadow, 200);
+	do_random_ops(50, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
+	eprintln!("Growing to 10k");
+	writer.grow(10000).unwrap();
+	do_random_ops(10000, 5000, 0.25, &mut writer, &mut shadow, &mut rng);
+}
+
+#[test]
+fn test_bucket_ops() {
+	let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
+		1000, 1200, "test_bucket_ops"
+	).attach_writer();
+	match writer.entry(1.into()) {
+		Entry::Occupied(mut e) => { e.insert(2); },
+		Entry::Vacant(e) => { _ = e.insert(2).unwrap(); },
+	}
+	assert_eq!(writer.get_num_buckets_in_use(), 1);
+	assert_eq!(writer.get_num_buckets(), 1000);
+	assert_eq!(*writer.get(&1.into()).unwrap(), 2);
+	let pos = match writer.entry(1.into()) {
+		Entry::Occupied(e) => {
+			assert_eq!(e._key, 1.into());
+			let pos = e.bucket_pos as usize;
+			pos
+		},
+		Entry::Vacant(_) => { panic!("Insert didn't affect entry"); },
+	};
+	assert_eq!(writer.entry_at_bucket(pos).unwrap()._key, 1.into());
+	assert_eq!(*writer.get_at_bucket(pos).unwrap(), (1.into(), 2));
+	{
+		let ptr: *const usize = &*writer.get(&1.into()).unwrap();
+		assert_eq!(writer.get_bucket_for_value(ptr), pos);
+	}
+	writer.remove(&1.into());
+	assert!(writer.get(&1.into()).is_none());
+}
+
+#[test]
+fn test_shrink_zero() {
+	let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
+		1500, 2000, "test_shrink_zero"
+	).attach_writer();
+	writer.begin_shrink(0);
+	for i in 0..1500 {
+		writer.entry_at_bucket(i).map(|x| x.remove());
+	}
+	writer.finish_shrink().unwrap();
+	assert_eq!(writer.get_num_buckets_in_use(), 0);
+	let entry = writer.entry(1.into());
+	if let Entry::Vacant(v) = entry {
+		assert!(v.insert(2).is_err());
+	} else {
+		panic!("Somehow got non-vacant entry in empty map.")
+	}
+	writer.grow(50).unwrap();
+	let entry = writer.entry(1.into());
+	if let Entry::Vacant(v) = entry {
+		assert!(v.insert(2).is_ok());
+	} else {
+		panic!("Somehow got non-vacant entry in empty map.")
+	}
+	assert_eq!(writer.get_num_buckets_in_use(), 1);
+}
+
+#[test]
+#[should_panic]
+fn test_grow_oom() {
+    let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
+		1500, 2000, "test_grow_oom"
+	).attach_writer();
+	writer.grow(20000).unwrap();
+}
+
+#[test]
+#[should_panic]
+fn test_shrink_bigger() {
+    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
+		1500, 2500, "test_shrink_bigger"
+	).attach_writer();
+	writer.begin_shrink(2000);
+}
+
+#[test]
+#[should_panic]
+fn test_shrink_early_finish() {
+    let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
+		1500, 2500, "test_shrink_early_finish"
+	).attach_writer();
+	writer.finish_shrink().unwrap();
+}
+
+#[test]
+#[should_panic]
+fn test_shrink_fixed_size() {
+	let mut area = [MaybeUninit::uninit(); 10000];
+    let init_struct = HashMapInit::<TestKey, usize>::with_fixed(3, &mut area);
+    let mut writer = init_struct.attach_writer();
+	writer.begin_shrink(1);
 }
--- a/libs/neon-shmem/src/lib.rs
+++ b/libs/neon-shmem/src/lib.rs
@@ -2,3 +2,4 @@

 pub mod hash;
 pub mod shmem;
+pub mod sync;
--- a/libs/neon-shmem/src/shmem.rs
+++ b/libs/neon-shmem/src/shmem.rs
@@ -12,14 +12,14 @@ use nix::sys::mman::mmap as nix_mmap;
 use nix::sys::mman::munmap as nix_munmap;
 use nix::unistd::ftruncate as nix_ftruncate;

-/// ShmemHandle represents a shared memory area that can be shared by processes over fork().
-/// Unlike shared memory allocated by Postgres, this area is resizable, up to 'max_size' that's
+/// `ShmemHandle` represents a shared memory area that can be shared by processes over `fork()`.
+/// Unlike shared memory allocated by Postgres, this area is resizable, up to `max_size` that's
 /// specified at creation.
 ///
-/// The area is backed by an anonymous file created with memfd_create(). The full address space for
-/// 'max_size' is reserved up-front with mmap(), but whenever you call [`ShmemHandle::set_size`],
+/// The area is backed by an anonymous file created with `memfd_create()`. The full address space for
+/// `max_size` is reserved up-front with `mmap()`, but whenever you call [`ShmemHandle::set_size`],
 /// the underlying file is resized. Do not access the area beyond the current size. Currently, that
-/// will cause the file to be expanded, but we might use mprotect() etc. to enforce that in the
+/// will cause the file to be expanded, but we might use `mprotect()` etc. to enforce that in the
 /// future.
 pub struct ShmemHandle {
    /// memfd file descriptor
@@ -38,7 +38,7 @@ pub struct ShmemHandle {
 struct SharedStruct {
    max_size: usize,

-    /// Current size of the backing file. The high-order bit is used for the RESIZE_IN_PROGRESS flag
+    /// Current size of the backing file. The high-order bit is used for the [`RESIZE_IN_PROGRESS`] flag.
    current_size: AtomicUsize,
 }

@@ -46,7 +46,7 @@ const RESIZE_IN_PROGRESS: usize = 1 << 63;

 const HEADER_SIZE: usize = std::mem::size_of::<SharedStruct>();

-/// Error type returned by the ShmemHandle functions.
+/// Error type returned by the [`ShmemHandle`] functions.
 #[derive(thiserror::Error, Debug)]
 #[error("{msg}: {errno}")]
 pub struct Error {
@@ -55,8 +55,8 @@ pub struct Error {
 }

 impl Error {
-    fn new(msg: &str, errno: Errno) -> Error {
-        Error {
+    fn new(msg: &str, errno: Errno) -> Self {
+        Self {
            msg: msg.to_string(),
            errno,
        }
@@ -65,11 +65,11 @@ impl Error {

 impl ShmemHandle {
    /// Create a new shared memory area. To communicate between processes, the processes need to be
-    /// fork()'d after calling this, so that the ShmemHandle is inherited by all processes.
+    /// `fork()`'d after calling this, so that the `ShmemHandle` is inherited by all processes.
    ///
-    /// If the ShmemHandle is dropped, the memory is unmapped from the current process. Other
+    /// If the `ShmemHandle` is dropped, the memory is unmapped from the current process. Other
    /// processes can continue using it, however.
-    pub fn new(name: &str, initial_size: usize, max_size: usize) -> Result<ShmemHandle, Error> {
+    pub fn new(name: &str, initial_size: usize, max_size: usize) -> Result<Self, Error> {
        // create the backing anonymous file.
        let fd = create_backing_file(name)?;

@@ -80,17 +80,17 @@ impl ShmemHandle {
        fd: OwnedFd,
        initial_size: usize,
        max_size: usize,
-    ) -> Result<ShmemHandle, Error> {
-        // We reserve the high-order bit for the RESIZE_IN_PROGRESS flag, and the actual size
+    ) -> Result<Self, Error> {
+        // We reserve the high-order bit for the `RESIZE_IN_PROGRESS` flag, and the actual size
        // is a little larger than this because of the SharedStruct header. Make the upper limit
        // somewhat smaller than that, because with anything close to that, you'll run out of
        // memory anyway.
-        if max_size >= 1 << 48 {
-            panic!("max size {max_size} too large");
-        }
-        if initial_size > max_size {
-            panic!("initial size {initial_size} larger than max size {max_size}");
-        }
+        assert!(max_size < 1 << 48, "max size {max_size} too large");
+        
+        assert!(
+			initial_size <= max_size,
+            "initial size {initial_size} larger than max size {max_size}"
+        );

        // The actual initial / max size is the one given by the caller, plus the size of
        // 'SharedStruct'.
@@ -110,7 +110,7 @@ impl ShmemHandle {
                0,
            )
        }
-        .map_err(|e| Error::new("mmap failed: {e}", e))?;
+        .map_err(|e| Error::new("mmap failed", e))?;

        // Reserve space for the initial size
        enlarge_file(fd.as_fd(), initial_size as u64)?;
@@ -121,13 +121,13 @@ impl ShmemHandle {
            shared.write(SharedStruct {
                max_size: max_size.into(),
                current_size: AtomicUsize::new(initial_size),
-            })
-        };
+            });
+        }

        // The user data begins after the header
        let data_ptr = unsafe { start_ptr.cast().add(HEADER_SIZE) };

-        Ok(ShmemHandle {
+        Ok(Self {
            fd,
            max_size: max_size.into(),
            shared_ptr: shared,
@@ -140,28 +140,28 @@ impl ShmemHandle {
        unsafe { self.shared_ptr.as_ref() }
    }

-    /// Resize the shared memory area. 'new_size' must not be larger than the 'max_size' specified
+    /// Resize the shared memory area. `new_size` must not be larger than the `max_size` specified
    /// when creating the area.
    ///
    /// This may only be called from one process/thread concurrently. We detect that case
-    /// and return an Error.
+    /// and return an [`shmem::Error`](Error).
    pub fn set_size(&self, new_size: usize) -> Result<(), Error> {
        let new_size = new_size + HEADER_SIZE;
        let shared = self.shared();

-        if new_size > self.max_size {
-            panic!(
-                "new size ({} is greater than max size ({})",
-                new_size, self.max_size
-            );
-        }
-        assert_eq!(self.max_size, shared.max_size);
+        assert!(
+			new_size <= self.max_size,
+            "new size ({new_size}) is greater than max size ({})",
+			self.max_size
+        );

-        // Lock the area by setting the bit in 'current_size'
+		assert_eq!(self.max_size, shared.max_size);
+
+        // Lock the area by setting the bit in `current_size`
        //
        // Ordering::Relaxed would probably be sufficient here, as we don't access any other memory
-        // and the posix_fallocate/ftruncate call is surely a synchronization point anyway. But
-        // since this is not performance-critical, better safe than sorry .
+        // and the `posix_fallocate`/`ftruncate` call is surely a synchronization point anyway. But
+        // since this is not performance-critical, better safe than sorry.
        let mut old_size = shared.current_size.load(Ordering::Acquire);
        loop {
            if (old_size & RESIZE_IN_PROGRESS) != 0 {
@@ -188,7 +188,7 @@ impl ShmemHandle {
            use std::cmp::Ordering::{Equal, Greater, Less};
            match new_size.cmp(&old_size) {
                Less => nix_ftruncate(&self.fd, new_size as i64).map_err(|e| {
-                    Error::new("could not shrink shmem segment, ftruncate failed: {e}", e)
+                    Error::new("could not shrink shmem segment, ftruncate failed", e)
                }),
                Equal => Ok(()),
                Greater => enlarge_file(self.fd.as_fd(), new_size as u64),
@@ -206,8 +206,8 @@ impl ShmemHandle {

    /// Returns the current user-visible size of the shared memory segment.
    ///
-    /// NOTE: a concurrent set_size() call can change the size at any time. It is the caller's
-    /// responsibility not to access the area beyond the current size.
+    /// NOTE: a concurrent [`ShmemHandle::set_size()`] call can change the size at any time.
+	/// It is the caller's responsibility not to access the area beyond the current size.
    pub fn current_size(&self) -> usize {
        let total_current_size =
            self.shared().current_size.load(Ordering::Relaxed) & !RESIZE_IN_PROGRESS;
@@ -224,23 +224,23 @@ impl Drop for ShmemHandle {
    }
 }

-/// Create a "backing file" for the shared memory area. On Linux, use memfd_create(), to create an
+/// Create a "backing file" for the shared memory area. On Linux, use `memfd_create()`, to create an
 /// anonymous in-memory file. One macos, fall back to a regular file. That's good enough for
 /// development and testing, but in production we want the file to stay in memory.
 ///
-/// disable 'unused_variables' warnings, because in the macos path, 'name' is unused.
+/// Disable unused variables warnings because `name` is unused in the macos path.
 #[allow(unused_variables)]
 fn create_backing_file(name: &str) -> Result<OwnedFd, Error> {
    #[cfg(not(target_os = "macos"))]
    {
        nix::sys::memfd::memfd_create(name, nix::sys::memfd::MFdFlags::empty())
-            .map_err(|e| Error::new("memfd_create failed: {e}", e))
+            .map_err(|e| Error::new("memfd_create failed", e))
    }
    #[cfg(target_os = "macos")]
    {
        let file = tempfile::tempfile().map_err(|e| {
            Error::new(
-                "could not create temporary file to back shmem area: {e}",
+                "could not create temporary file to back shmem area",
                nix::errno::Errno::from_raw(e.raw_os_error().unwrap_or(0)),
            )
        })?;
@@ -255,7 +255,7 @@ fn enlarge_file(fd: BorrowedFd, size: u64) -> Result<(), Error> {
    {
        nix::fcntl::posix_fallocate(fd, 0, size as i64).map_err(|e| {
            Error::new(
-                "could not grow shmem segment, posix_fallocate failed: {e}",
+                "could not grow shmem segment, posix_fallocate failed",
                e,
            )
        })
@@ -264,7 +264,7 @@ fn enlarge_file(fd: BorrowedFd, size: u64) -> Result<(), Error> {
    #[cfg(target_os = "macos")]
    {
        nix::unistd::ftruncate(fd, size as i64)
-            .map_err(|e| Error::new("could not grow shmem segment, ftruncate failed: {e}", e))
+            .map_err(|e| Error::new("could not grow shmem segment, ftruncate failed", e))
    }
 }

@@ -330,7 +330,7 @@ mod tests {
        Ok(())
    }

-    /// This is used in tests to coordinate between test processes. It's like std::sync::Barrier,
+    /// This is used in tests to coordinate between test processes. It's like `std::sync::Barrier`,
    /// but is stored in the shared memory area and works across processes. It's implemented by
    /// polling, because e.g. standard rust mutexes are not guaranteed to work across processes.
    struct SimpleBarrier {
--- a/libs/neon-shmem/src/sync.rs
+++ b/libs/neon-shmem/src/sync.rs
@@ -0,0 +1,105 @@
+//! Simple utilities akin to what's in [`std::sync`] but designed to work with shared memory.
+
+use std::mem::MaybeUninit;
+use std::ptr::NonNull;
+
+use nix::errno::Errno;
+
+pub type RwLock<T> = lock_api::RwLock<PthreadRwLock, T>;
+pub(crate) type RwLockReadGuard<'a, T> = lock_api::RwLockReadGuard<'a, PthreadRwLock, T>;
+pub type RwLockWriteGuard<'a, T> = lock_api::RwLockWriteGuard<'a, PthreadRwLock, T>;
+pub type ValueReadGuard<'a, T> = lock_api::MappedRwLockReadGuard<'a, PthreadRwLock, T>;
+pub type ValueWriteGuard<'a, T> = lock_api::MappedRwLockWriteGuard<'a, PthreadRwLock, T>;
+
+/// Shared memory read-write lock.
+pub struct PthreadRwLock(Option<NonNull<libc::pthread_rwlock_t>>);
+
+impl PthreadRwLock {
+	pub fn new(lock: *mut libc::pthread_rwlock_t) -> Self {
+		unsafe {
+			let mut attrs = MaybeUninit::uninit();
+			// Ignoring return value here - only possible error is OOM.
+			libc::pthread_rwlockattr_init(attrs.as_mut_ptr());
+			libc::pthread_rwlockattr_setpshared(
+				attrs.as_mut_ptr(),
+				libc::PTHREAD_PROCESS_SHARED
+			);
+			// TODO(quantumish): worth making this function return Result?
+			libc::pthread_rwlock_init(lock, attrs.as_mut_ptr());
+			// Safety: POSIX specifies that "any function affecting the attributes
+			// object (including destruction) shall not affect any previously
+			// initialized read-write locks". 
+			libc::pthread_rwlockattr_destroy(attrs.as_mut_ptr());
+			Self(Some(NonNull::new_unchecked(lock)))
+		}
+	}
+	
+	fn inner(&self) -> NonNull<libc::pthread_rwlock_t> {
+		match self.0 {
+			None => panic!("PthreadRwLock constructed badly - something likely used RawMutex::INIT"),
+			Some(x) => x,
+		}
+	}
+}
+
+unsafe impl lock_api::RawRwLock for PthreadRwLock {
+	type GuardMarker = lock_api::GuardSend;
+	const INIT: Self = Self(None);	
+	
+	fn lock_shared(&self) {
+		unsafe {
+			let res = libc::pthread_rwlock_rdlock(self.inner().as_ptr());
+			if res != 0 {
+				panic!("rdlock failed with {}", Errno::from_raw(res));
+			}
+		}
+	}
+
+	fn try_lock_shared(&self) -> bool {
+		unsafe {
+			let res = libc::pthread_rwlock_tryrdlock(self.inner().as_ptr());
+			match res {
+				0 => true,
+				libc::EAGAIN => false,
+				_ => panic!("try_rdlock failed with {}", Errno::from_raw(res)),
+			}
+		}
+	}
+
+	fn lock_exclusive(&self) {
+		unsafe {
+			let res = libc::pthread_rwlock_wrlock(self.inner().as_ptr());
+			if res != 0 {
+				panic!("wrlock failed with {}", Errno::from_raw(res));
+			}
+		}
+	}
+
+	fn try_lock_exclusive(&self) -> bool {
+		unsafe {
+			let res = libc::pthread_rwlock_trywrlock(self.inner().as_ptr());
+			match res {
+				0 => true,
+				libc::EAGAIN => false,
+				_ => panic!("try_wrlock failed with {}", Errno::from_raw(res)),
+			}
+		}
+	}
+
+	unsafe fn unlock_exclusive(&self) {
+		unsafe { 
+			let res = libc::pthread_rwlock_unlock(self.inner().as_ptr());
+			if res != 0 {
+				panic!("unlock failed with {}", Errno::from_raw(res));
+			}
+		}
+	}
+	unsafe fn unlock_shared(&self) {
+		unsafe {
+			let res = libc::pthread_rwlock_unlock(self.inner().as_ptr());
+			if res != 0 {
+				panic!("unlock failed with {}", Errno::from_raw(res));
+			}
+		}
+	}
+}
--- a/pgxn/neon/communicator/src/integrated_cache.rs
+++ b/pgxn/neon/communicator/src/integrated_cache.rs
@@ -33,8 +33,7 @@ use pageserver_page_api::RelTag;

 use metrics::{IntCounter, IntGauge};

-use neon_shmem::hash::HashMapInit;
-use neon_shmem::hash::UpdateAction;
+use neon_shmem::hash::{HashMapInit, entry::Entry};
 use neon_shmem::shmem::ShmemHandle;

 // in # of entries
@@ -95,7 +94,7 @@ impl<'t> IntegratedCacheInitStruct<'t> {
    ) -> IntegratedCacheInitStruct<'t> {
        // Initialize the relsize cache in the fixed-size area
        let relsize_cache_handle =
-            neon_shmem::hash::HashMapInit::init_in_fixed_area(RELSIZE_CACHE_SIZE, shmem_area);
+            neon_shmem::hash::HashMapInit::with_fixed(RELSIZE_CACHE_SIZE, shmem_area);

        let max_bytes =
            HashMapInit::<BlockKey, BlockEntry>::estimate_size(max_file_cache_size as u32);
@@ -103,7 +102,7 @@ impl<'t> IntegratedCacheInitStruct<'t> {
        // Initialize the block map in a separate resizable shared memory area
        let shmem_handle = ShmemHandle::new("block mapping", 0, max_bytes).unwrap();

-        let block_map_handle = neon_shmem::hash::HashMapInit::init_in_shmem(
+        let block_map_handle = neon_shmem::hash::HashMapInit::with_shmem(
            initial_file_cache_size as u32,
            shmem_handle,
        );
@@ -343,24 +342,19 @@ impl<'t> IntegratedCacheWriteAccess<'t> {
    }

    pub fn remember_rel_size(&'t self, rel: &RelTag, nblocks: u32) {
-        let result =
-            self.relsize_cache
-                .update_with_fn(&RelKey::from(rel), |existing| match existing {
-                    None => {
-                        tracing::info!("inserting rel entry for {rel:?}, {nblocks} blocks");
-                        UpdateAction::Insert(RelEntry {
-                            nblocks: AtomicU32::new(nblocks),
-                        })
-                    }
-                    Some(e) => {
-                        tracing::info!("updating rel entry for {rel:?}, {nblocks} blocks");
-                        e.nblocks.store(nblocks, Ordering::Relaxed);
-                        UpdateAction::Nothing
-                    }
-                });
-
-        // FIXME: what to do if we run out of memory? Evict other relation entries?
-        result.expect("out of memory");
+        match self.relsize_cache.entry(RelKey::from(rel)) {
+			Entry::Vacant(e) => {
+				tracing::info!("inserting rel entry for {rel:?}, {nblocks} blocks");
+				// FIXME: what to do if we run out of memory? Evict other relation entries?
+                _ = e.insert(RelEntry {
+                    nblocks: AtomicU32::new(nblocks),
+                }).expect("out of memory");									
+			},
+			Entry::Occupied(e) => {
+                tracing::info!("updating rel entry for {rel:?}, {nblocks} blocks");
+				e.get().nblocks.store(nblocks, Ordering::Relaxed);
+            }
+		};        
    }

    /// Remember the given page contents in the cache.
@@ -386,34 +380,29 @@ impl<'t> IntegratedCacheWriteAccess<'t> {
            let mut old_cache_block = None;
            let mut found_existing = false;

-            let res = self.block_map.update_with_fn(&key, |existing| {
-                if let Some(block_entry) = existing {
-                    found_existing = true;
-
-                    // Prevent this entry from being evicted
-                    let pin_count = block_entry.pinned.fetch_add(1, Ordering::Relaxed);
-                    if pin_count > 0 {
-                        // this is unexpected, because the caller has obtained the io-in-progress lock,
-                        // so no one else should try to modify the page at the same time.
-                        // XXX: and I think a read should not be happening either, because the postgres
-                        // buffer is held locked. TODO: check these conditions and tidy this up a little. Seems fragile to just panic.
-                        panic!("block entry was unexpectedly pinned");
-                    }
-
-                    let cache_block = block_entry.cache_block.load(Ordering::Relaxed);
-                    old_cache_block = if cache_block != INVALID_CACHE_BLOCK {
-                        Some(cache_block)
-                    } else {
-                        None
-                    };
+			// NOTE(quantumish): honoring original semantics here (used to be update_with_fn)
+			// but I don't see any reason why this has to take a write lock.
+            if let Entry::Occupied(e) = self.block_map.entry(key.clone()) {
+				let block_entry = e.get();
+                found_existing = true;
+				
+                // Prevent this entry from being evicted
+                let pin_count = block_entry.pinned.fetch_add(1, Ordering::Relaxed);
+                if pin_count > 0 {
+                    // this is unexpected, because the caller has obtained the io-in-progress lock,
+                    // so no one else should try to modify the page at the same time.
+                    // XXX: and I think a read should not be happening either, because the postgres
+                    // buffer is held locked. TODO: check these conditions and tidy this up a little. Seems fragile to just panic.
+                    panic!("block entry was unexpectedly pinned");
                }
-                // if there was no existing entry, we will insert one, but not yet
-                UpdateAction::Nothing
-            });
-
-            // FIXME: what to do if we run out of memory? Evict other relation entries? Remove
-            // block entries first?
-            res.expect("out of memory");
+				
+                let cache_block = block_entry.cache_block.load(Ordering::Relaxed);
+                old_cache_block = if cache_block != INVALID_CACHE_BLOCK {
+                    Some(cache_block)
+                } else {
+                    None
+                };
+            }

            // Allocate a new block if required
            let cache_block = old_cache_block.unwrap_or_else(|| {
@@ -436,9 +425,11 @@ impl<'t> IntegratedCacheWriteAccess<'t> {
            // FIXME: unpin the block entry on error

            // Update the block entry
-            let res = self.block_map.update_with_fn(&key, |existing| {
-                assert_eq!(found_existing, existing.is_some());
-                if let Some(block_entry) = existing {
+			let entry = self.block_map.entry(key);
+			assert_eq!(found_existing, matches!(entry, Entry::Occupied(_)));
+            match entry {
+				Entry::Occupied(e) => {
+					let block_entry = e.get();
                    // Update the cache block
                    let old_blk = block_entry.cache_block.compare_exchange(
                        INVALID_CACHE_BLOCK,
@@ -454,20 +445,18 @@ impl<'t> IntegratedCacheWriteAccess<'t> {

                    let pin_count = block_entry.pinned.fetch_sub(1, Ordering::Relaxed);
                    assert!(pin_count > 0);
-                    UpdateAction::Nothing
-                } else {
-                    UpdateAction::Insert(BlockEntry {
+				}
+				Entry::Vacant(e) => {
+					// FIXME: what to do if we run out of memory? Evict other relation entries? Remove
+					// block entries first?
+                    _ = e.insert(BlockEntry {
                        lw_lsn: AtomicLsn::new(lw_lsn.0),
                        cache_block: AtomicU64::new(cache_block),
                        pinned: AtomicU64::new(0),
                        referenced: AtomicBool::new(true),
-                    })
+                    }).expect("out of memory");
                }
-            });
-
-            // FIXME: what to do if we run out of memory? Evict other relation entries? Remove
-            // block entries first?
-            res.expect("out of memory");
+			}
        } else {
            // !is_write
            //
@@ -494,29 +483,28 @@ impl<'t> IntegratedCacheWriteAccess<'t> {
                .expect("error writing to cache");
            // FIXME: handle errors gracefully.

-            let res = self.block_map.update_with_fn(&key, |existing| {
-                if let Some(block_entry) = existing {
-                    // FIXME: could there be concurrent readers?
+			match self.block_map.entry(key) { 
+				Entry::Occupied(e) => {
+					let block_entry = e.get();
+					// FIXME: could there be concurrent readers?
                    assert!(block_entry.pinned.load(Ordering::Relaxed) == 0);

                    let old_cache_block = block_entry.cache_block.swap(cache_block, Ordering::Relaxed);
                    if old_cache_block != INVALID_CACHE_BLOCK {
                        panic!("remember_page called in !is_write mode, but page is already cached at blk {old_cache_block}");
                    }
-                    UpdateAction::Nothing
-                } else {
-                    UpdateAction::Insert(BlockEntry {
+                },
+				Entry::Vacant(e) => {
+					// FIXME: what to do if we run out of memory? Evict other relation entries? Remove
+					// block entries first?
+					_ = e.insert(BlockEntry {
                        lw_lsn: AtomicLsn::new(lw_lsn.0),
                        cache_block: AtomicU64::new(cache_block),
                        pinned: AtomicU64::new(0),
                        referenced: AtomicBool::new(true),
-                    })
+                    }).expect("out of memory");
                }
-            });
-
-            // FIXME: what to do if we run out of memory? Evict other relation entries? Remove
-            // block entries first?
-            res.expect("out of memory");
+            }
        }
    }

@@ -585,13 +573,13 @@ impl<'t> IntegratedCacheWriteAccess<'t> {
            let num_buckets = self.block_map.get_num_buckets();
            match self
                .block_map
-                .get_bucket((*clock_hand) % num_buckets)
+                .get_at_bucket((*clock_hand) % num_buckets)
                .as_deref()
            {
                None => {
                    // This bucket was unused
                }
-                Some(blk_entry) => {
+                Some((_, blk_entry)) => {
                    if !blk_entry.referenced.swap(false, Ordering::Relaxed) {
                        // Evict this. Maybe.
                        evict_this = true;
@@ -602,37 +590,25 @@ impl<'t> IntegratedCacheWriteAccess<'t> {
            if evict_this {
                // grab the write lock
                let mut evicted_cache_block = None;
-                let res =
-                    self.block_map
-                        .update_with_fn_at_bucket(*clock_hand % num_buckets, |old| {
-                            match old {
-                                None => UpdateAction::Nothing,
-                                Some(old) => {
-                                    // note: all the accesses to 'pinned' currently happen
-                                    // within update_with_fn(), or while holding ValueReadGuard, which protects from concurrent
-                                    // updates. Otherwise, another thread could set the 'pinned'
-                                    // flag just after we have checked it here.
-                                    if old.pinned.load(Ordering::Relaxed) != 0 {
-                                        return UpdateAction::Nothing;
-                                    }
-
-                                    let _ = self
-                                        .global_lw_lsn
-                                        .fetch_max(old.lw_lsn.load().0, Ordering::Relaxed);
-                                    let cache_block = old
-                                        .cache_block
-                                        .swap(INVALID_CACHE_BLOCK, Ordering::Relaxed);
-                                    if cache_block != INVALID_CACHE_BLOCK {
-                                        evicted_cache_block = Some(cache_block);
-                                    }
-                                    UpdateAction::Remove
-                                }
-                            }
-                        });
-
-                // Out of memory should not happen here, as we're only updating existing values,
-                // not inserting new entries to the map.
-                res.expect("out of memory");
+                if let Some(e) = self.block_map.entry_at_bucket(*clock_hand % num_buckets) {
+					let old = e.get();
+					// note: all the accesses to 'pinned' currently happen
+                    // within update_with_fn(), or while holding ValueReadGuard, which protects from concurrent
+                    // updates. Otherwise, another thread could set the 'pinned'
+                    // flag just after we have checked it here.
+                    if old.pinned.load(Ordering::Relaxed) == 0 {
+						let _ = self
+							.global_lw_lsn
+							.fetch_max(old.lw_lsn.load().0, Ordering::Relaxed);
+						let cache_block = old
+							.cache_block
+							.swap(INVALID_CACHE_BLOCK, Ordering::Relaxed);
+						if cache_block != INVALID_CACHE_BLOCK {
+							evicted_cache_block = Some(cache_block);
+						}
+						e.remove();
+					}
+                }

                if evicted_cache_block.is_some() {
                    self.page_evictions_counter.inc();