From b1e3161d4e0f602e0f56efa854a171312c00d6a1 Mon Sep 17 00:00:00 2001 From: David Freifeld Date: Thu, 26 Jun 2025 14:32:32 -0700 Subject: [PATCH 1/7] Satisfy `cargo clippy` lints, simplify shrinking API --- libs/neon-shmem/src/hash.rs | 193 ++++++++++++++++++------------ libs/neon-shmem/src/hash/core.rs | 24 ++-- libs/neon-shmem/src/hash/entry.rs | 28 +++-- libs/neon-shmem/src/hash/tests.rs | 40 +------ libs/neon-shmem/src/shmem.rs | 92 +++++++------- 5 files changed, 200 insertions(+), 177 deletions(-) diff --git a/libs/neon-shmem/src/hash.rs b/libs/neon-shmem/src/hash.rs index a3d465db93..36fbb1112c 100644 --- a/libs/neon-shmem/src/hash.rs +++ b/libs/neon-shmem/src/hash.rs @@ -1,8 +1,8 @@ -//! Resizable hash table implementation on top of byte-level storage (either `shmem` or fixed byte array). +//! Resizable hash table implementation on top of byte-level storage (either a [`ShmemHandle`] or a fixed byte array). //! //! This hash table has two major components: the bucket array and the dictionary. Each bucket within the -//! bucket array contains a Option<(K, V)> and an index of another bucket. In this way there is both an -//! implicit freelist within the bucket array (None buckets point to other None entries) and various hash +//! bucket array contains a `Option<(K, V)>` and an index of another bucket. In this way there is both an +//! implicit freelist within the bucket array (`None` buckets point to other `None` entries) and various hash //! chains within the bucket array (a Some bucket will point to other Some buckets that had the same hash). //! //! Buckets are never moved unless they are within a region that is being shrunk, and so the actual hash- @@ -10,14 +10,15 @@ //! within the dictionary is decided based on its hash, the data is inserted into an empty bucket based //! off of the freelist, and then the index of said bucket is placed in the dictionary. //! -//! This map is resizable (if initialized on top of a `ShmemHandle`). Both growing and shrinking happen +//! This map is resizable (if initialized on top of a [`ShmemHandle`]). Both growing and shrinking happen //! in-place and are at a high level achieved by expanding/reducing the bucket array and rebuilding the //! dictionary by rehashing all keys. use std::hash::{Hash, BuildHasher}; use std::mem::MaybeUninit; +use std::default::Default; -use crate::shmem::ShmemHandle; +use crate::{shmem, shmem::ShmemHandle}; mod core; pub mod entry; @@ -28,11 +29,13 @@ mod tests; use core::{Bucket, CoreHashMap, INVALID_POS}; use entry::{Entry, OccupiedEntry}; -/// Builder for a `HashMapAccess`. +/// Builder for a [`HashMapAccess`]. +#[must_use] pub struct HashMapInit<'a, K, V, S = rustc_hash::FxBuildHasher> { shmem_handle: Option, shared_ptr: *mut HashMapShared<'a, K, V>, shared_size: usize, + shrink_mode: HashMapShrinkMode, hasher: S, num_buckets: u32, } @@ -42,12 +45,34 @@ pub struct HashMapAccess<'a, K, V, S = rustc_hash::FxBuildHasher> { shmem_handle: Option, shared_ptr: *mut HashMapShared<'a, K, V>, hasher: S, + shrink_mode: HashMapShrinkMode, } -unsafe impl<'a, K: Sync, V: Sync, S> Sync for HashMapAccess<'a, K, V, S> {} -unsafe impl<'a, K: Send, V: Send, S> Send for HashMapAccess<'a, K, V, S> {} +/// Enum specifying what behavior to have surrounding occupied entries in what is +/// about-to-be-shrinked space during a call to [`HashMapAccess::finish_shrink`]. +#[derive(PartialEq, Eq)] +pub enum HashMapShrinkMode { + /// Remap entry to the range of buckets that will remain after shrinking. 
+ /// + /// Requires that caller has left enough room within the map such that this is possible. + Remap, + /// Remove any entries remaining in soon to be deallocated space. + /// + /// Only really useful if you legitimately do not care what entries are removed. + /// Should primarily be used for testing. + Remove, +} -impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> { +impl Default for HashMapShrinkMode { + fn default() -> Self { + Self::Remap + } +} + +unsafe impl Sync for HashMapAccess<'_, K, V, S> {} +unsafe impl Send for HashMapAccess<'_, K, V, S> {} + +impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> { pub fn with_hasher(self, hasher: T) -> HashMapInit<'a, K, V, T> { HashMapInit { hasher, @@ -55,9 +80,14 @@ impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> { shared_ptr: self.shared_ptr, shared_size: self.shared_size, num_buckets: self.num_buckets, + shrink_mode: self.shrink_mode, } } + pub fn with_shrink_mode(self, mode: HashMapShrinkMode) -> Self { + Self { shrink_mode: mode, ..self } + } + /// Loosely (over)estimate the size needed to store a hash table with `num_buckets` buckets. pub fn estimate_size(num_buckets: u32) -> usize { // add some margin to cover alignment etc. @@ -98,11 +128,12 @@ impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> { HashMapAccess { shmem_handle: self.shmem_handle, shared_ptr: self.shared_ptr, + shrink_mode: self.shrink_mode, hasher: self.hasher, } } - /// Initialize a table for reading. Currently identical to `attach_writer`. + /// Initialize a table for reading. Currently identical to [`HashMapInit::attach_writer`]. pub fn attach_reader(self) -> HashMapAccess<'a, K, V, S> { self.attach_writer() } @@ -114,7 +145,7 @@ impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> { /// relies on the memory layout! The data structures are laid out in the contiguous shared memory /// area as follows: /// -/// HashMapShared +/// [`HashMapShared`] /// [buckets] /// [dictionary] /// @@ -131,18 +162,22 @@ where pub fn with_fixed( num_buckets: u32, area: &'a mut [MaybeUninit], - ) -> HashMapInit<'a, K, V> { + ) -> Self { Self { num_buckets, shmem_handle: None, shared_ptr: area.as_mut_ptr().cast(), shared_size: area.len(), - hasher: rustc_hash::FxBuildHasher::default(), + shrink_mode: HashMapShrinkMode::default(), + hasher: rustc_hash::FxBuildHasher, } } /// Place a new hash map in the given shared memory area - pub fn with_shmem(num_buckets: u32, shmem: ShmemHandle) -> HashMapInit<'a, K, V> { + /// + /// # Panics + /// Will panic on failure to resize area to expected map size. + pub fn with_shmem(num_buckets: u32, shmem: ShmemHandle) -> Self { let size = Self::estimate_size(num_buckets); shmem .set_size(size) @@ -152,12 +187,13 @@ where shared_ptr: shmem.data_ptr.as_ptr().cast(), shmem_handle: Some(shmem), shared_size: size, - hasher: rustc_hash::FxBuildHasher::default() + shrink_mode: HashMapShrinkMode::default(), + hasher: rustc_hash::FxBuildHasher } } /// Make a resizable hash map within a new shared memory area with the given name. 
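    // Illustrative usage sketch (assumed caller code, arbitrary key/value types
    // and sizes; only constructors and accessors visible in this file are used):
    // building a resizable map with a non-default shrink mode and inserting
    // through the `Entry` API.
    //
    //     let mut writer = HashMapInit::<u64, u64>::new_resizeable(1024, 4096)
    //         .with_shrink_mode(HashMapShrinkMode::Remove)
    //         .attach_writer();
    //     let hash = writer.get_hash_value(&42);
    //     match writer.entry_with_hash(42, hash) {
    //         Entry::Occupied(mut e) => { e.insert(7); }
    //         Entry::Vacant(e) => { e.insert(7).expect("map full"); }
    //     }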
- pub fn new_resizeable_named(num_buckets: u32, max_buckets: u32, name: &str) -> HashMapInit<'a, K, V> { + pub fn new_resizeable_named(num_buckets: u32, max_buckets: u32, name: &str) -> Self { let size = Self::estimate_size(num_buckets); let max_size = Self::estimate_size(max_buckets); let shmem = ShmemHandle::new(name, size, max_size) @@ -168,16 +204,17 @@ where shared_ptr: shmem.data_ptr.as_ptr().cast(), shmem_handle: Some(shmem), shared_size: size, - hasher: rustc_hash::FxBuildHasher::default() + shrink_mode: HashMapShrinkMode::default(), + hasher: rustc_hash::FxBuildHasher } } /// Make a resizable hash map within a new anonymous shared memory area. - pub fn new_resizeable(num_buckets: u32, max_buckets: u32) -> HashMapInit<'a, K, V> { + pub fn new_resizeable(num_buckets: u32, max_buckets: u32) -> Self { use std::sync::atomic::{AtomicUsize, Ordering}; - const COUNTER: AtomicUsize = AtomicUsize::new(0); + static COUNTER: AtomicUsize = AtomicUsize::new(0); let val = COUNTER.fetch_add(1, Ordering::Relaxed); - let name = format!("neon_shmem_hmap{}", val); + let name = format!("neon_shmem_hmap{val}"); Self::new_resizeable_named(num_buckets, max_buckets, &name) } } @@ -214,7 +251,7 @@ where e.remove(); } Entry::Vacant(_) => {} - }; + } } /// Optionally return the entry for a bucket at a given index if it exists. @@ -249,7 +286,7 @@ where let map = unsafe { self.shared_ptr.as_ref() }.unwrap(); let origin = map.inner.buckets.as_ptr(); - let idx = (val_ptr as usize - origin as usize) / (size_of::>() as usize); + let idx = (val_ptr as usize - origin as usize) / size_of::>(); assert!(idx < map.inner.buckets.len()); idx @@ -265,14 +302,14 @@ where pub fn clear(&mut self) { let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); let inner = &mut map.inner; - inner.clear() + inner.clear(); } /// Perform an in-place rehash of some region (0..`rehash_buckets`) of the table and reset /// the `buckets` and `dictionary` slices to be as long as `num_buckets`. Resets the freelist /// in the process. 
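    // Worked example of the dictionary/chain arithmetic used below (numbers are
    // arbitrary): with a 4-entry dictionary, a key whose hash is 10 maps to slot
    // 10 % 4 = 2; the key's bucket index is written to dictionary[2], and that
    // bucket's `next` field takes whatever index dictionary[2] held before, so a
    // later lookup walks dictionary[2] -> bucket -> bucket.next ... until it hits
    // INVALID_POS.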
fn rehash_dict( - &mut self, + &self, inner: &mut CoreHashMap<'a, K, V>, buckets_ptr: *mut core::Bucket, end_ptr: *mut u8, @@ -293,22 +330,21 @@ where buckets = std::slice::from_raw_parts_mut(buckets_ptr, num_buckets as usize); dictionary = std::slice::from_raw_parts_mut(dictionary_ptr, dictionary_size); - (dictionary_ptr, dictionary_size) } - for i in 0..dictionary.len() { - dictionary[i] = INVALID_POS; + for e in dictionary.iter_mut() { + *e = INVALID_POS; } - for i in 0..rehash_buckets as usize { - if buckets[i].inner.is_none() { - buckets[i].next = inner.free_head; + for (i, bucket) in buckets.iter_mut().enumerate().take(rehash_buckets as usize) { + if bucket.inner.is_none() { + bucket.next = inner.free_head; inner.free_head = i as u32; continue; } - let hash = self.hasher.hash_one(&buckets[i].inner.as_ref().unwrap().0); + let hash = self.hasher.hash_one(&bucket.inner.as_ref().unwrap().0); let pos: usize = (hash % dictionary.len() as u64) as usize; - buckets[i].next = dictionary[pos]; + bucket.next = dictionary[pos]; dictionary[pos] = i as u32; } @@ -322,7 +358,7 @@ where let inner = &mut map.inner; let num_buckets = inner.get_num_buckets() as u32; let size_bytes = HashMapInit::::estimate_size(num_buckets); - let end_ptr: *mut u8 = unsafe { (self.shared_ptr as *mut u8).add(size_bytes) }; + let end_ptr: *mut u8 = unsafe { self.shared_ptr.byte_add(size_bytes).cast() }; let buckets_ptr = inner.buckets.as_mut_ptr(); self.rehash_dict(inner, buckets_ptr, end_ptr, num_buckets, num_buckets); } @@ -332,14 +368,18 @@ where /// 1. Grows the underlying shared memory area /// 2. Initializes new buckets and overwrites the current dictionary /// 3. Rehashes the dictionary - pub fn grow(&mut self, num_buckets: u32) -> Result<(), crate::shmem::Error> { + /// + /// # Panics + /// Panics if called on a map initialized with [`HashMapInit::with_fixed`]. + /// + /// # Errors + /// Returns an [`shmem::Error`] if any errors occur resizing the memory region. + pub fn grow(&mut self, num_buckets: u32) -> Result<(), shmem::Error> { let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); let inner = &mut map.inner; let old_num_buckets = inner.buckets.len() as u32; - if num_buckets < old_num_buckets { - panic!("grow called with a smaller number of buckets"); - } + assert!(num_buckets >= old_num_buckets, "grow called with a smaller number of buckets"); if num_buckets == old_num_buckets { return Ok(()); } @@ -352,15 +392,15 @@ where shmem_handle.set_size(size_bytes)?; let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) }; - // Initialize new buckets. The new buckets are linked to the free list. NB: This overwrites - // the dictionary! + // Initialize new buckets. The new buckets are linked to the free list. + // NB: This overwrites the dictionary! let buckets_ptr = inner.buckets.as_mut_ptr(); unsafe { for i in old_num_buckets..num_buckets { - let bucket_ptr = buckets_ptr.add(i as usize); - bucket_ptr.write(core::Bucket { + let bucket = buckets_ptr.add(i as usize); + bucket.write(core::Bucket { next: if i < num_buckets-1 { - i as u32 + 1 + i + 1 } else { inner.free_head }, @@ -376,11 +416,16 @@ where } /// Begin a shrink, limiting all new allocations to be in buckets with index below `num_buckets`. + /// + /// # Panics + /// Panics if called on a map initialized with [`HashMapInit::with_fixed`] or if `num_buckets` is + /// greater than the number of buckets in the map. 
pub fn begin_shrink(&mut self, num_buckets: u32) { let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); - if num_buckets > map.inner.get_num_buckets() as u32 { - panic!("shrink called with a larger number of buckets"); - } + assert!( + num_buckets <= map.inner.get_num_buckets() as u32, + "shrink called with a larger number of buckets" + ); _ = self .shmem_handle .as_ref() @@ -388,47 +433,47 @@ where map.inner.alloc_limit = num_buckets; } - /// Returns whether a shrink operation is currently in progress. - pub fn is_shrinking(&self) -> bool { + /// If a shrink operation is underway, returns the target size of the map. Otherwise, returns None. + pub fn shrink_goal(&self) -> Option { let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); - map.inner.is_shrinking() + let goal = map.inner.alloc_limit; + if goal == INVALID_POS { None } else { Some(goal as usize) } } - /// Returns how many entries need to be evicted before shrink can complete. - pub fn shrink_remaining(&self) -> usize { - let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); - let inner = &mut map.inner; - if !inner.is_shrinking() { - panic!("shrink_remaining called when no ongoing shrink") - } else { - inner.buckets_in_use - .checked_sub(inner.alloc_limit) - .unwrap_or(0) - as usize - } - } - /// Complete a shrink after caller has evicted entries, removing the unused buckets and rehashing. - pub fn finish_shrink(&mut self) -> Result<(), crate::shmem::Error> { + /// + /// # Panics + /// The following cases result in a panic: + /// - Calling this function on a map initialized with [`HashMapInit::with_fixed`]. + /// - Calling this function on a map when no shrink operation is in progress. + /// - Calling this function on a map with `shrink_mode` set to [`HashMapShrinkMode::Remap`] and + /// [`HashMapAccess::get_num_buckets_in_use`] returns a value higher than [`HashMapAccess::shrink_goal`]. + /// + /// # Errors + /// Returns an [`shmem::Error`] if any errors occur resizing the memory region. 
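    // Sketch of the full shrink protocol under the default `Remap` mode, mirroring
    // `do_shrink` in the tests; the eviction policy and the source of `victims`
    // are the caller's choice and are assumed here:
    //
    //     writer.begin_shrink(target);
    //     for k in victims {
    //         if writer.get_num_buckets_in_use() <= target as usize {
    //             break;
    //         }
    //         let hash = writer.get_hash_value(&k);
    //         if let Entry::Occupied(e) = writer.entry_with_hash(k, hash) {
    //             e.remove();
    //         }
    //     }
    //     writer.finish_shrink().expect("shmem resize failed");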
+ pub fn finish_shrink(&mut self) -> Result<(), shmem::Error> { let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); let inner = &mut map.inner; - if !inner.is_shrinking() { - panic!("called finish_shrink when no shrink is in progress"); - } + assert!(inner.is_shrinking(), "called finish_shrink when no shrink is in progress"); let num_buckets = inner.alloc_limit; if inner.get_num_buckets() == num_buckets as usize { return Ok(()); - } else if inner.buckets_in_use > num_buckets { - panic!("called finish_shrink before enough entries were removed"); - } + } - for i in (num_buckets as usize)..inner.buckets.len() { - if let Some((k, v)) = inner.buckets[i].inner.take() { - // alloc bucket increases buckets in use, so need to decrease since we're just moving - inner.buckets_in_use -= 1; - inner.alloc_bucket(k, v).unwrap(); + if self.shrink_mode == HashMapShrinkMode::Remap { + assert!( + inner.buckets_in_use <= num_buckets, + "called finish_shrink before enough entries were removed" + ); + + for i in (num_buckets as usize)..inner.buckets.len() { + if let Some((k, v)) = inner.buckets[i].inner.take() { + // alloc bucket increases buckets in use, so need to decrease since we're just moving + inner.buckets_in_use -= 1; + inner.alloc_bucket(k, v).unwrap(); + } } } diff --git a/libs/neon-shmem/src/hash/core.rs b/libs/neon-shmem/src/hash/core.rs index 22c44f20ac..b2cf788d21 100644 --- a/libs/neon-shmem/src/hash/core.rs +++ b/libs/neon-shmem/src/hash/core.rs @@ -5,6 +5,7 @@ use std::mem::MaybeUninit; use crate::hash::entry::{Entry, OccupiedEntry, PrevPos, VacantEntry}; +/// Invalid position within the map (either within the dictionary or bucket array). pub(crate) const INVALID_POS: u32 = u32::MAX; /// Fundamental storage unit within the hash table. Either empty or contains a key-value pair. @@ -18,13 +19,13 @@ pub(crate) struct Bucket { /// Core hash table implementation. pub(crate) struct CoreHashMap<'a, K, V> { - /// Dictionary used to map hashes to bucket indices. + /// Dictionary used to map hashes to bucket indices. pub(crate) dictionary: &'a mut [u32], /// Buckets containing key-value pairs. pub(crate) buckets: &'a mut [Bucket], /// Head of the freelist. pub(crate) free_head: u32, - /// Maximum index of a bucket allowed to be allocated. INVALID_POS if no limit. + /// Maximum index of a bucket allowed to be allocated. [`INVALID_POS`] if no limit. pub(crate) alloc_limit: u32, /// The number of currently occupied buckets. pub(crate) buckets_in_use: u32, @@ -36,10 +37,7 @@ pub(crate) struct CoreHashMap<'a, K, V> { #[derive(Debug)] pub struct FullError(); -impl<'a, K: Hash + Eq, V> CoreHashMap<'a, K, V> -where - K: Clone + Hash + Eq, -{ +impl<'a, K: Clone + Hash + Eq, V> CoreHashMap<'a, K, V> { const FILL_FACTOR: f32 = 0.60; /// Estimate the size of data contained within the the hash map. 
@@ -59,7 +57,7 @@ where pub fn new( buckets: &'a mut [MaybeUninit>], dictionary: &'a mut [MaybeUninit], - ) -> CoreHashMap<'a, K, V> { + ) -> Self { // Initialize the buckets for i in 0..buckets.len() { buckets[i].write(Bucket { @@ -73,8 +71,8 @@ where } // Initialize the dictionary - for i in 0..dictionary.len() { - dictionary[i].write(INVALID_POS); + for e in dictionary.iter_mut() { + e.write(INVALID_POS); } // TODO: use std::slice::assume_init_mut() once it stabilizes @@ -84,7 +82,7 @@ where std::slice::from_raw_parts_mut(dictionary.as_mut_ptr().cast(), dictionary.len()) }; - CoreHashMap { + Self { dictionary, buckets, free_head: 0, @@ -105,13 +103,13 @@ where let bucket = &self.buckets[next as usize]; let (bucket_key, bucket_value) = bucket.inner.as_ref().expect("entry is in use"); if bucket_key == key { - return Some(&bucket_value); + return Some(bucket_value); } next = bucket.next; } } - /// Get the `Entry` associated with a key given hash. This should be used for updates/inserts. + /// Get the [`Entry`] associated with a key given hash. This should be used for updates/inserts. pub fn entry_with_hash(&mut self, key: K, hash: u64) -> Entry<'a, '_, K, V> { let dict_pos = hash as usize % self.dictionary.len(); let first = self.dictionary[dict_pos]; @@ -236,7 +234,7 @@ where bucket.next = INVALID_POS; bucket.inner = Some((key, value)); - return Ok(pos); + Ok(pos) } } diff --git a/libs/neon-shmem/src/hash/entry.rs b/libs/neon-shmem/src/hash/entry.rs index 24c124189b..5231061b8e 100644 --- a/libs/neon-shmem/src/hash/entry.rs +++ b/libs/neon-shmem/src/hash/entry.rs @@ -1,4 +1,4 @@ -//! Like std::collections::hash_map::Entry; +//! Equivalent of [`std::collections::hash_map::Entry`] for this hashmap. use crate::hash::core::{CoreHashMap, FullError, INVALID_POS}; @@ -30,11 +30,11 @@ pub struct OccupiedEntry<'a, 'b, K, V> { pub(crate) _key: K, /// The index of the previous entry in the chain. pub(crate) prev_pos: PrevPos, - /// The position of the bucket in the CoreHashMap's buckets array. + /// The position of the bucket in the [`CoreHashMap`] bucket array. pub(crate) bucket_pos: u32, } -impl<'a, 'b, K, V> OccupiedEntry<'a, 'b, K, V> { +impl OccupiedEntry<'_, '_, K, V> { pub fn get(&self) -> &V { &self.map.buckets[self.bucket_pos as usize] .inner @@ -55,20 +55,25 @@ impl<'a, 'b, K, V> OccupiedEntry<'a, 'b, K, V> { pub fn insert(&mut self, value: V) -> V { let bucket = &mut self.map.buckets[self.bucket_pos as usize]; // This assumes inner is Some, which it must be for an OccupiedEntry - let old_value = mem::replace(&mut bucket.inner.as_mut().unwrap().1, value); - old_value + mem::replace(&mut bucket.inner.as_mut().unwrap().1, value) } /// Removes the entry from the hash map, returning the value originally stored within it. + /// + /// # Panics + /// Panics if the `prev_pos` field is equal to [`PrevPos::Unknown`]. In practice, this means + /// the entry was obtained via calling something like [`CoreHashMap::entry_at_bucket`]. pub fn remove(self) -> V { // CoreHashMap::remove returns Option<(K, V)>. We know it's Some for an OccupiedEntry. 
let bucket = &mut self.map.buckets[self.bucket_pos as usize]; // unlink it from the chain match self.prev_pos { - PrevPos::First(dict_pos) => self.map.dictionary[dict_pos as usize] = bucket.next, + PrevPos::First(dict_pos) => { + self.map.dictionary[dict_pos as usize] = bucket.next; + }, PrevPos::Chained(bucket_pos) => { - self.map.buckets[bucket_pos as usize].next = bucket.next + self.map.buckets[bucket_pos as usize].next = bucket.next; }, PrevPos::Unknown => panic!("can't safely remove entry with unknown previous entry"), } @@ -80,7 +85,7 @@ impl<'a, 'b, K, V> OccupiedEntry<'a, 'b, K, V> { self.map.free_head = self.bucket_pos; self.map.buckets_in_use -= 1; - return old_value.unwrap().1; + old_value.unwrap().1 } } @@ -94,8 +99,11 @@ pub struct VacantEntry<'a, 'b, K, V> { pub(crate) dict_pos: u32, } -impl<'a, 'b, K: Clone + Hash + Eq, V> VacantEntry<'a, 'b, K, V> { +impl<'b, K: Clone + Hash + Eq, V> VacantEntry<'_, 'b, K, V> { /// Insert a value into the vacant entry, finding and populating an empty bucket in the process. + /// + /// # Errors + /// Will return [`FullError`] if there are no unoccupied buckets in the map. pub fn insert(self, value: V) -> Result<&'b mut V, FullError> { let pos = self.map.alloc_bucket(self.key, value)?; if pos == INVALID_POS { @@ -106,6 +114,6 @@ impl<'a, 'b, K: Clone + Hash + Eq, V> VacantEntry<'a, 'b, K, V> { self.map.dictionary[self.dict_pos as usize] = pos; let result = &mut self.map.buckets[pos as usize].inner.as_mut().unwrap().1; - return Ok(result); + Ok(result) } } diff --git a/libs/neon-shmem/src/hash/tests.rs b/libs/neon-shmem/src/hash/tests.rs index 8a6e8a0a29..209db599b5 100644 --- a/libs/neon-shmem/src/hash/tests.rs +++ b/libs/neon-shmem/src/hash/tests.rs @@ -1,14 +1,11 @@ use std::collections::BTreeMap; use std::collections::HashSet; -use std::fmt::{Debug, Formatter}; -use std::mem::uninitialized; +use std::fmt::Debug; use std::mem::MaybeUninit; -use std::sync::atomic::{AtomicUsize, Ordering}; use crate::hash::HashMapAccess; use crate::hash::HashMapInit; use crate::hash::Entry; -use crate::shmem::ShmemHandle; use rand::seq::SliceRandom; use rand::{Rng, RngCore}; @@ -98,30 +95,6 @@ fn sparse() { test_inserts(&keys); } -struct TestValue(AtomicUsize); - -impl TestValue { - fn new(val: usize) -> TestValue { - TestValue(AtomicUsize::new(val)) - } - - fn load(&self) -> usize { - self.0.load(Ordering::Relaxed) - } -} - -impl Clone for TestValue { - fn clone(&self) -> TestValue { - TestValue::new(self.load()) - } -} - -impl Debug for TestValue { - fn fmt(&self, fmt: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { - write!(fmt, "{:?}", self.load()) - } -} - #[derive(Clone, Debug)] struct TestOp(TestKey, Option); @@ -177,7 +150,7 @@ fn do_deletes( writer: &mut HashMapAccess, shadow: &mut BTreeMap, ) { - for i in 0..num_ops { + for _ in 0..num_ops { let (k, _) = shadow.pop_first().unwrap(); let hash = writer.get_hash_value(&k); writer.remove_with_hash(&k, hash); @@ -187,7 +160,6 @@ fn do_deletes( fn do_shrink( writer: &mut HashMapAccess, shadow: &mut BTreeMap, - from: u32, to: u32 ) { writer.begin_shrink(to); @@ -195,7 +167,7 @@ fn do_shrink( let (k, _) = shadow.pop_first().unwrap(); let hash = writer.get_hash_value(&k); let entry = writer.entry_with_hash(k, hash); - if let Entry::Occupied(mut e) = entry { + if let Entry::Occupied(e) = entry { e.remove(); } } @@ -260,7 +232,7 @@ fn test_shrink() { let mut rng = rand::rng(); do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng); - do_shrink(&mut writer, &mut shadow, 1500, 1000); + 
do_shrink(&mut writer, &mut shadow, 1000); do_deletes(500, &mut writer, &mut shadow); do_random_ops(10000, 500, 0.75, &mut writer, &mut shadow, &mut rng); assert!(writer.get_num_buckets_in_use() <= 1000); @@ -276,13 +248,13 @@ fn test_shrink_grow_seq() { do_random_ops(500, 1000, 0.1, &mut writer, &mut shadow, &mut rng); eprintln!("Shrinking to 750"); - do_shrink(&mut writer, &mut shadow, 1000, 750); + do_shrink(&mut writer, &mut shadow, 750); do_random_ops(200, 1000, 0.5, &mut writer, &mut shadow, &mut rng); eprintln!("Growing to 1500"); writer.grow(1500).unwrap(); do_random_ops(600, 1500, 0.1, &mut writer, &mut shadow, &mut rng); eprintln!("Shrinking to 200"); - do_shrink(&mut writer, &mut shadow, 1500, 200); + do_shrink(&mut writer, &mut shadow, 200); do_deletes(100, &mut writer, &mut shadow); do_random_ops(50, 1500, 0.25, &mut writer, &mut shadow, &mut rng); eprintln!("Growing to 10k"); diff --git a/libs/neon-shmem/src/shmem.rs b/libs/neon-shmem/src/shmem.rs index 21b1454b10..7c7285f67e 100644 --- a/libs/neon-shmem/src/shmem.rs +++ b/libs/neon-shmem/src/shmem.rs @@ -12,14 +12,14 @@ use nix::sys::mman::mmap as nix_mmap; use nix::sys::mman::munmap as nix_munmap; use nix::unistd::ftruncate as nix_ftruncate; -/// ShmemHandle represents a shared memory area that can be shared by processes over fork(). -/// Unlike shared memory allocated by Postgres, this area is resizable, up to 'max_size' that's +/// `ShmemHandle` represents a shared memory area that can be shared by processes over `fork()`. +/// Unlike shared memory allocated by Postgres, this area is resizable, up to `max_size` that's /// specified at creation. /// -/// The area is backed by an anonymous file created with memfd_create(). The full address space for -/// 'max_size' is reserved up-front with mmap(), but whenever you call [`ShmemHandle::set_size`], +/// The area is backed by an anonymous file created with `memfd_create()`. The full address space for +/// `max_size` is reserved up-front with `mmap()`, but whenever you call [`ShmemHandle::set_size`], /// the underlying file is resized. Do not access the area beyond the current size. Currently, that -/// will cause the file to be expanded, but we might use mprotect() etc. to enforce that in the +/// will cause the file to be expanded, but we might use `mprotect()` etc. to enforce that in the /// future. pub struct ShmemHandle { /// memfd file descriptor @@ -38,7 +38,7 @@ pub struct ShmemHandle { struct SharedStruct { max_size: usize, - /// Current size of the backing file. The high-order bit is used for the RESIZE_IN_PROGRESS flag + /// Current size of the backing file. The high-order bit is used for the [`RESIZE_IN_PROGRESS`] flag. current_size: AtomicUsize, } @@ -46,7 +46,7 @@ const RESIZE_IN_PROGRESS: usize = 1 << 63; const HEADER_SIZE: usize = std::mem::size_of::(); -/// Error type returned by the ShmemHandle functions. +/// Error type returned by the [`ShmemHandle`] functions. #[derive(thiserror::Error, Debug)] #[error("{msg}: {errno}")] pub struct Error { @@ -55,8 +55,8 @@ pub struct Error { } impl Error { - fn new(msg: &str, errno: Errno) -> Error { - Error { + fn new(msg: &str, errno: Errno) -> Self { + Self { msg: msg.to_string(), errno, } @@ -65,11 +65,11 @@ impl Error { impl ShmemHandle { /// Create a new shared memory area. To communicate between processes, the processes need to be - /// fork()'d after calling this, so that the ShmemHandle is inherited by all processes. + /// `fork()`'d after calling this, so that the `ShmemHandle` is inherited by all processes. 
/// - /// If the ShmemHandle is dropped, the memory is unmapped from the current process. Other + /// If the `ShmemHandle` is dropped, the memory is unmapped from the current process. Other /// processes can continue using it, however. - pub fn new(name: &str, initial_size: usize, max_size: usize) -> Result { + pub fn new(name: &str, initial_size: usize, max_size: usize) -> Result { // create the backing anonymous file. let fd = create_backing_file(name)?; @@ -80,17 +80,17 @@ impl ShmemHandle { fd: OwnedFd, initial_size: usize, max_size: usize, - ) -> Result { - // We reserve the high-order bit for the RESIZE_IN_PROGRESS flag, and the actual size + ) -> Result { + // We reserve the high-order bit for the `RESIZE_IN_PROGRESS` flag, and the actual size // is a little larger than this because of the SharedStruct header. Make the upper limit // somewhat smaller than that, because with anything close to that, you'll run out of // memory anyway. - if max_size >= 1 << 48 { - panic!("max size {} too large", max_size); - } - if initial_size > max_size { - panic!("initial size {initial_size} larger than max size {max_size}"); - } + assert!(max_size < 1 << 48, "max size {max_size} too large"); + + assert!( + initial_size <= max_size, + "initial size {initial_size} larger than max size {max_size}" + ); // The actual initial / max size is the one given by the caller, plus the size of // 'SharedStruct'. @@ -110,7 +110,7 @@ impl ShmemHandle { 0, ) } - .map_err(|e| Error::new("mmap failed: {e}", e))?; + .map_err(|e| Error::new("mmap failed", e))?; // Reserve space for the initial size enlarge_file(fd.as_fd(), initial_size as u64)?; @@ -121,13 +121,13 @@ impl ShmemHandle { shared.write(SharedStruct { max_size: max_size.into(), current_size: AtomicUsize::new(initial_size), - }) - }; + }); + } // The user data begins after the header let data_ptr = unsafe { start_ptr.cast().add(HEADER_SIZE) }; - Ok(ShmemHandle { + Ok(Self { fd, max_size: max_size.into(), shared_ptr: shared, @@ -140,28 +140,28 @@ impl ShmemHandle { unsafe { self.shared_ptr.as_ref() } } - /// Resize the shared memory area. 'new_size' must not be larger than the 'max_size' specified + /// Resize the shared memory area. `new_size` must not be larger than the `max_size` specified /// when creating the area. /// /// This may only be called from one process/thread concurrently. We detect that case - /// and return an Error. + /// and return an [`shmem::Error`](Error). pub fn set_size(&self, new_size: usize) -> Result<(), Error> { let new_size = new_size + HEADER_SIZE; let shared = self.shared(); - if new_size > self.max_size { - panic!( - "new size ({} is greater than max size ({})", - new_size, self.max_size - ); - } - assert_eq!(self.max_size, shared.max_size); + assert!( + new_size <= self.max_size, + "new size ({new_size}) is greater than max size ({})", + self.max_size + ); - // Lock the area by setting the bit in 'current_size' + assert_eq!(self.max_size, shared.max_size); + + // Lock the area by setting the bit in `current_size` // // Ordering::Relaxed would probably be sufficient here, as we don't access any other memory - // and the posix_fallocate/ftruncate call is surely a synchronization point anyway. But - // since this is not performance-critical, better safe than sorry . + // and the `posix_fallocate`/`ftruncate` call is surely a synchronization point anyway. But + // since this is not performance-critical, better safe than sorry. 
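        // Illustrative encoding (sizes arbitrary): for a user-requested size of
        // 4096 the stored value is `4096 + HEADER_SIZE`; while a resize is running
        // the same word additionally carries the `RESIZE_IN_PROGRESS` bit, which
        // readers mask off before interpreting the value as a size.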
let mut old_size = shared.current_size.load(Ordering::Acquire); loop { if (old_size & RESIZE_IN_PROGRESS) != 0 { @@ -188,7 +188,7 @@ impl ShmemHandle { use std::cmp::Ordering::{Equal, Greater, Less}; match new_size.cmp(&old_size) { Less => nix_ftruncate(&self.fd, new_size as i64).map_err(|e| { - Error::new("could not shrink shmem segment, ftruncate failed: {e}", e) + Error::new("could not shrink shmem segment, ftruncate failed", e) }), Equal => Ok(()), Greater => enlarge_file(self.fd.as_fd(), new_size as u64), @@ -206,8 +206,8 @@ impl ShmemHandle { /// Returns the current user-visible size of the shared memory segment. /// - /// NOTE: a concurrent set_size() call can change the size at any time. It is the caller's - /// responsibility not to access the area beyond the current size. + /// NOTE: a concurrent [`ShmemHandle::set_size()`] call can change the size at any time. + /// It is the caller's responsibility not to access the area beyond the current size. pub fn current_size(&self) -> usize { let total_current_size = self.shared().current_size.load(Ordering::Relaxed) & !RESIZE_IN_PROGRESS; @@ -224,23 +224,23 @@ impl Drop for ShmemHandle { } } -/// Create a "backing file" for the shared memory area. On Linux, use memfd_create(), to create an +/// Create a "backing file" for the shared memory area. On Linux, use `memfd_create()`, to create an /// anonymous in-memory file. One macos, fall back to a regular file. That's good enough for /// development and testing, but in production we want the file to stay in memory. /// -/// disable 'unused_variables' warnings, because in the macos path, 'name' is unused. +/// Disable unused variables warnings because `name` is unused in the macos path. #[allow(unused_variables)] fn create_backing_file(name: &str) -> Result { #[cfg(not(target_os = "macos"))] { nix::sys::memfd::memfd_create(name, nix::sys::memfd::MFdFlags::empty()) - .map_err(|e| Error::new("memfd_create failed: {e}", e)) + .map_err(|e| Error::new("memfd_create failed", e)) } #[cfg(target_os = "macos")] { let file = tempfile::tempfile().map_err(|e| { Error::new( - "could not create temporary file to back shmem area: {e}", + "could not create temporary file to back shmem area", nix::errno::Errno::from_raw(e.raw_os_error().unwrap_or(0)), ) })?; @@ -255,7 +255,7 @@ fn enlarge_file(fd: BorrowedFd, size: u64) -> Result<(), Error> { { nix::fcntl::posix_fallocate(fd, 0, size as i64).map_err(|e| { Error::new( - "could not grow shmem segment, posix_fallocate failed: {e}", + "could not grow shmem segment, posix_fallocate failed", e, ) }) @@ -264,7 +264,7 @@ fn enlarge_file(fd: BorrowedFd, size: u64) -> Result<(), Error> { #[cfg(target_os = "macos")] { nix::unistd::ftruncate(fd, size as i64) - .map_err(|e| Error::new("could not grow shmem segment, ftruncate failed: {e}", e)) + .map_err(|e| Error::new("could not grow shmem segment, ftruncate failed", e)) } } @@ -330,7 +330,7 @@ mod tests { Ok(()) } - /// This is used in tests to coordinate between test processes. It's like std::sync::Barrier, + /// This is used in tests to coordinate between test processes. It's like `std::sync::Barrier`, /// but is stored in the shared memory area and works across processes. It's implemented by /// polling, because e.g. standard rust mutexes are not guaranteed to work across processes. 
struct SimpleBarrier { From 47664e40d452a4e3f771e6c6b4905f02d6c710a2 Mon Sep 17 00:00:00 2001 From: David Freifeld Date: Thu, 26 Jun 2025 16:00:33 -0700 Subject: [PATCH 2/7] Initial work in visualizing properties of hashmap --- Cargo.lock | 299 ++++++++++++++++++++++++++++++ libs/neon-shmem/Cargo.toml | 8 + libs/neon-shmem/src/hash.rs | 26 ++- libs/neon-shmem/src/hmap_stats.rs | 139 ++++++++++++++ 4 files changed, 471 insertions(+), 1 deletion(-) create mode 100644 libs/neon-shmem/src/hmap_stats.rs diff --git a/Cargo.lock b/Cargo.lock index 4fd5f5802b..c407806b3d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1336,6 +1336,12 @@ dependencies = [ "cc", ] +[[package]] +name = "color_quant" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" + [[package]] name = "colorchoice" version = "1.0.4" @@ -1595,6 +1601,42 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core-graphics" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c07782be35f9e1140080c6b96f0d44b739e2278479f64e02fdab4e32dfd8b081" +dependencies = [ + "bitflags 1.3.2", + "core-foundation 0.9.4", + "core-graphics-types", + "foreign-types", + "libc", +] + +[[package]] +name = "core-graphics-types" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45390e6114f68f718cc7a830514a96f903cccd70d02a8f6d9f643ac4ba45afaf" +dependencies = [ + "bitflags 1.3.2", + "core-foundation 0.9.4", + "libc", +] + +[[package]] +name = "core-text" +version = "20.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9d2790b5c08465d49f8dc05c8bcae9fea467855947db39b0f8145c091aaced5" +dependencies = [ + "core-foundation 0.9.4", + "core-graphics", + "foreign-types", + "libc", +] + [[package]] name = "cpp_demangle" version = "0.4.4" @@ -2047,6 +2089,27 @@ dependencies = [ "subtle", ] +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.59.0", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -2058,6 +2121,15 @@ dependencies = [ "syn", ] +[[package]] +name = "dlib" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "330c60081dcc4c72131f8eb70510f1ac07223e5d4163db481a04a0befcffa412" +dependencies = [ + "libloading", +] + [[package]] name = "dlv-list" version = "0.5.2" @@ -2087,6 +2159,18 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" +[[package]] +name = "dwrote" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfe1f192fcce01590bd8d839aca53ce0d11d803bf291b2a6c4ad925a8f0024be" +dependencies = [ + "lazy_static", + "libc", + "winapi", + "wio", +] + [[package]] name = "dyn-clone" version = "1.0.19" @@ -2377,6 +2461,15 @@ version = "2.3.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "fdeflate" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e6853b52649d4ac5c0bd02320cddc5ba956bdb407c4b75a2c6b75bf51500f8c" +dependencies = [ + "simd-adler32", +] + [[package]] name = "ff" version = "0.12.1" @@ -2455,6 +2548,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "float-ord" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce81f49ae8a0482e4c55ea62ebbd7e5a686af544c00b9d090bba3ff9be97b3d" + [[package]] name = "fnv" version = "1.0.7" @@ -2467,6 +2566,58 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "font-kit" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c7e611d49285d4c4b2e1727b72cf05353558885cc5252f93707b845dfcaf3d3" +dependencies = [ + "bitflags 2.9.1", + "byteorder", + "core-foundation 0.9.4", + "core-graphics", + "core-text", + "dirs", + "dwrote", + "float-ord", + "freetype-sys", + "lazy_static", + "libc", + "log", + "pathfinder_geometry", + "pathfinder_simd", + "walkdir", + "winapi", + "yeslogic-fontconfig-sys", +] + +[[package]] +name = "foreign-types" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" +dependencies = [ + "foreign-types-macros", + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-macros" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "foreign-types-shared" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -2497,6 +2648,17 @@ dependencies = [ "tokio-util", ] +[[package]] +name = "freetype-sys" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7edc5b9669349acfda99533e9e0bcf26a51862ab43b08ee7745c55d28eb134" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "fs_extra" version = "1.3.0" @@ -2687,6 +2849,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "gif" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80792593675e051cf94a4b111980da2ba60d4a83e43e0048c5693baab3977045" +dependencies = [ + "color_quant", + "weezl", +] + [[package]] name = "gimli" version = "0.31.1" @@ -3371,6 +3543,20 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "image" +version = "0.24.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5690139d2f55868e080017335e4b94cb7414274c74f1669c84fb5feba2c9f69d" +dependencies = [ + "bytemuck", + "byteorder", + "color_quant", + "jpeg-decoder", + "num-traits", + "png", +] + [[package]] name = "indexmap" version = "1.9.3" @@ -3655,6 +3841,12 @@ dependencies = [ "zeroize", ] +[[package]] +name = "jpeg-decoder" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"00810f1d8b74be64b13dbf3db89ac67740615d6c891f0e7b6179326533011a07" + [[package]] name = "js-sys" version = "0.3.77" @@ -3998,6 +4190,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -4038,6 +4231,7 @@ dependencies = [ "foldhash", "hashbrown 0.15.4 (git+https://github.com/quantumish/hashbrown.git?rev=6610e6d)", "nix 0.30.1", + "plotters", "rand 0.9.1", "rand_distr 0.5.1", "rustc-hash 2.1.1", @@ -4406,6 +4600,12 @@ dependencies = [ "tracing", ] +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "ordered-float" version = "2.10.1" @@ -4842,6 +5042,25 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pathfinder_geometry" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b7e7b4ea703700ce73ebf128e1450eb69c3a8329199ffbfb9b2a0418e5ad3" +dependencies = [ + "log", + "pathfinder_simd", +] + +[[package]] +name = "pathfinder_simd" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf9027960355bf3afff9841918474a81a5f972ac6d226d518060bba758b5ad57" +dependencies = [ + "rustc_version", +] + [[package]] name = "pbkdf2" version = "0.12.2" @@ -5005,9 +5224,16 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" dependencies = [ + "chrono", + "font-kit", + "image", + "lazy_static", "num-traits", + "pathfinder_geometry", "plotters-backend", + "plotters-bitmap", "plotters-svg", + "ttf-parser", "wasm-bindgen", "web-sys", ] @@ -5018,6 +5244,17 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" +[[package]] +name = "plotters-bitmap" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72ce181e3f6bf82d6c1dc569103ca7b1bd964c60ba03d7e6cdfbb3e3eb7f7405" +dependencies = [ + "gif", + "image", + "plotters-backend", +] + [[package]] name = "plotters-svg" version = "0.3.7" @@ -5027,6 +5264,19 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "png" +version = "0.17.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82151a2fc869e011c153adc57cf2789ccb8d9906ce52c0b39a6b5697749d7526" +dependencies = [ + "bitflags 1.3.2", + "crc32fast", + "fdeflate", + "flate2", + "miniz_oxide", +] + [[package]] name = "polonius-the-crab" version = "0.4.2" @@ -5914,6 +6164,17 @@ dependencies = [ "bitflags 2.9.1", ] +[[package]] +name = "redox_users" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" +dependencies = [ + "getrandom 0.2.16", + "libredox", + "thiserror 2.0.12", +] + [[package]] name = "regex" version = "1.11.1" @@ -6986,6 +7247,12 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "simd-adler32" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" + [[package]] name = "simple_asn1" version = "0.6.3" @@ -8177,6 +8444,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "ttf-parser" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17f77d76d837a7830fe1d4f12b7b4ba4192c1888001c7164257e4bc6d21d96b4" + [[package]] name = "tungstenite" version = "0.21.0" @@ -8715,6 +8988,12 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "weezl" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a751b3277700db47d3e574514de2eced5e54dc8a5436a3bf7a0b248b2cee16f3" + [[package]] name = "which" version = "4.4.2" @@ -9067,6 +9346,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "wio" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d129932f4644ac2396cb456385cbf9e63b5b30c6e8dc4820bdca4eb082037a5" +dependencies = [ + "winapi", +] + [[package]] name = "wit-bindgen-rt" version = "0.39.0" @@ -9252,6 +9540,17 @@ dependencies = [ "time", ] +[[package]] +name = "yeslogic-fontconfig-sys" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "503a066b4c037c440169d995b869046827dbc71263f6e8f3be6d77d4f3229dbd" +dependencies = [ + "dlib", + "once_cell", + "pkg-config", +] + [[package]] name = "yoke" version = "0.8.0" diff --git a/libs/neon-shmem/Cargo.toml b/libs/neon-shmem/Cargo.toml index 1c47ffed37..3a22ce6c8b 100644 --- a/libs/neon-shmem/Cargo.toml +++ b/libs/neon-shmem/Cargo.toml @@ -10,6 +10,7 @@ nix.workspace = true workspace_hack = { version = "0.1", path = "../../workspace_hack" } rustc-hash = { version = "2.1.1" } rand = "0.9.1" +plotters = { version = "0.3.7", optional = true } [dev-dependencies] criterion = { workspace = true, features = ["html_reports"] } @@ -21,6 +22,7 @@ seahash = "4.1.0" hashbrown = { git = "https://github.com/quantumish/hashbrown.git", rev = "6610e6d" } foldhash = "0.1.5" + [target.'cfg(target_os = "macos")'.dependencies] tempfile = "3.14.0" @@ -28,3 +30,9 @@ tempfile = "3.14.0" name = "hmap_resize" harness = false +[[bin]] +name = "hmap_stats" +path = "src/hmap_stats.rs" + +[features] +stats = ["dep:plotters"] diff --git a/libs/neon-shmem/src/hash.rs b/libs/neon-shmem/src/hash.rs index 36fbb1112c..6cc641814a 100644 --- a/libs/neon-shmem/src/hash.rs +++ b/libs/neon-shmem/src/hash.rs @@ -490,5 +490,29 @@ where inner.alloc_limit = INVALID_POS; Ok(()) - } + } + + #[cfg(feature = "stats")] + pub fn dict_len(&self) -> usize { + let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); + map.inner.dictionary.len() + } + + #[cfg(feature = "stats")] + pub fn chain_distribution(&self) -> (Vec<(usize, usize)>, usize) { + let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); + let mut out = Vec::new(); + let mut max = 0; + for (i, d) in map.inner.dictionary.iter().enumerate() { + let mut curr = *d; + let mut len = 0; + while curr != INVALID_POS { + curr = map.inner.buckets[curr as usize].next; + len += 1; + } + out.push((i, len)); + max = max.max(len); + } + (out, max) + } } diff --git a/libs/neon-shmem/src/hmap_stats.rs b/libs/neon-shmem/src/hmap_stats.rs new file mode 100644 index 0000000000..9e55ad1f05 --- /dev/null +++ b/libs/neon-shmem/src/hmap_stats.rs @@ -0,0 +1,139 @@ +use neon_shmem::hash::HashMapAccess; +use neon_shmem::hash::HashMapInit; 
+use neon_shmem::hash::entry::Entry; +use rand::prelude::*; +use rand::distr::{Distribution, StandardUniform}; +use plotters::prelude::*; + +#[derive(Clone, Debug, Hash, Eq, PartialEq)] +#[repr(C)] +pub struct FileCacheKey { + pub _spc_id: u32, + pub _db_id: u32, + pub _rel_number: u32, + pub _fork_num: u32, + pub _block_num: u32, +} + +impl Distribution for StandardUniform { + // questionable, but doesn't need to be good randomness + fn sample(&self, rng: &mut R) -> FileCacheKey { + FileCacheKey { + _spc_id: rng.random(), + _db_id: rng.random(), + _rel_number: rng.random(), + _fork_num: rng.random(), + _block_num: rng.random() + } + } +} + +#[derive(Clone, Debug)] +#[repr(C)] +pub struct FileCacheEntry { + pub _offset: u32, + pub _access_count: u32, + pub _prev: *mut FileCacheEntry, + pub _next: *mut FileCacheEntry, + pub _state: [u32; 8], +} + +impl FileCacheEntry { + fn dummy() -> Self { + Self { + _offset: 0, + _access_count: 0, + _prev: std::ptr::null_mut(), + _next: std::ptr::null_mut(), + _state: [0; 8] + } + } +} + +// Utilities for applying operations. + +#[derive(Clone, Debug)] +struct TestOp(K, Option); + +fn apply_op( + op: TestOp, + map: &mut HashMapAccess, +) { + let hash = map.get_hash_value(&op.0); + let entry = map.entry_with_hash(op.0, hash); + + match op.1 { + Some(new) => { + match entry { + Entry::Occupied(mut e) => Some(e.insert(new)), + Entry::Vacant(e) => { e.insert(new).unwrap(); None }, + } + }, + None => { + match entry { + Entry::Occupied(e) => Some(e.remove()), + Entry::Vacant(_) => None, + } + }, + }; +} + +#[cfg(feature = "stats")] +fn main() { + let ideal_filled = 16_000_000; + let size = 20_000_000; + let mut writer = HashMapInit::new_resizeable(size, size).attach_writer(); + let mut rng = rand::rng(); + while writer.get_num_buckets_in_use() < ideal_filled as usize { + let key: FileCacheKey = rng.random(); + let val = FileCacheEntry::dummy(); + apply_op(TestOp(key, Some(val)), &mut writer); + } + println!("Inserted {ideal_filled} entries into a map with capacity {size}."); + let (distr, max) = writer.chain_distribution(); + + let root_area = BitMapBackend::new("chain_distr.png", (800, 400)) + .into_drawing_area(); + root_area.fill(&WHITE).unwrap(); + + let mut ctx = ChartBuilder::on(&root_area) + .set_label_area_size(LabelAreaPosition::Left, 40) + .set_label_area_size(LabelAreaPosition::Bottom, 40) + .build_cartesian_2d((0..max).into_segmented(), (0..ideal_filled * 2).log_scale()) + .unwrap(); + + ctx.configure_mesh() + .y_label_formatter(&|y| format!("{:e}", y)) + .draw().unwrap(); + + ctx.draw_series( + Histogram::vertical(&ctx) + .margin(10) + .data(distr.iter().map(|x| (x.1, 1))) + ).unwrap(); + + // let root_area = BitMapBackend::new("dict_distr.png", (2000, 400)) + // .into_drawing_area(); + // root_area.fill(&WHITE).unwrap(); + + // let mut ctx = ChartBuilder::on(&root_area) + // .set_label_area_size(LabelAreaPosition::Left, 40) + // .set_label_area_size(LabelAreaPosition::Bottom, 40) + // .build_cartesian_2d((0..writer.dict_len()), (0..(max as f32 * 1.5) as usize)) + // .unwrap(); + + // ctx.configure_mesh().draw().unwrap(); + + // ctx.draw_series(LineSeries::new( + // distr.iter().map(|(bin, count)| (*bin, *count)), + // &RED, + // )).unwrap(); + + // println!("Longest chain: {}", writer.longest_chain()); +} + +#[cfg(not(feature = "stats"))] +fn main() { + println!("Enable the `stats` feature to use this binary!"); +} + From c3c136ef3a7b9dfceba63fc837f4a1cb6287e4e9 Mon Sep 17 00:00:00 2001 From: David Freifeld Date: Fri, 27 Jun 2025 17:10:52 -0700 
Subject: [PATCH 3/7] Remove statistics utilities from neon_shmem crate --- Cargo.lock | 299 ------------------------------ libs/neon-shmem/Cargo.toml | 8 - libs/neon-shmem/src/hmap_stats.rs | 139 -------------- 3 files changed, 446 deletions(-) delete mode 100644 libs/neon-shmem/src/hmap_stats.rs diff --git a/Cargo.lock b/Cargo.lock index c407806b3d..4fd5f5802b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1336,12 +1336,6 @@ dependencies = [ "cc", ] -[[package]] -name = "color_quant" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" - [[package]] name = "colorchoice" version = "1.0.4" @@ -1601,42 +1595,6 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" -[[package]] -name = "core-graphics" -version = "0.23.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c07782be35f9e1140080c6b96f0d44b739e2278479f64e02fdab4e32dfd8b081" -dependencies = [ - "bitflags 1.3.2", - "core-foundation 0.9.4", - "core-graphics-types", - "foreign-types", - "libc", -] - -[[package]] -name = "core-graphics-types" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45390e6114f68f718cc7a830514a96f903cccd70d02a8f6d9f643ac4ba45afaf" -dependencies = [ - "bitflags 1.3.2", - "core-foundation 0.9.4", - "libc", -] - -[[package]] -name = "core-text" -version = "20.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9d2790b5c08465d49f8dc05c8bcae9fea467855947db39b0f8145c091aaced5" -dependencies = [ - "core-foundation 0.9.4", - "core-graphics", - "foreign-types", - "libc", -] - [[package]] name = "cpp_demangle" version = "0.4.4" @@ -2089,27 +2047,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "dirs" -version = "6.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" -dependencies = [ - "dirs-sys", -] - -[[package]] -name = "dirs-sys" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" -dependencies = [ - "libc", - "option-ext", - "redox_users", - "windows-sys 0.59.0", -] - [[package]] name = "displaydoc" version = "0.2.5" @@ -2121,15 +2058,6 @@ dependencies = [ "syn", ] -[[package]] -name = "dlib" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "330c60081dcc4c72131f8eb70510f1ac07223e5d4163db481a04a0befcffa412" -dependencies = [ - "libloading", -] - [[package]] name = "dlv-list" version = "0.5.2" @@ -2159,18 +2087,6 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" -[[package]] -name = "dwrote" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfe1f192fcce01590bd8d839aca53ce0d11d803bf291b2a6c4ad925a8f0024be" -dependencies = [ - "lazy_static", - "libc", - "winapi", - "wio", -] - [[package]] name = "dyn-clone" version = "1.0.19" @@ -2461,15 +2377,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = "fdeflate" -version = "0.3.7" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e6853b52649d4ac5c0bd02320cddc5ba956bdb407c4b75a2c6b75bf51500f8c" -dependencies = [ - "simd-adler32", -] - [[package]] name = "ff" version = "0.12.1" @@ -2548,12 +2455,6 @@ dependencies = [ "miniz_oxide", ] -[[package]] -name = "float-ord" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce81f49ae8a0482e4c55ea62ebbd7e5a686af544c00b9d090bba3ff9be97b3d" - [[package]] name = "fnv" version = "1.0.7" @@ -2566,58 +2467,6 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" -[[package]] -name = "font-kit" -version = "0.14.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c7e611d49285d4c4b2e1727b72cf05353558885cc5252f93707b845dfcaf3d3" -dependencies = [ - "bitflags 2.9.1", - "byteorder", - "core-foundation 0.9.4", - "core-graphics", - "core-text", - "dirs", - "dwrote", - "float-ord", - "freetype-sys", - "lazy_static", - "libc", - "log", - "pathfinder_geometry", - "pathfinder_simd", - "walkdir", - "winapi", - "yeslogic-fontconfig-sys", -] - -[[package]] -name = "foreign-types" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" -dependencies = [ - "foreign-types-macros", - "foreign-types-shared", -] - -[[package]] -name = "foreign-types-macros" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "foreign-types-shared" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" - [[package]] name = "form_urlencoded" version = "1.2.1" @@ -2648,17 +2497,6 @@ dependencies = [ "tokio-util", ] -[[package]] -name = "freetype-sys" -version = "0.20.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e7edc5b9669349acfda99533e9e0bcf26a51862ab43b08ee7745c55d28eb134" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - [[package]] name = "fs_extra" version = "1.3.0" @@ -2849,16 +2687,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "gif" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80792593675e051cf94a4b111980da2ba60d4a83e43e0048c5693baab3977045" -dependencies = [ - "color_quant", - "weezl", -] - [[package]] name = "gimli" version = "0.31.1" @@ -3543,20 +3371,6 @@ dependencies = [ "icu_properties", ] -[[package]] -name = "image" -version = "0.24.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5690139d2f55868e080017335e4b94cb7414274c74f1669c84fb5feba2c9f69d" -dependencies = [ - "bytemuck", - "byteorder", - "color_quant", - "jpeg-decoder", - "num-traits", - "png", -] - [[package]] name = "indexmap" version = "1.9.3" @@ -3841,12 +3655,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "jpeg-decoder" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00810f1d8b74be64b13dbf3db89ac67740615d6c891f0e7b6179326533011a07" - [[package]] name = "js-sys" version = "0.3.77" @@ -4190,7 +3998,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", - "simd-adler32", ] [[package]] @@ -4231,7 +4038,6 @@ dependencies = [ "foldhash", "hashbrown 0.15.4 (git+https://github.com/quantumish/hashbrown.git?rev=6610e6d)", "nix 0.30.1", - "plotters", "rand 0.9.1", "rand_distr 0.5.1", "rustc-hash 2.1.1", @@ -4600,12 +4406,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "option-ext" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" - [[package]] name = "ordered-float" version = "2.10.1" @@ -5042,25 +4842,6 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" -[[package]] -name = "pathfinder_geometry" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b7e7b4ea703700ce73ebf128e1450eb69c3a8329199ffbfb9b2a0418e5ad3" -dependencies = [ - "log", - "pathfinder_simd", -] - -[[package]] -name = "pathfinder_simd" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf9027960355bf3afff9841918474a81a5f972ac6d226d518060bba758b5ad57" -dependencies = [ - "rustc_version", -] - [[package]] name = "pbkdf2" version = "0.12.2" @@ -5224,16 +5005,9 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" dependencies = [ - "chrono", - "font-kit", - "image", - "lazy_static", "num-traits", - "pathfinder_geometry", "plotters-backend", - "plotters-bitmap", "plotters-svg", - "ttf-parser", "wasm-bindgen", "web-sys", ] @@ -5244,17 +5018,6 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" -[[package]] -name = "plotters-bitmap" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72ce181e3f6bf82d6c1dc569103ca7b1bd964c60ba03d7e6cdfbb3e3eb7f7405" -dependencies = [ - "gif", - "image", - "plotters-backend", -] - [[package]] name = "plotters-svg" version = "0.3.7" @@ -5264,19 +5027,6 @@ dependencies = [ "plotters-backend", ] -[[package]] -name = "png" -version = "0.17.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82151a2fc869e011c153adc57cf2789ccb8d9906ce52c0b39a6b5697749d7526" -dependencies = [ - "bitflags 1.3.2", - "crc32fast", - "fdeflate", - "flate2", - "miniz_oxide", -] - [[package]] name = "polonius-the-crab" version = "0.4.2" @@ -6164,17 +5914,6 @@ dependencies = [ "bitflags 2.9.1", ] -[[package]] -name = "redox_users" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" -dependencies = [ - "getrandom 0.2.16", - "libredox", - "thiserror 2.0.12", -] - [[package]] name = "regex" version = "1.11.1" @@ -7247,12 +6986,6 @@ dependencies = [ "rand_core 0.6.4", ] -[[package]] -name = "simd-adler32" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" - [[package]] name = "simple_asn1" version = "0.6.3" @@ -8444,12 +8177,6 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" -[[package]] -name = "ttf-parser" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17f77d76d837a7830fe1d4f12b7b4ba4192c1888001c7164257e4bc6d21d96b4" - [[package]] name = "tungstenite" version = "0.21.0" @@ -8988,12 +8715,6 @@ dependencies = [ "rustls-pki-types", ] -[[package]] -name = "weezl" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a751b3277700db47d3e574514de2eced5e54dc8a5436a3bf7a0b248b2cee16f3" - [[package]] name = "which" version = "4.4.2" @@ -9346,15 +9067,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "wio" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d129932f4644ac2396cb456385cbf9e63b5b30c6e8dc4820bdca4eb082037a5" -dependencies = [ - "winapi", -] - [[package]] name = "wit-bindgen-rt" version = "0.39.0" @@ -9540,17 +9252,6 @@ dependencies = [ "time", ] -[[package]] -name = "yeslogic-fontconfig-sys" -version = "6.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "503a066b4c037c440169d995b869046827dbc71263f6e8f3be6d77d4f3229dbd" -dependencies = [ - "dlib", - "once_cell", - "pkg-config", -] - [[package]] name = "yoke" version = "0.8.0" diff --git a/libs/neon-shmem/Cargo.toml b/libs/neon-shmem/Cargo.toml index 3a22ce6c8b..08b5ac2138 100644 --- a/libs/neon-shmem/Cargo.toml +++ b/libs/neon-shmem/Cargo.toml @@ -10,7 +10,6 @@ nix.workspace = true workspace_hack = { version = "0.1", path = "../../workspace_hack" } rustc-hash = { version = "2.1.1" } rand = "0.9.1" -plotters = { version = "0.3.7", optional = true } [dev-dependencies] criterion = { workspace = true, features = ["html_reports"] } @@ -29,10 +28,3 @@ tempfile = "3.14.0" [[bench]] name = "hmap_resize" harness = false - -[[bin]] -name = "hmap_stats" -path = "src/hmap_stats.rs" - -[features] -stats = ["dep:plotters"] diff --git a/libs/neon-shmem/src/hmap_stats.rs b/libs/neon-shmem/src/hmap_stats.rs deleted file mode 100644 index 9e55ad1f05..0000000000 --- a/libs/neon-shmem/src/hmap_stats.rs +++ /dev/null @@ -1,139 +0,0 @@ -use neon_shmem::hash::HashMapAccess; -use neon_shmem::hash::HashMapInit; -use neon_shmem::hash::entry::Entry; -use rand::prelude::*; -use rand::distr::{Distribution, StandardUniform}; -use plotters::prelude::*; - -#[derive(Clone, Debug, Hash, Eq, PartialEq)] -#[repr(C)] -pub struct FileCacheKey { - pub _spc_id: u32, - pub _db_id: u32, - pub _rel_number: u32, - pub _fork_num: u32, - pub _block_num: u32, -} - -impl Distribution for StandardUniform { - // questionable, but doesn't need to be good randomness - fn sample(&self, rng: &mut R) -> FileCacheKey { - FileCacheKey { - _spc_id: rng.random(), - _db_id: rng.random(), - _rel_number: rng.random(), - _fork_num: rng.random(), - _block_num: rng.random() - } - } -} - -#[derive(Clone, Debug)] -#[repr(C)] -pub struct FileCacheEntry { - pub _offset: u32, - pub _access_count: u32, - pub _prev: *mut FileCacheEntry, - pub _next: *mut FileCacheEntry, - pub _state: [u32; 8], -} - -impl FileCacheEntry { - fn dummy() -> Self { - Self { - _offset: 0, - _access_count: 0, - _prev: std::ptr::null_mut(), - _next: std::ptr::null_mut(), - _state: [0; 8] - } - } -} - -// Utilities for applying operations. 
- -#[derive(Clone, Debug)] -struct TestOp(K, Option); - -fn apply_op( - op: TestOp, - map: &mut HashMapAccess, -) { - let hash = map.get_hash_value(&op.0); - let entry = map.entry_with_hash(op.0, hash); - - match op.1 { - Some(new) => { - match entry { - Entry::Occupied(mut e) => Some(e.insert(new)), - Entry::Vacant(e) => { e.insert(new).unwrap(); None }, - } - }, - None => { - match entry { - Entry::Occupied(e) => Some(e.remove()), - Entry::Vacant(_) => None, - } - }, - }; -} - -#[cfg(feature = "stats")] -fn main() { - let ideal_filled = 16_000_000; - let size = 20_000_000; - let mut writer = HashMapInit::new_resizeable(size, size).attach_writer(); - let mut rng = rand::rng(); - while writer.get_num_buckets_in_use() < ideal_filled as usize { - let key: FileCacheKey = rng.random(); - let val = FileCacheEntry::dummy(); - apply_op(TestOp(key, Some(val)), &mut writer); - } - println!("Inserted {ideal_filled} entries into a map with capacity {size}."); - let (distr, max) = writer.chain_distribution(); - - let root_area = BitMapBackend::new("chain_distr.png", (800, 400)) - .into_drawing_area(); - root_area.fill(&WHITE).unwrap(); - - let mut ctx = ChartBuilder::on(&root_area) - .set_label_area_size(LabelAreaPosition::Left, 40) - .set_label_area_size(LabelAreaPosition::Bottom, 40) - .build_cartesian_2d((0..max).into_segmented(), (0..ideal_filled * 2).log_scale()) - .unwrap(); - - ctx.configure_mesh() - .y_label_formatter(&|y| format!("{:e}", y)) - .draw().unwrap(); - - ctx.draw_series( - Histogram::vertical(&ctx) - .margin(10) - .data(distr.iter().map(|x| (x.1, 1))) - ).unwrap(); - - // let root_area = BitMapBackend::new("dict_distr.png", (2000, 400)) - // .into_drawing_area(); - // root_area.fill(&WHITE).unwrap(); - - // let mut ctx = ChartBuilder::on(&root_area) - // .set_label_area_size(LabelAreaPosition::Left, 40) - // .set_label_area_size(LabelAreaPosition::Bottom, 40) - // .build_cartesian_2d((0..writer.dict_len()), (0..(max as f32 * 1.5) as usize)) - // .unwrap(); - - // ctx.configure_mesh().draw().unwrap(); - - // ctx.draw_series(LineSeries::new( - // distr.iter().map(|(bin, count)| (*bin, *count)), - // &RED, - // )).unwrap(); - - // println!("Longest chain: {}", writer.longest_chain()); -} - -#[cfg(not(feature = "stats"))] -fn main() { - println!("Enable the `stats` feature to use this binary!"); -} - From 74330920ee6ced0099010b8ecf0974fbbb539ad6 Mon Sep 17 00:00:00 2001 From: David Freifeld Date: Fri, 27 Jun 2025 17:11:22 -0700 Subject: [PATCH 4/7] Simplify API, squash bugs, and expand hashmap test suite --- libs/neon-shmem/src/hash.rs | 108 ++++++++++++++++------------ libs/neon-shmem/src/hash/core.rs | 29 +------- libs/neon-shmem/src/hash/entry.rs | 27 +++++-- libs/neon-shmem/src/hash/tests.rs | 114 ++++++++++++++++++++++++------ 4 files changed, 178 insertions(+), 100 deletions(-) diff --git a/libs/neon-shmem/src/hash.rs b/libs/neon-shmem/src/hash.rs index 6cc641814a..7d47a4f5e5 100644 --- a/libs/neon-shmem/src/hash.rs +++ b/libs/neon-shmem/src/hash.rs @@ -224,40 +224,75 @@ where K: Clone + Hash + Eq, { /// Hash a key using the map's hasher. - pub fn get_hash_value(&self, key: &K) -> u64 { + #[inline] + fn get_hash_value(&self, key: &K) -> u64 { self.hasher.hash_one(key) } - /// Get a reference to the corresponding value for a key given its hash. - pub fn get_with_hash<'e>(&'e self, key: &K, hash: u64) -> Option<&'e V> { + /// Get a reference to the corresponding value for a key. 
+ pub fn get<'e>(&'e self, key: &K) -> Option<&'e V> { let map = unsafe { self.shared_ptr.as_ref() }.unwrap(); - + let hash = self.get_hash_value(key); map.inner.get_with_hash(key, hash) } - /// Get a reference to the entry containing a key given its hash. - pub fn entry_with_hash(&mut self, key: K, hash: u64) -> Entry<'a, '_, K, V> { + /// Get a reference to the entry containing a key. + pub fn entry(&self, key: K) -> Entry<'a, '_, K, V> { let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); - + let hash = self.get_hash_value(&key); map.inner.entry_with_hash(key, hash) } - /// Remove a key given its hash. Does nothing if key is not present. - pub fn remove_with_hash(&mut self, key: &K, hash: u64) { + /// Remove a key given its hash. Returns the associated value if it existed. + pub fn remove(&self, key: &K) -> Option { let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); - + let hash = self.get_hash_value(&key); match map.inner.entry_with_hash(key.clone(), hash) { - Entry::Occupied(e) => { - e.remove(); - } - Entry::Vacant(_) => {} + Entry::Occupied(e) => Some(e.remove()), + Entry::Vacant(_) => None } } - /// Optionally return the entry for a bucket at a given index if it exists. - pub fn entry_at_bucket(&mut self, pos: usize) -> Option> { + /// Insert/update a key. Returns the previous associated value if it existed. + /// + /// # Errors + /// Will return [`core::FullError`] if there is no more space left in the map. + pub fn insert(&self, key: K, value: V) -> Result, core::FullError> { let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); - map.inner.entry_at_bucket(pos) + let hash = self.get_hash_value(&key); + match map.inner.entry_with_hash(key.clone(), hash) { + Entry::Occupied(mut e) => Ok(Some(e.insert(value))), + Entry::Vacant(e) => { + e.insert(value)?; + Ok(None) + } + } + } + + /// Optionally return the entry for a bucket at a given index if it exists. + /// + /// Has more overhead than one would intuitively expect: performs both a clone of the key + /// due to the [`OccupiedEntry`] type owning the key and also a hash of the key in order + /// to enable repairing the hash chain if the entry is removed. + pub fn entry_at_bucket(&self, pos: usize) -> Option> { + let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); + let inner = &mut map.inner; + if pos >= inner.buckets.len() { + return None; + } + + let entry = inner.buckets[pos].inner.as_ref(); + match entry { + Some((key, _)) => Some(OccupiedEntry { + _key: key.clone(), + bucket_pos: pos as u32, + prev_pos: entry::PrevPos::Unknown( + self.get_hash_value(&key) + ), + map: inner, + }), + _ => None, + } } /// Returns the number of buckets in the table. @@ -299,7 +334,7 @@ where } /// Clears all entries in a table. Does not reset any shrinking operations. - pub fn clear(&mut self) { + pub fn clear(&self) { let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); let inner = &mut map.inner; inner.clear(); @@ -353,7 +388,7 @@ where } /// Rehash the map without growing or shrinking. - pub fn shuffle(&mut self) { + pub fn shuffle(&self) { let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); let inner = &mut map.inner; let num_buckets = inner.get_num_buckets() as u32; @@ -447,14 +482,17 @@ where /// - Calling this function on a map initialized with [`HashMapInit::with_fixed`]. /// - Calling this function on a map when no shrink operation is in progress. 
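A minimal sketch of how the simplified key-based API reads after this patch (illustrative only, not part of the diff; the constructor and its arguments mirror the test suite further down, and the key/value types are placeholders):

use neon_shmem::hash::HashMapInit;
use neon_shmem::hash::entry::Entry;

fn api_sketch() {
    // 1000 buckets now, allowed to grow to 2000; the name is arbitrary.
    let writer = HashMapInit::<u32, u64>::new_resizeable_named(1000, 2000, "api_sketch")
        .attach_writer();

    // Hashing happens inside the access methods now; callers never pass a hash.
    assert_eq!(writer.insert(1, 10).unwrap(), None);      // fresh key
    assert_eq!(writer.insert(1, 11).unwrap(), Some(10));  // previous value handed back
    assert_eq!(writer.get(&1), Some(&11));

    match writer.entry(2) {
        Entry::Occupied(mut e) => { e.insert(20); }
        Entry::Vacant(e) => { e.insert(20).unwrap(); }
    }
    assert_eq!(writer.remove(&2), Some(20));
    assert_eq!(writer.remove(&2), None);

    // Index-based access (e.g. for a clock-style sweep) pays the key clone plus
    // rehash described above for entry_at_bucket.
    if let Some(e) = writer.entry_at_bucket(0) {
        let _evicted: u64 = e.remove();
    }
}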
/// - Calling this function on a map with `shrink_mode` set to [`HashMapShrinkMode::Remap`] and - /// [`HashMapAccess::get_num_buckets_in_use`] returns a value higher than [`HashMapAccess::shrink_goal`]. + /// there are more buckets in use than the value returned by [`HashMapAccess::shrink_goal`]. /// /// # Errors /// Returns an [`shmem::Error`] if any errors occur resizing the memory region. - pub fn finish_shrink(&mut self) -> Result<(), shmem::Error> { + pub fn finish_shrink(&self) -> Result<(), shmem::Error> { let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); let inner = &mut map.inner; - assert!(inner.is_shrinking(), "called finish_shrink when no shrink is in progress"); + assert!( + inner.alloc_limit != INVALID_POS, + "called finish_shrink when no shrink is in progress" + ); let num_buckets = inner.alloc_limit; @@ -470,7 +508,7 @@ where for i in (num_buckets as usize)..inner.buckets.len() { if let Some((k, v)) = inner.buckets[i].inner.take() { - // alloc bucket increases buckets in use, so need to decrease since we're just moving + // alloc_bucket increases count, so need to decrease since we're just moving inner.buckets_in_use -= 1; inner.alloc_bucket(k, v).unwrap(); } @@ -491,28 +529,4 @@ where Ok(()) } - - #[cfg(feature = "stats")] - pub fn dict_len(&self) -> usize { - let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); - map.inner.dictionary.len() - } - - #[cfg(feature = "stats")] - pub fn chain_distribution(&self) -> (Vec<(usize, usize)>, usize) { - let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); - let mut out = Vec::new(); - let mut max = 0; - for (i, d) in map.inner.dictionary.iter().enumerate() { - let mut curr = *d; - let mut len = 0; - while curr != INVALID_POS { - curr = map.inner.buckets[curr as usize].next; - len += 1; - } - out.push((i, len)); - max = max.max(len); - } - (out, max) - } } diff --git a/libs/neon-shmem/src/hash/core.rs b/libs/neon-shmem/src/hash/core.rs index b2cf788d21..28c58e851e 100644 --- a/libs/neon-shmem/src/hash/core.rs +++ b/libs/neon-shmem/src/hash/core.rs @@ -34,7 +34,7 @@ pub(crate) struct CoreHashMap<'a, K, V> { } /// Error for when there are no empty buckets left but one is needed. -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub struct FullError(); impl<'a, K: Clone + Hash + Eq, V> CoreHashMap<'a, K, V> { @@ -155,11 +155,6 @@ impl<'a, K: Clone + Hash + Eq, V> CoreHashMap<'a, K, V> { self.buckets.len() } - /// Returns whether there is an ongoing shrink operation. - pub fn is_shrinking(&self) -> bool { - self.alloc_limit != INVALID_POS - } - /// Clears all entries from the hashmap. /// /// Does not reset any allocation limits, but does clear any entries beyond them. @@ -174,32 +169,14 @@ impl<'a, K: Clone + Hash + Eq, V> CoreHashMap<'a, K, V> { inner: None, } } - for i in 0..self.dictionary.len() { self.dictionary[i] = INVALID_POS; } + self.free_head = 0; self.buckets_in_use = 0; } - /// Optionally gets the entry at an index if it is occupied. - pub fn entry_at_bucket(&mut self, pos: usize) -> Option> { - if pos >= self.buckets.len() { - return None; - } - - let entry = self.buckets[pos].inner.as_ref(); - match entry { - Some((key, _)) => Some(OccupiedEntry { - _key: key.clone(), - bucket_pos: pos as u32, - prev_pos: PrevPos::Unknown, - map: self, - }), - _ => None, - } - } - /// Find the position of an unused bucket via the freelist and initialize it. 
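Under the default Remap shrink mode referenced above, the caller is expected to drain entries until usage fits the goal before finishing. A sketch of that protocol (not part of the diff; the helper name, eviction order, and key/value types are illustrative):

use neon_shmem::hash::HashMapAccess;

fn shrink_to(writer: &mut HashMapAccess<u32, u64>, goal: u32, eviction_order: &[u32]) {
    writer.begin_shrink(goal);
    let mut victims = eviction_order.iter();
    // Evict until the survivors fit within the first `goal` buckets.
    while writer.get_num_buckets_in_use() > goal as usize {
        let key = victims.next().expect("not enough keys to evict");
        writer.remove(key);
    }
    // Relocates any stragglers past the new end, rehashes, and shrinks the mapping.
    writer.finish_shrink().unwrap();
    assert!(writer.shrink_goal().is_none());
}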
pub(crate) fn alloc_bucket(&mut self, key: K, value: V) -> Result { let mut pos = self.free_head; @@ -225,7 +202,7 @@ impl<'a, K: Clone + Hash + Eq, V> CoreHashMap<'a, K, V> { let next_pos = self.buckets[pos as usize].next; self.buckets[p as usize].next = next_pos; }, - PrevPos::Unknown => unreachable!() + _ => unreachable!() } // Initialize the bucket. diff --git a/libs/neon-shmem/src/hash/entry.rs b/libs/neon-shmem/src/hash/entry.rs index 5231061b8e..b4c973d9f5 100644 --- a/libs/neon-shmem/src/hash/entry.rs +++ b/libs/neon-shmem/src/hash/entry.rs @@ -19,7 +19,7 @@ pub(crate) enum PrevPos { /// Regular index within the buckets. Chained(u32), /// Unknown - e.g. the associated entry was retrieved by index instead of chain. - Unknown, + Unknown(u64), } /// View into an occupied entry within the map. @@ -31,7 +31,7 @@ pub struct OccupiedEntry<'a, 'b, K, V> { /// The index of the previous entry in the chain. pub(crate) prev_pos: PrevPos, /// The position of the bucket in the [`CoreHashMap`] bucket array. - pub(crate) bucket_pos: u32, + pub(crate) bucket_pos: u32, } impl OccupiedEntry<'_, '_, K, V> { @@ -60,22 +60,39 @@ impl OccupiedEntry<'_, '_, K, V> { /// Removes the entry from the hash map, returning the value originally stored within it. /// + /// This may result in multiple bucket accesses if the entry was obtained by index as the + /// previous chain entry needs to be discovered in this case. + /// /// # Panics /// Panics if the `prev_pos` field is equal to [`PrevPos::Unknown`]. In practice, this means /// the entry was obtained via calling something like [`CoreHashMap::entry_at_bucket`]. pub fn remove(self) -> V { + // If this bucket was queried by index, go ahead and follow its chain from the start. + let prev = if let PrevPos::Unknown(hash) = self.prev_pos { + let dict_idx = hash as usize % self.map.dictionary.len(); + let mut prev = PrevPos::First(dict_idx as u32); + let mut curr = self.map.dictionary[dict_idx]; + while curr != self.bucket_pos { + curr = self.map.buckets[curr as usize].next; + prev = PrevPos::Chained(curr); + } + prev + } else { + self.prev_pos + }; + // CoreHashMap::remove returns Option<(K, V)>. We know it's Some for an OccupiedEntry. 
let bucket = &mut self.map.buckets[self.bucket_pos as usize]; - + // unlink it from the chain - match self.prev_pos { + match prev { PrevPos::First(dict_pos) => { self.map.dictionary[dict_pos as usize] = bucket.next; }, PrevPos::Chained(bucket_pos) => { self.map.buckets[bucket_pos as usize].next = bucket.next; }, - PrevPos::Unknown => panic!("can't safely remove entry with unknown previous entry"), + _ => unreachable!(), } // and add it to the freelist diff --git a/libs/neon-shmem/src/hash/tests.rs b/libs/neon-shmem/src/hash/tests.rs index 209db599b5..d838aa0b86 100644 --- a/libs/neon-shmem/src/hash/tests.rs +++ b/libs/neon-shmem/src/hash/tests.rs @@ -6,6 +6,7 @@ use std::mem::MaybeUninit; use crate::hash::HashMapAccess; use crate::hash::HashMapInit; use crate::hash::Entry; +use crate::hash::core::FullError; use rand::seq::SliceRandom; use rand::{Rng, RngCore}; @@ -40,8 +41,7 @@ fn test_inserts + Copy>(keys: &[K]) { ).attach_writer(); for (idx, k) in keys.iter().enumerate() { - let hash = w.get_hash_value(&(*k).into()); - let res = w.entry_with_hash((*k).into(), hash); + let res = w.entry((*k).into()); match res { Entry::Occupied(mut e) => { e.insert(idx); } Entry::Vacant(e) => { @@ -52,8 +52,7 @@ fn test_inserts + Copy>(keys: &[K]) { } for (idx, k) in keys.iter().enumerate() { - let hash = w.get_hash_value(&(*k).into()); - let x = w.get_with_hash(&(*k).into(), hash); + let x = w.get(&(*k).into()); let value = x.as_deref().copied(); assert_eq!(value, Some(idx)); } @@ -110,8 +109,7 @@ fn apply_op( shadow.remove(&op.0) }; - let hash = map.get_hash_value(&op.0); - let entry = map.entry_with_hash(op.0, hash); + let entry = map.entry(op.0); let hash_existing = match op.1 { Some(new) => { match entry { @@ -152,8 +150,7 @@ fn do_deletes( ) { for _ in 0..num_ops { let (k, _) = shadow.pop_first().unwrap(); - let hash = writer.get_hash_value(&k); - writer.remove_with_hash(&k, hash); + writer.remove(&k); } } @@ -162,16 +159,20 @@ fn do_shrink( shadow: &mut BTreeMap, to: u32 ) { + assert!(writer.shrink_goal().is_none()); writer.begin_shrink(to); + assert_eq!(writer.shrink_goal(), Some(to as usize)); while writer.get_num_buckets_in_use() > to as usize { let (k, _) = shadow.pop_first().unwrap(); - let hash = writer.get_hash_value(&k); - let entry = writer.entry_with_hash(k, hash); + let entry = writer.entry(k); if let Entry::Occupied(e) = entry { e.remove(); } } + let old_usage = writer.get_num_buckets_in_use(); writer.finish_shrink().unwrap(); + assert!(writer.shrink_goal().is_none()); + assert_eq!(writer.get_num_buckets_in_use(), old_usage); } #[test] @@ -219,10 +220,80 @@ fn test_grow() { let mut rng = rand::rng(); do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng); + let old_usage = writer.get_num_buckets_in_use(); writer.grow(1500).unwrap(); + assert_eq!(writer.get_num_buckets_in_use(), old_usage); + assert_eq!(writer.get_num_buckets(), 1500); do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng); } +#[test] +fn test_clear() { + let mut writer = HashMapInit::::new_resizeable_named( + 1500, 2000, "test_clear" + ).attach_writer(); + let mut shadow: std::collections::BTreeMap = BTreeMap::new(); + let mut rng = rand::rng(); + do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng); + writer.clear(); + assert_eq!(writer.get_num_buckets_in_use(), 0); + assert_eq!(writer.get_num_buckets(), 1500); + while let Some((key, _)) = shadow.pop_first() { + assert!(writer.get(&key).is_none()); + } + do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng); + 
for i in 0..(1500 - writer.get_num_buckets_in_use()) { + writer.insert((1500 + i as u128).into(), 0).unwrap(); + } + assert_eq!(writer.insert(5000.into(), 0), Err(FullError {})); + writer.clear(); + assert!(writer.insert(5000.into(), 0).is_ok()); +} + +#[test] +fn test_idx_remove() { + let mut writer = HashMapInit::::new_resizeable_named( + 1500, 2000, "test_clear" + ).attach_writer(); + let mut shadow: std::collections::BTreeMap = BTreeMap::new(); + let mut rng = rand::rng(); + do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng); + for _ in 0..100 { + let idx = (rng.next_u32() % 1500) as usize; + if let Some(e) = writer.entry_at_bucket(idx) { + shadow.remove(&e._key); + e.remove(); + } + + } + while let Some((key, val)) = shadow.pop_first() { + assert_eq!(writer.get(&key), Some(&val)); + } +} + +#[test] +fn test_idx_get() { + let mut writer = HashMapInit::::new_resizeable_named( + 1500, 2000, "test_clear" + ).attach_writer(); + let mut shadow: std::collections::BTreeMap = BTreeMap::new(); + let mut rng = rand::rng(); + do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng); + for _ in 0..100 { + let idx = (rng.next_u32() % 1500) as usize; + if let Some(mut e) = writer.entry_at_bucket(idx) { + { + let v: *const usize = e.get(); + assert_eq!(writer.get_bucket_for_value(v), idx); + } + { + let v: *const usize = e.get_mut(); + assert_eq!(writer.get_bucket_for_value(v), idx); + } + } + } +} + #[test] fn test_shrink() { let mut writer = HashMapInit::::new_resizeable_named( @@ -231,8 +302,9 @@ fn test_shrink() { let mut shadow: std::collections::BTreeMap = BTreeMap::new(); let mut rng = rand::rng(); - do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng); - do_shrink(&mut writer, &mut shadow, 1000); + do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng); + do_shrink(&mut writer, &mut shadow, 1000); + assert_eq!(writer.get_num_buckets(), 1000); do_deletes(500, &mut writer, &mut shadow); do_random_ops(10000, 500, 0.75, &mut writer, &mut shadow, &mut rng); assert!(writer.get_num_buckets_in_use() <= 1000); @@ -267,15 +339,14 @@ fn test_bucket_ops() { let mut writer = HashMapInit::::new_resizeable_named( 1000, 1200, "test_bucket_ops" ).attach_writer(); - let hash = writer.get_hash_value(&1.into()); - match writer.entry_with_hash(1.into(), hash) { + match writer.entry(1.into()) { Entry::Occupied(mut e) => { e.insert(2); }, Entry::Vacant(e) => { e.insert(2).unwrap(); }, } assert_eq!(writer.get_num_buckets_in_use(), 1); assert_eq!(writer.get_num_buckets(), 1000); - assert_eq!(writer.get_with_hash(&1.into(), hash), Some(&2)); - let pos = match writer.entry_with_hash(1.into(), hash) { + assert_eq!(writer.get(&1.into()), Some(&2)); + let pos = match writer.entry(1.into()) { Entry::Occupied(e) => { assert_eq!(e._key, 1.into()); let pos = e.bucket_pos as usize; @@ -285,10 +356,10 @@ fn test_bucket_ops() { }, Entry::Vacant(_) => { panic!("Insert didn't affect entry"); }, }; - let ptr: *const usize = writer.get_with_hash(&1.into(), hash).unwrap(); + let ptr: *const usize = writer.get(&1.into()).unwrap(); assert_eq!(writer.get_bucket_for_value(ptr), pos); - writer.remove_with_hash(&1.into(), hash); - assert_eq!(writer.get_with_hash(&1.into(), hash), None); + writer.remove(&1.into()); + assert_eq!(writer.get(&1.into()), None); } #[test] @@ -302,15 +373,14 @@ fn test_shrink_zero() { } writer.finish_shrink().unwrap(); assert_eq!(writer.get_num_buckets_in_use(), 0); - let hash = writer.get_hash_value(&1.into()); - let entry = 
writer.entry_with_hash(1.into(), hash); + let entry = writer.entry(1.into()); if let Entry::Vacant(v) = entry { assert!(v.insert(2).is_err()); } else { panic!("Somehow got non-vacant entry in empty map.") } writer.grow(50).unwrap(); - let entry = writer.entry_with_hash(1.into(), hash); + let entry = writer.entry(1.into()); if let Entry::Vacant(v) = entry { assert!(v.insert(2).is_ok()); } else { From 9d3e07ef2cebb53ba439df6d7e39195d1cf22d5d Mon Sep 17 00:00:00 2001 From: David Freifeld Date: Mon, 30 Jun 2025 17:05:32 -0700 Subject: [PATCH 5/7] Add initial prototype of shmem sync primitives --- Cargo.lock | 1 + libs/neon-shmem/Cargo.toml | 1 + libs/neon-shmem/src/lib.rs | 1 + libs/neon-shmem/src/sync.rs | 255 ++++++++++++++++++++++++++++++++++++ 4 files changed, 258 insertions(+) create mode 100644 libs/neon-shmem/src/sync.rs diff --git a/Cargo.lock b/Cargo.lock index 4fd5f5802b..4bb6d2d474 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4037,6 +4037,7 @@ dependencies = [ "criterion", "foldhash", "hashbrown 0.15.4 (git+https://github.com/quantumish/hashbrown.git?rev=6610e6d)", + "libc", "nix 0.30.1", "rand 0.9.1", "rand_distr 0.5.1", diff --git a/libs/neon-shmem/Cargo.toml b/libs/neon-shmem/Cargo.toml index 08b5ac2138..8306bbf778 100644 --- a/libs/neon-shmem/Cargo.toml +++ b/libs/neon-shmem/Cargo.toml @@ -10,6 +10,7 @@ nix.workspace = true workspace_hack = { version = "0.1", path = "../../workspace_hack" } rustc-hash = { version = "2.1.1" } rand = "0.9.1" +libc.workspace = true [dev-dependencies] criterion = { workspace = true, features = ["html_reports"] } diff --git a/libs/neon-shmem/src/lib.rs b/libs/neon-shmem/src/lib.rs index f601010122..61ca168073 100644 --- a/libs/neon-shmem/src/lib.rs +++ b/libs/neon-shmem/src/lib.rs @@ -2,3 +2,4 @@ pub mod hash; pub mod shmem; +pub mod sync; diff --git a/libs/neon-shmem/src/sync.rs b/libs/neon-shmem/src/sync.rs new file mode 100644 index 0000000000..68ef7b904d --- /dev/null +++ b/libs/neon-shmem/src/sync.rs @@ -0,0 +1,255 @@ +//! Simple utilities akin to what's in [`std::sync`] but designed to work with shared memory. + +use std::mem::MaybeUninit; +use std::ptr::NonNull; +use std::cell::UnsafeCell; +use std::ops::{Deref, DerefMut}; +use thiserror::Error; + +/// Shared memory read-write lock. +struct RwLock<'a, T: ?Sized> { + inner: &'a mut libc::pthread_rwlock_t, + data: UnsafeCell, +} + +/// RAII guard for a read lock. +struct RwLockReadGuard<'a, 'b, T: ?Sized> { + data: NonNull, + lock: &'a RwLock<'b, T>, +} + +/// RAII guard for a write lock. +struct RwLockWriteGuard<'a, 'b, T: ?Sized> { + lock: &'a RwLock<'b, T>, +} + +// TODO(quantumish): Support poisoning errors? +#[derive(Error, Debug)] +enum RwLockError { + #[error("deadlock detected")] + Deadlock, + #[error("max number of read locks exceeded")] + MaxReadLocks, + #[error("nonblocking operation would block")] + WouldBlock, +} + +unsafe impl Send for RwLock<'_, T> {} +unsafe impl Sync for RwLock<'_, T> {} + +impl<'a, T> RwLock<'a, T> { + fn new(lock: &'a mut MaybeUninit, data: T) -> Self { + unsafe { + let mut attrs = MaybeUninit::uninit(); + // Ignoring return value here - only possible error is OOM. + libc::pthread_rwlockattr_init(attrs.as_mut_ptr()); + libc::pthread_rwlockattr_setpshared( + attrs.as_mut_ptr(), + libc::PTHREAD_PROCESS_SHARED + ); + // TODO(quantumish): worth making this function return Result? 
+ libc::pthread_rwlock_init(lock.as_mut_ptr(), attrs.as_mut_ptr()); + // Safety: POSIX specifies that "any function affecting the attributes + // object (including destruction) shall not affect any previously + // initialized read-write locks". + libc::pthread_rwlockattr_destroy(attrs.as_mut_ptr()); + Self { + inner: lock.assume_init_mut(), + data: data.into(), + } + } + } + + fn read(&self) -> Result, RwLockError> { + unsafe { + let res = libc::pthread_rwlock_rdlock(self.inner as *const _ as *mut _); + match res { + 0 => (), + libc::EINVAL => panic!("failed to properly initialize lock"), + libc::EDEADLK => return Err(RwLockError::Deadlock), + libc::EAGAIN => return Err(RwLockError::MaxReadLocks), + e => panic!("unknown error code returned: {e}") + } + Ok(RwLockReadGuard { + data: NonNull::new_unchecked(self.data.get()), + lock: self + }) + } + } + + fn try_read(&self) -> Result, RwLockError> { + unsafe { + let res = libc::pthread_rwlock_tryrdlock(self.inner as *const _ as *mut _); + match res { + 0 => (), + libc::EINVAL => panic!("failed to properly initialize lock"), + libc::EDEADLK => return Err(RwLockError::Deadlock), + libc::EAGAIN => return Err(RwLockError::MaxReadLocks), + libc::EBUSY => return Err(RwLockError::WouldBlock), + e => panic!("unknown error code returned: {e}") + } + Ok(RwLockReadGuard { + data: NonNull::new_unchecked(self.data.get()), + lock: self + }) + } + } + + fn write(&self) -> Result, RwLockError> { + unsafe { + let res = libc::pthread_rwlock_wrlock(self.inner as *const _ as *mut _); + match res { + 0 => (), + libc::EINVAL => panic!("failed to properly initialize lock"), + libc::EDEADLK => return Err(RwLockError::Deadlock), + e => panic!("unknown error code returned: {e}") + } + } + Ok(RwLockWriteGuard { lock: self }) + } + + fn try_write(&self) -> Result, RwLockError> { + unsafe { + let res = libc::pthread_rwlock_trywrlock(self.inner as *const _ as *mut _); + match res { + 0 => (), + libc::EINVAL => panic!("failed to properly initialize lock"), + libc::EDEADLK => return Err(RwLockError::Deadlock), + libc::EBUSY => return Err(RwLockError::WouldBlock), + e => panic!("unknown error code returned: {e}") + } + } + Ok(RwLockWriteGuard { lock: self }) + } +} + +unsafe impl Sync for RwLockReadGuard<'_, '_, T> {} +unsafe impl Sync for RwLockWriteGuard<'_, '_, T> {} + +impl Deref for RwLockReadGuard<'_, '_, T> { + type Target = T; + + fn deref(&self) -> &T { + unsafe { self.data.as_ref() } + } +} + +impl Deref for RwLockWriteGuard<'_, '_, T> { + type Target = T; + + fn deref(&self) -> &T { + unsafe { &*self.lock.data.get() } + } +} + +impl DerefMut for RwLockWriteGuard<'_, '_, T> { + fn deref_mut(&mut self) -> &mut T { + unsafe { &mut *self.lock.data.get() } + } +} + +impl Drop for RwLockReadGuard<'_, '_, T> { + fn drop(&mut self) -> () { + let res = unsafe { libc::pthread_rwlock_unlock( + self.lock.inner as *const _ as *mut _ + ) }; + debug_assert!(res == 0); + } +} + +impl Drop for RwLockWriteGuard<'_, '_, T> { + fn drop(&mut self) -> () { + let res = unsafe { libc::pthread_rwlock_unlock( + self.lock.inner as *const _ as *mut _ + ) }; + debug_assert!(res == 0); + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use super::*; +use RwLockError::*; + + #[test] + fn test_single_process() { + let mut lock = MaybeUninit::uninit(); + let wrapper = RwLock::new(&mut lock, 0); + let mut writer = wrapper.write().unwrap(); + assert!(matches!(wrapper.try_write(), Err(Deadlock | WouldBlock))); + assert!(matches!(wrapper.try_read(), Err(Deadlock | WouldBlock))); + *writer = 
5; + drop(writer); + let reader = wrapper.read().unwrap(); + assert!(matches!(wrapper.try_write(), Err(Deadlock | WouldBlock))); + assert!(matches!(wrapper.read(), Ok(_))); + assert_eq!(*reader, 5); + drop(reader); + assert!(matches!(wrapper.try_write(), Ok(_))); + } + + #[test] + fn test_multi_thread() { + let lock = Box::new(MaybeUninit::uninit()); + let wrapper = Arc::new(RwLock::new(Box::leak(lock), 0)); + let mut writer = wrapper.write().unwrap(); + let t1 = { + let wrapper = wrapper.clone(); + std::thread::spawn(move || { + let mut writer = wrapper.write().unwrap(); + *writer = 20; + }) + }; + assert_eq!(*writer, 0); + *writer = 10; + assert_eq!(*writer, 10); + drop(writer); + t1.join().unwrap(); + let mut writer = wrapper.write().unwrap(); + assert_eq!(*writer, 20); + drop(writer); + let mut handles = vec![]; + for _ in 0..5 { + handles.push({ + let wrapper = wrapper.clone(); + std::thread::spawn(move || { + let reader = wrapper.read().unwrap(); + assert_eq!(*reader, 20); + }) + }); + } + for h in handles { + h.join().unwrap(); + } + let writer = wrapper.write().unwrap(); + assert_eq!(*writer, 20); + } + + // // TODO(quantumish): Terrible time-based synchronization, fix me. + // #[test] + // fn test_multi_process() { + // let max_size = 100; + // let init_struct = crate::shmem::ShmemHandle::new("test_multi_process", 0, max_size).unwrap(); + // let ptr = init_struct.data_ptr.as_ptr(); + // let lock: &mut _ = unsafe { ptr.add( + // ptr.align_offset(std::mem::align_of::>()) + // ).cast::>().as_mut().unwrap() } ; + // let wrapper = RwLock::new(lock, 0); + + // let fork_result = unsafe { nix::unistd::fork().unwrap() }; + + // if !fork_result.is_parent() { + // let mut writer = wrapper.write().unwrap(); + // std::thread::sleep(std::time::Duration::from_secs(5)); + // *writer = 2; + // } else { + // std::thread::sleep(std::time::Duration::from_secs(1)); + // assert!(matches!(wrapper.try_write(), Err(WouldBlock))); + // std::thread::sleep(std::time::Duration::from_secs(10)); + // let writer = wrapper.try_write().unwrap(); + // assert_eq!(*writer, 2); + // } + // } +} From 19b5618578ceab75b6ead3bf84d9ae607203624c Mon Sep 17 00:00:00 2001 From: David Freifeld Date: Wed, 2 Jul 2025 11:44:38 -0700 Subject: [PATCH 6/7] Switch to neon_shmem::sync lock_api and integrate into hashmap --- Cargo.lock | 1 + libs/neon-shmem/Cargo.toml | 1 + libs/neon-shmem/src/hash.rs | 233 ++++++++++++------------- libs/neon-shmem/src/hash/core.rs | 44 +---- libs/neon-shmem/src/hash/entry.rs | 27 +-- libs/neon-shmem/src/hash/tests.rs | 22 ++- libs/neon-shmem/src/sync.rs | 280 +++++++----------------------- 7 files changed, 210 insertions(+), 398 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4bb6d2d474..f798a1bdda 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4038,6 +4038,7 @@ dependencies = [ "foldhash", "hashbrown 0.15.4 (git+https://github.com/quantumish/hashbrown.git?rev=6610e6d)", "libc", + "lock_api", "nix 0.30.1", "rand 0.9.1", "rand_distr 0.5.1", diff --git a/libs/neon-shmem/Cargo.toml b/libs/neon-shmem/Cargo.toml index 8306bbf778..8ce5b52deb 100644 --- a/libs/neon-shmem/Cargo.toml +++ b/libs/neon-shmem/Cargo.toml @@ -11,6 +11,7 @@ workspace_hack = { version = "0.1", path = "../../workspace_hack" } rustc-hash = { version = "2.1.1" } rand = "0.9.1" libc.workspace = true +lock_api = "0.4.13" [dev-dependencies] criterion = { workspace = true, features = ["html_reports"] } diff --git a/libs/neon-shmem/src/hash.rs b/libs/neon-shmem/src/hash.rs index 7d47a4f5e5..733e4b6f33 100644 --- 
a/libs/neon-shmem/src/hash.rs +++ b/libs/neon-shmem/src/hash.rs @@ -16,9 +16,9 @@ use std::hash::{Hash, BuildHasher}; use std::mem::MaybeUninit; -use std::default::Default; -use crate::{shmem, shmem::ShmemHandle}; +use crate::{shmem, sync::*}; +use crate::shmem::ShmemHandle; mod core; pub mod entry; @@ -27,15 +27,14 @@ pub mod entry; mod tests; use core::{Bucket, CoreHashMap, INVALID_POS}; -use entry::{Entry, OccupiedEntry}; +use entry::{Entry, OccupiedEntry, VacantEntry, PrevPos}; /// Builder for a [`HashMapAccess`]. #[must_use] pub struct HashMapInit<'a, K, V, S = rustc_hash::FxBuildHasher> { shmem_handle: Option, - shared_ptr: *mut HashMapShared<'a, K, V>, + shared_ptr: *mut RwLock>, shared_size: usize, - shrink_mode: HashMapShrinkMode, hasher: S, num_buckets: u32, } @@ -45,28 +44,6 @@ pub struct HashMapAccess<'a, K, V, S = rustc_hash::FxBuildHasher> { shmem_handle: Option, shared_ptr: *mut HashMapShared<'a, K, V>, hasher: S, - shrink_mode: HashMapShrinkMode, -} - -/// Enum specifying what behavior to have surrounding occupied entries in what is -/// about-to-be-shrinked space during a call to [`HashMapAccess::finish_shrink`]. -#[derive(PartialEq, Eq)] -pub enum HashMapShrinkMode { - /// Remap entry to the range of buckets that will remain after shrinking. - /// - /// Requires that caller has left enough room within the map such that this is possible. - Remap, - /// Remove any entries remaining in soon to be deallocated space. - /// - /// Only really useful if you legitimately do not care what entries are removed. - /// Should primarily be used for testing. - Remove, -} - -impl Default for HashMapShrinkMode { - fn default() -> Self { - Self::Remap - } } unsafe impl Sync for HashMapAccess<'_, K, V, S> {} @@ -80,14 +57,9 @@ impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> { shared_ptr: self.shared_ptr, shared_size: self.shared_size, num_buckets: self.num_buckets, - shrink_mode: self.shrink_mode, } } - pub fn with_shrink_mode(self, mode: HashMapShrinkMode) -> Self { - Self { shrink_mode: mode, ..self } - } - /// Loosely (over)estimate the size needed to store a hash table with `num_buckets` buckets. pub fn estimate_size(num_buckets: u32) -> usize { // add some margin to cover alignment etc. @@ -96,13 +68,17 @@ impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> { /// Initialize a table for writing. pub fn attach_writer(self) -> HashMapAccess<'a, K, V, S> { - // carve out the HashMapShared struct from the area. let mut ptr: *mut u8 = self.shared_ptr.cast(); let end_ptr: *mut u8 = unsafe { ptr.add(self.shared_size) }; - ptr = unsafe { ptr.add(ptr.align_offset(align_of::>())) }; - let shared_ptr: *mut HashMapShared = ptr.cast(); - ptr = unsafe { ptr.add(size_of::>()) }; + // carve out area for the One Big Lock (TM) and the HashMapShared. 
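// (Aside, not part of the diff: each carve below repeats the same
// align-then-advance step; as a standalone helper the pattern is roughly:)
unsafe fn carve<T>(cursor: &mut *mut u8) -> *mut T {
    // Bump `cursor` up to T's alignment, reserve size_of::<T>() bytes, and
    // hand back the start of the reserved region.
    let p = (*cursor).add((*cursor).align_offset(align_of::<T>()));
    *cursor = p.add(size_of::<T>());
    p.cast()
}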
+ ptr = unsafe { ptr.add(ptr.align_offset(align_of::())) }; + let raw_lock_ptr = ptr; + ptr = unsafe { ptr.add(size_of::()) }; + ptr = unsafe { ptr.add(ptr.align_offset(align_of::>())) }; + let shared_ptr: *mut HashMapShared = ptr.cast(); + ptr = unsafe { ptr.add(size_of::>()) }; + // carve out the buckets ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::>())) }; let buckets_ptr = ptr; @@ -121,14 +97,14 @@ impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> { std::slice::from_raw_parts_mut(dictionary_ptr.cast(), dictionary_size as usize) }; let hashmap = CoreHashMap::new(buckets, dictionary); - unsafe { - std::ptr::write(shared_ptr, HashMapShared { inner: hashmap }); - } + let lock = RwLock::from_raw(PthreadRwLock::new(raw_lock_ptr.cast()), hashmap); + unsafe { + std::ptr::write(shared_ptr, lock); + } HashMapAccess { shmem_handle: self.shmem_handle, - shared_ptr: self.shared_ptr, - shrink_mode: self.shrink_mode, + shared_ptr, hasher: self.hasher, } } @@ -145,14 +121,13 @@ impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> { /// relies on the memory layout! The data structures are laid out in the contiguous shared memory /// area as follows: /// +/// [`libc::pthread_rwlock_t`] /// [`HashMapShared`] /// [buckets] /// [dictionary] /// /// In between the above parts, there can be padding bytes to align the parts correctly. -struct HashMapShared<'a, K, V> { - inner: CoreHashMap<'a, K, V> -} +type HashMapShared<'a, K, V> = RwLock>; impl<'a, K, V> HashMapInit<'a, K, V, rustc_hash::FxBuildHasher> where @@ -168,7 +143,6 @@ where shmem_handle: None, shared_ptr: area.as_mut_ptr().cast(), shared_size: area.len(), - shrink_mode: HashMapShrinkMode::default(), hasher: rustc_hash::FxBuildHasher, } } @@ -187,7 +161,6 @@ where shared_ptr: shmem.data_ptr.as_ptr().cast(), shmem_handle: Some(shmem), shared_size: size, - shrink_mode: HashMapShrinkMode::default(), hasher: rustc_hash::FxBuildHasher } } @@ -204,7 +177,6 @@ where shared_ptr: shmem.data_ptr.as_ptr().cast(), shmem_handle: Some(shmem), shared_size: size, - shrink_mode: HashMapShrinkMode::default(), hasher: rustc_hash::FxBuildHasher } } @@ -229,25 +201,64 @@ where self.hasher.hash_one(key) } + fn entry_with_hash(&self, key: K, hash: u64) -> Entry<'a, '_, K, V> { + let mut map = unsafe { self.shared_ptr.as_ref() }.unwrap().write(); + let dict_pos = hash as usize % map.dictionary.len(); + let first = map.dictionary[dict_pos]; + if first == INVALID_POS { + // no existing entry + return Entry::Vacant(VacantEntry { + map, + key, + dict_pos: dict_pos as u32, + }); + } + + let mut prev_pos = PrevPos::First(dict_pos as u32); + let mut next = first; + loop { + let bucket = &mut map.buckets[next as usize]; + let (bucket_key, _bucket_value) = bucket.inner.as_mut().expect("entry is in use"); + if *bucket_key == key { + // found existing entry + return Entry::Occupied(OccupiedEntry { + map, + _key: key, + prev_pos, + bucket_pos: next, + }); + } + + if bucket.next == INVALID_POS { + // No existing entry + return Entry::Vacant(VacantEntry { + map, + key, + dict_pos: dict_pos as u32, + }); + } + prev_pos = PrevPos::Chained(next); + next = bucket.next; + } + } + /// Get a reference to the corresponding value for a key. 
- pub fn get<'e>(&'e self, key: &K) -> Option<&'e V> { - let map = unsafe { self.shared_ptr.as_ref() }.unwrap(); + pub fn get<'e>(&'e self, key: &K) -> Option> { let hash = self.get_hash_value(key); - map.inner.get_with_hash(key, hash) + let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read(); + RwLockReadGuard::try_map(map, |m| m.get_with_hash(key, hash)).ok() } /// Get a reference to the entry containing a key. pub fn entry(&self, key: K) -> Entry<'a, '_, K, V> { - let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); let hash = self.get_hash_value(&key); - map.inner.entry_with_hash(key, hash) + self.entry_with_hash(key, hash) } /// Remove a key given its hash. Returns the associated value if it existed. pub fn remove(&self, key: &K) -> Option { - let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); let hash = self.get_hash_value(&key); - match map.inner.entry_with_hash(key.clone(), hash) { + match self.entry_with_hash(key.clone(), hash) { Entry::Occupied(e) => Some(e.remove()), Entry::Vacant(_) => None } @@ -258,12 +269,11 @@ where /// # Errors /// Will return [`core::FullError`] if there is no more space left in the map. pub fn insert(&self, key: K, value: V) -> Result, core::FullError> { - let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); let hash = self.get_hash_value(&key); - match map.inner.entry_with_hash(key.clone(), hash) { + match self.entry_with_hash(key.clone(), hash) { Entry::Occupied(mut e) => Ok(Some(e.insert(value))), Entry::Vacant(e) => { - e.insert(value)?; + _ = e.insert(value)?; Ok(None) } } @@ -275,13 +285,12 @@ where /// due to the [`OccupiedEntry`] type owning the key and also a hash of the key in order /// to enable repairing the hash chain if the entry is removed. pub fn entry_at_bucket(&self, pos: usize) -> Option> { - let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); - let inner = &mut map.inner; - if pos >= inner.buckets.len() { + let map = unsafe { self.shared_ptr.as_mut() }.unwrap().write(); + if pos >= map.buckets.len() { return None; } - let entry = inner.buckets[pos].inner.as_ref(); + let entry = map.buckets[pos].inner.as_ref(); match entry { Some((key, _)) => Some(OccupiedEntry { _key: key.clone(), @@ -289,7 +298,7 @@ where prev_pos: entry::PrevPos::Unknown( self.get_hash_value(&key) ), - map: inner, + map, }), _ => None, } @@ -297,8 +306,8 @@ where /// Returns the number of buckets in the table. pub fn get_num_buckets(&self) -> usize { - let map = unsafe { self.shared_ptr.as_ref() }.unwrap(); - map.inner.get_num_buckets() + let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read(); + map.get_num_buckets() } /// Return the key and value stored in bucket with given index. This can be used to @@ -306,38 +315,35 @@ where // TODO: An Iterator might be nicer. The communicator's clock algorithm needs to // _slowly_ iterate through all buckets with its clock hand, without holding a lock. // If we switch to an Iterator, it must not hold the lock. - pub fn get_at_bucket(&self, pos: usize) -> Option<&(K, V)> { - let map = unsafe { self.shared_ptr.as_ref() }.unwrap(); - - if pos >= map.inner.buckets.len() { + pub fn get_at_bucket(&self, pos: usize) -> Option> { + let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read(); + if pos >= map.buckets.len() { return None; } - let bucket = &map.inner.buckets[pos]; - bucket.inner.as_ref() + RwLockReadGuard::try_map(map, |m| m.buckets[pos].inner.as_ref()).ok() } /// Returns the index of the bucket a given value corresponds to. 
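With the lock folded into the shared header, `get` and `get_at_bucket` now return read guards rather than plain references, so the pthread read lock stays held for as long as the returned value is alive. A sketch of the implication (not part of the diff; types are placeholders):

use neon_shmem::hash::HashMapAccess;

fn read_then_write(access: &HashMapAccess<u32, u64>) {
    if let Some(v) = access.get(&1) {
        // `v` is a guard: the shared lock is held until it goes out of scope.
        println!("current value: {}", *v);
    } // read guard dropped here, releasing the lock
    // Only now take a write path; the pthread rwlock is neither reentrant nor
    // upgradable, so overlapping guards from a single thread could block forever.
    let _ = access.insert(1, 2);
}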
pub fn get_bucket_for_value(&self, val_ptr: *const V) -> usize { - let map = unsafe { self.shared_ptr.as_ref() }.unwrap(); + let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read(); - let origin = map.inner.buckets.as_ptr(); + let origin = map.buckets.as_ptr(); let idx = (val_ptr as usize - origin as usize) / size_of::>(); - assert!(idx < map.inner.buckets.len()); + assert!(idx < map.buckets.len()); idx } /// Returns the number of occupied buckets in the table. pub fn get_num_buckets_in_use(&self) -> usize { - let map = unsafe { self.shared_ptr.as_ref() }.unwrap(); - map.inner.buckets_in_use as usize + let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read(); + map.buckets_in_use as usize } /// Clears all entries in a table. Does not reset any shrinking operations. pub fn clear(&self) { - let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); - let inner = &mut map.inner; - inner.clear(); + let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write(); + map.clear(); } /// Perform an in-place rehash of some region (0..`rehash_buckets`) of the table and reset @@ -389,13 +395,12 @@ where /// Rehash the map without growing or shrinking. pub fn shuffle(&self) { - let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); - let inner = &mut map.inner; - let num_buckets = inner.get_num_buckets() as u32; + let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write(); + let num_buckets = map.get_num_buckets() as u32; let size_bytes = HashMapInit::::estimate_size(num_buckets); let end_ptr: *mut u8 = unsafe { self.shared_ptr.byte_add(size_bytes).cast() }; - let buckets_ptr = inner.buckets.as_mut_ptr(); - self.rehash_dict(inner, buckets_ptr, end_ptr, num_buckets, num_buckets); + let buckets_ptr = map.buckets.as_mut_ptr(); + self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, num_buckets); } /// Grow the number of buckets within the table. @@ -409,10 +414,9 @@ where /// /// # Errors /// Returns an [`shmem::Error`] if any errors occur resizing the memory region. - pub fn grow(&mut self, num_buckets: u32) -> Result<(), shmem::Error> { - let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); - let inner = &mut map.inner; - let old_num_buckets = inner.buckets.len() as u32; + pub fn grow(&self, num_buckets: u32) -> Result<(), shmem::Error> { + let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write(); + let old_num_buckets = map.buckets.len() as u32; assert!(num_buckets >= old_num_buckets, "grow called with a smaller number of buckets"); if num_buckets == old_num_buckets { @@ -429,7 +433,7 @@ where // Initialize new buckets. The new buckets are linked to the free list. // NB: This overwrites the dictionary! - let buckets_ptr = inner.buckets.as_mut_ptr(); + let buckets_ptr = map.buckets.as_mut_ptr(); unsafe { for i in old_num_buckets..num_buckets { let bucket = buckets_ptr.add(i as usize); @@ -437,15 +441,15 @@ where next: if i < num_buckets-1 { i + 1 } else { - inner.free_head + map.free_head }, inner: None, }); } } - self.rehash_dict(inner, buckets_ptr, end_ptr, num_buckets, old_num_buckets); - inner.free_head = old_num_buckets; + self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, old_num_buckets); + map.free_head = old_num_buckets; Ok(()) } @@ -456,22 +460,22 @@ where /// Panics if called on a map initialized with [`HashMapInit::with_fixed`] or if `num_buckets` is /// greater than the number of buckets in the map. 
pub fn begin_shrink(&mut self, num_buckets: u32) { - let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); + let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write(); assert!( - num_buckets <= map.inner.get_num_buckets() as u32, + num_buckets <= map.get_num_buckets() as u32, "shrink called with a larger number of buckets" ); _ = self .shmem_handle .as_ref() .expect("shrink called on a fixed-size hash table"); - map.inner.alloc_limit = num_buckets; + map.alloc_limit = num_buckets; } /// If a shrink operation is underway, returns the target size of the map. Otherwise, returns None. pub fn shrink_goal(&self) -> Option { - let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); - let goal = map.inner.alloc_limit; + let map = unsafe { self.shared_ptr.as_mut() }.unwrap().read(); + let goal = map.alloc_limit; if goal == INVALID_POS { None } else { Some(goal as usize) } } @@ -487,31 +491,28 @@ where /// # Errors /// Returns an [`shmem::Error`] if any errors occur resizing the memory region. pub fn finish_shrink(&self) -> Result<(), shmem::Error> { - let map = unsafe { self.shared_ptr.as_mut() }.unwrap(); - let inner = &mut map.inner; + let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write(); assert!( - inner.alloc_limit != INVALID_POS, + map.alloc_limit != INVALID_POS, "called finish_shrink when no shrink is in progress" ); - let num_buckets = inner.alloc_limit; + let num_buckets = map.alloc_limit; - if inner.get_num_buckets() == num_buckets as usize { + if map.get_num_buckets() == num_buckets as usize { return Ok(()); } - if self.shrink_mode == HashMapShrinkMode::Remap { - assert!( - inner.buckets_in_use <= num_buckets, - "called finish_shrink before enough entries were removed" - ); - - for i in (num_buckets as usize)..inner.buckets.len() { - if let Some((k, v)) = inner.buckets[i].inner.take() { - // alloc_bucket increases count, so need to decrease since we're just moving - inner.buckets_in_use -= 1; - inner.alloc_bucket(k, v).unwrap(); - } + assert!( + map.buckets_in_use <= num_buckets, + "called finish_shrink before enough entries were removed" + ); + + for i in (num_buckets as usize)..map.buckets.len() { + if let Some((k, v)) = map.buckets[i].inner.take() { + // alloc_bucket increases count, so need to decrease since we're just moving + map.buckets_in_use -= 1; + map.alloc_bucket(k, v).unwrap(); } } @@ -523,9 +524,9 @@ where let size_bytes = HashMapInit::::estimate_size(num_buckets); shmem_handle.set_size(size_bytes)?; let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) }; - let buckets_ptr = inner.buckets.as_mut_ptr(); - self.rehash_dict(inner, buckets_ptr, end_ptr, num_buckets, num_buckets); - inner.alloc_limit = INVALID_POS; + let buckets_ptr = map.buckets.as_mut_ptr(); + self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, num_buckets); + map.alloc_limit = INVALID_POS; Ok(()) } diff --git a/libs/neon-shmem/src/hash/core.rs b/libs/neon-shmem/src/hash/core.rs index 28c58e851e..473c417ece 100644 --- a/libs/neon-shmem/src/hash/core.rs +++ b/libs/neon-shmem/src/hash/core.rs @@ -3,7 +3,7 @@ use std::hash::Hash; use std::mem::MaybeUninit; -use crate::hash::entry::{Entry, OccupiedEntry, PrevPos, VacantEntry}; +use crate::hash::entry::*; /// Invalid position within the map (either within the dictionary or bucket array). pub(crate) const INVALID_POS: u32 = u32::MAX; @@ -29,6 +29,7 @@ pub(crate) struct CoreHashMap<'a, K, V> { pub(crate) alloc_limit: u32, /// The number of currently occupied buckets. 
pub(crate) buckets_in_use: u32, + // pub(crate) lock: libc::pthread_mutex_t, // Unclear what the purpose of this is. pub(crate) _user_list_head: u32, } @@ -109,47 +110,6 @@ impl<'a, K: Clone + Hash + Eq, V> CoreHashMap<'a, K, V> { } } - /// Get the [`Entry`] associated with a key given hash. This should be used for updates/inserts. - pub fn entry_with_hash(&mut self, key: K, hash: u64) -> Entry<'a, '_, K, V> { - let dict_pos = hash as usize % self.dictionary.len(); - let first = self.dictionary[dict_pos]; - if first == INVALID_POS { - // no existing entry - return Entry::Vacant(VacantEntry { - map: self, - key, - dict_pos: dict_pos as u32, - }); - } - - let mut prev_pos = PrevPos::First(dict_pos as u32); - let mut next = first; - loop { - let bucket = &mut self.buckets[next as usize]; - let (bucket_key, _bucket_value) = bucket.inner.as_mut().expect("entry is in use"); - if *bucket_key == key { - // found existing entry - return Entry::Occupied(OccupiedEntry { - map: self, - _key: key, - prev_pos, - bucket_pos: next, - }); - } - - if bucket.next == INVALID_POS { - // No existing entry - return Entry::Vacant(VacantEntry { - map: self, - key, - dict_pos: dict_pos as u32, - }); - } - prev_pos = PrevPos::Chained(next); - next = bucket.next; - } - } - /// Get number of buckets in map. pub fn get_num_buckets(&self) -> usize { self.buckets.len() diff --git a/libs/neon-shmem/src/hash/entry.rs b/libs/neon-shmem/src/hash/entry.rs index b4c973d9f5..a5832665aa 100644 --- a/libs/neon-shmem/src/hash/entry.rs +++ b/libs/neon-shmem/src/hash/entry.rs @@ -1,11 +1,12 @@ //! Equivalent of [`std::collections::hash_map::Entry`] for this hashmap. use crate::hash::core::{CoreHashMap, FullError, INVALID_POS}; +use crate::sync::{RwLockWriteGuard, ValueWriteGuard}; use std::hash::Hash; use std::mem; -/// View into an entry in the map (either vacant or occupied). + pub enum Entry<'a, 'b, K, V> { Occupied(OccupiedEntry<'a, 'b, K, V>), Vacant(VacantEntry<'a, 'b, K, V>), @@ -22,10 +23,9 @@ pub(crate) enum PrevPos { Unknown(u64), } -/// View into an occupied entry within the map. pub struct OccupiedEntry<'a, 'b, K, V> { /// Mutable reference to the map containing this entry. - pub(crate) map: &'b mut CoreHashMap<'a, K, V>, + pub(crate) map: RwLockWriteGuard<'b, CoreHashMap<'a, K, V>>, /// The key of the occupied entry pub(crate) _key: K, /// The index of the previous entry in the chain. @@ -66,7 +66,7 @@ impl OccupiedEntry<'_, '_, K, V> { /// # Panics /// Panics if the `prev_pos` field is equal to [`PrevPos::Unknown`]. In practice, this means /// the entry was obtained via calling something like [`CoreHashMap::entry_at_bucket`]. - pub fn remove(self) -> V { + pub fn remove(mut self) -> V { // If this bucket was queried by index, go ahead and follow its chain from the start. 
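// (Annotation, not in the diff: the walk below re-derives this bucket's place in
// its hash chain by starting at the dictionary slot for the stored hash and
// following `next` links until it reaches `bucket_pos`, so that an entry obtained
// by index can still be unlinked.)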
let prev = if let PrevPos::Unknown(hash) = self.prev_pos { let dict_idx = hash as usize % self.map.dictionary.len(); @@ -90,15 +90,17 @@ impl OccupiedEntry<'_, '_, K, V> { self.map.dictionary[dict_pos as usize] = bucket.next; }, PrevPos::Chained(bucket_pos) => { + // println!("we think prev of {} is {bucket_pos}", self.bucket_pos); self.map.buckets[bucket_pos as usize].next = bucket.next; }, _ => unreachable!(), } - // and add it to the freelist + // and add it to the freelist + let free = self.map.free_head; let bucket = &mut self.map.buckets[self.bucket_pos as usize]; let old_value = bucket.inner.take(); - bucket.next = self.map.free_head; + bucket.next = free; self.map.free_head = self.bucket_pos; self.map.buckets_in_use -= 1; @@ -109,7 +111,7 @@ impl OccupiedEntry<'_, '_, K, V> { /// An abstract view into a vacant entry within the map. pub struct VacantEntry<'a, 'b, K, V> { /// Mutable reference to the map containing this entry. - pub(crate) map: &'b mut CoreHashMap<'a, K, V>, + pub(crate) map: RwLockWriteGuard<'b, CoreHashMap<'a, K, V>>, /// The key to be inserted into this entry. pub(crate) key: K, /// The position within the dictionary corresponding to the key's hash. @@ -121,16 +123,17 @@ impl<'b, K: Clone + Hash + Eq, V> VacantEntry<'_, 'b, K, V> { /// /// # Errors /// Will return [`FullError`] if there are no unoccupied buckets in the map. - pub fn insert(self, value: V) -> Result<&'b mut V, FullError> { + pub fn insert(mut self, value: V) -> Result, FullError> { let pos = self.map.alloc_bucket(self.key, value)?; if pos == INVALID_POS { return Err(FullError()); } - let bucket = &mut self.map.buckets[pos as usize]; - bucket.next = self.map.dictionary[self.dict_pos as usize]; + self.map.buckets[pos as usize].next = self.map.dictionary[self.dict_pos as usize]; self.map.dictionary[self.dict_pos as usize] = pos; - let result = &mut self.map.buckets[pos as usize].inner.as_mut().unwrap().1; - Ok(result) + Ok(RwLockWriteGuard::map( + self.map, + |m| &mut m.buckets[pos as usize].inner.as_mut().unwrap().1 + )) } } diff --git a/libs/neon-shmem/src/hash/tests.rs b/libs/neon-shmem/src/hash/tests.rs index d838aa0b86..0c760c56b7 100644 --- a/libs/neon-shmem/src/hash/tests.rs +++ b/libs/neon-shmem/src/hash/tests.rs @@ -36,7 +36,7 @@ impl<'a> From<&'a [u8]> for TestKey { } fn test_inserts + Copy>(keys: &[K]) { - let mut w = HashMapInit::::new_resizeable_named( + let w = HashMapInit::::new_resizeable_named( 100000, 120000, "test_inserts" ).attach_writer(); @@ -190,10 +190,6 @@ fn random_ops() { let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None }); apply_op(&op, &mut writer, &mut shadow); - - if i % 1000 == 0 { - eprintln!("{i} ops processed"); - } } } @@ -267,7 +263,7 @@ fn test_idx_remove() { } while let Some((key, val)) = shadow.pop_first() { - assert_eq!(writer.get(&key), Some(&val)); + assert_eq!(*writer.get(&key).unwrap(), val); } } @@ -326,8 +322,10 @@ fn test_shrink_grow_seq() { writer.grow(1500).unwrap(); do_random_ops(600, 1500, 0.1, &mut writer, &mut shadow, &mut rng); eprintln!("Shrinking to 200"); + while shadow.len() > 100 { + do_deletes(1, &mut writer, &mut shadow); + } do_shrink(&mut writer, &mut shadow, 200); - do_deletes(100, &mut writer, &mut shadow); do_random_ops(50, 1500, 0.25, &mut writer, &mut shadow, &mut rng); eprintln!("Growing to 10k"); writer.grow(10000).unwrap(); @@ -336,7 +334,7 @@ fn test_shrink_grow_seq() { #[test] fn test_bucket_ops() { - let mut writer = HashMapInit::::new_resizeable_named( + let writer = HashMapInit::::new_resizeable_named( 
         1000, 1200, "test_bucket_ops"
     ).attach_writer();
     match writer.entry(1.into()) {
@@ -345,21 +343,21 @@ fn test_bucket_ops() {
     }
     assert_eq!(writer.get_num_buckets_in_use(), 1);
     assert_eq!(writer.get_num_buckets(), 1000);
-    assert_eq!(writer.get(&1.into()), Some(&2));
+    assert_eq!(*writer.get(&1.into()).unwrap(), 2);
     let pos = match writer.entry(1.into()) {
         Entry::Occupied(e) => {
             assert_eq!(e._key, 1.into());
             let pos = e.bucket_pos as usize;
             assert_eq!(writer.entry_at_bucket(pos).unwrap()._key, 1.into());
-            assert_eq!(writer.get_at_bucket(pos), Some(&(1.into(), 2)));
+            assert_eq!(*writer.get_at_bucket(pos).unwrap(), (1.into(), 2));
             pos
         },
         Entry::Vacant(_) => { panic!("Insert didn't affect entry"); },
     };
-    let ptr: *const usize = writer.get(&1.into()).unwrap();
+    let ptr: *const usize = &*writer.get(&1.into()).unwrap();
     assert_eq!(writer.get_bucket_for_value(ptr), pos);
     writer.remove(&1.into());
-    assert_eq!(writer.get(&1.into()), None);
+    assert!(writer.get(&1.into()).is_none());
 }
 
 #[test]
diff --git a/libs/neon-shmem/src/sync.rs b/libs/neon-shmem/src/sync.rs
index 68ef7b904d..8887299a92 100644
--- a/libs/neon-shmem/src/sync.rs
+++ b/libs/neon-shmem/src/sync.rs
@@ -2,43 +2,18 @@
 
 use std::mem::MaybeUninit;
 use std::ptr::NonNull;
-use std::cell::UnsafeCell;
-use std::ops::{Deref, DerefMut};
-use thiserror::Error;
+
+pub type RwLock<T> = lock_api::RwLock<PthreadRwLock, T>;
+pub(crate) type RwLockReadGuard<'a, T> = lock_api::RwLockReadGuard<'a, PthreadRwLock, T>;
+pub type RwLockWriteGuard<'a, T> = lock_api::RwLockWriteGuard<'a, PthreadRwLock, T>;
+pub type ValueReadGuard<'a, T> = lock_api::MappedRwLockReadGuard<'a, PthreadRwLock, T>;
+pub type ValueWriteGuard<'a, T> = lock_api::MappedRwLockWriteGuard<'a, PthreadRwLock, T>;
 
 /// Shared memory read-write lock.
-struct RwLock<'a, T: ?Sized> {
-    inner: &'a mut libc::pthread_rwlock_t,
-    data: UnsafeCell,
-}
+pub struct PthreadRwLock(Option<NonNull<libc::pthread_rwlock_t>>);
 
-/// RAII guard for a read lock.
-struct RwLockReadGuard<'a, 'b, T: ?Sized> {
-    data: NonNull,
-    lock: &'a RwLock<'b, T>,
-}
-
-/// RAII guard for a write lock.
-struct RwLockWriteGuard<'a, 'b, T: ?Sized> {
-    lock: &'a RwLock<'b, T>,
-}
-
-// TODO(quantumish): Support poisoning errors?
-#[derive(Error, Debug)]
-enum RwLockError {
-    #[error("deadlock detected")]
-    Deadlock,
-    #[error("max number of read locks exceeded")]
-    MaxReadLocks,
-    #[error("nonblocking operation would block")]
-    WouldBlock,
-}
-
-unsafe impl Send for RwLock<'_, T> {}
-unsafe impl Sync for RwLock<'_, T> {}
-
-impl<'a, T> RwLock<'a, T> {
-    fn new(lock: &'a mut MaybeUninit, data: T) -> Self {
+impl PthreadRwLock {
+    pub fn new(lock: *mut libc::pthread_rwlock_t) -> Self {
         unsafe {
             let mut attrs = MaybeUninit::uninit();
             // Ignoring return value here - only possible error is OOM.
@@ -48,208 +23,81 @@ impl<'a, T> RwLock<'a, T> {
                 libc::PTHREAD_PROCESS_SHARED
             );
             // TODO(quantumish): worth making this function return Result?
-            libc::pthread_rwlock_init(lock.as_mut_ptr(), attrs.as_mut_ptr());
+            libc::pthread_rwlock_init(lock, attrs.as_mut_ptr());
             // Safety: POSIX specifies that "any function affecting the attributes
             // object (including destruction) shall not affect any previously
             // initialized read-write locks".
             libc::pthread_rwlockattr_destroy(attrs.as_mut_ptr());
-            Self {
-                inner: lock.assume_init_mut(),
-                data: data.into(),
-            }
-        }
-    }
-
-    fn read(&self) -> Result, RwLockError> {
-        unsafe {
-            let res = libc::pthread_rwlock_rdlock(self.inner as *const _ as *mut _);
-            match res {
-                0 => (),
-                libc::EINVAL => panic!("failed to properly initialize lock"),
-                libc::EDEADLK => return Err(RwLockError::Deadlock),
-                libc::EAGAIN => return Err(RwLockError::MaxReadLocks),
-                e => panic!("unknown error code returned: {e}")
-            }
-            Ok(RwLockReadGuard {
-                data: NonNull::new_unchecked(self.data.get()),
-                lock: self
-            })
-        }
-    }
-
-    fn try_read(&self) -> Result, RwLockError> {
-        unsafe {
-            let res = libc::pthread_rwlock_tryrdlock(self.inner as *const _ as *mut _);
-            match res {
-                0 => (),
-                libc::EINVAL => panic!("failed to properly initialize lock"),
-                libc::EDEADLK => return Err(RwLockError::Deadlock),
-                libc::EAGAIN => return Err(RwLockError::MaxReadLocks),
-                libc::EBUSY => return Err(RwLockError::WouldBlock),
-                e => panic!("unknown error code returned: {e}")
-            }
-            Ok(RwLockReadGuard {
-                data: NonNull::new_unchecked(self.data.get()),
-                lock: self
-            })
+            Self(Some(NonNull::new_unchecked(lock)))
         }
     }
 
-    fn write(&self) -> Result, RwLockError> {
+    fn inner(&self) -> NonNull<libc::pthread_rwlock_t> {
+        match self.0 {
+            None => panic!("PthreadRwLock constructed badly - something likely used RawMutex::INIT"),
+            Some(x) => x,
+        }
+    }
+}
+
+unsafe impl lock_api::RawRwLock for PthreadRwLock {
+    type GuardMarker = lock_api::GuardSend;
+    const INIT: Self = Self(None);
+
+    fn lock_shared(&self) {
         unsafe {
-            let res = libc::pthread_rwlock_wrlock(self.inner as *const _ as *mut _);
-            match res {
-                0 => (),
-                libc::EINVAL => panic!("failed to properly initialize lock"),
-                libc::EDEADLK => return Err(RwLockError::Deadlock),
-                e => panic!("unknown error code returned: {e}")
+            let res = libc::pthread_rwlock_rdlock(self.inner().as_ptr());
+            if res != 0 {
+                panic!("rdlock failed with {res}");
             }
         }
-        Ok(RwLockWriteGuard { lock: self })
     }
 
-    fn try_write(&self) -> Result, RwLockError> {
+    fn try_lock_shared(&self) -> bool {
        unsafe {
-            let res = libc::pthread_rwlock_trywrlock(self.inner as *const _ as *mut _);
+            let res = libc::pthread_rwlock_tryrdlock(self.inner().as_ptr());
             match res {
-                0 => (),
-                libc::EINVAL => panic!("failed to properly initialize lock"),
-                libc::EDEADLK => return Err(RwLockError::Deadlock),
-                libc::EBUSY => return Err(RwLockError::WouldBlock),
-                e => panic!("unknown error code returned: {e}")
+                0 => true,
+                libc::EAGAIN => false,
+                o => panic!("try_rdlock failed with {o}")
             }
         }
-        Ok(RwLockWriteGuard { lock: self })
-    }
-}
-
-unsafe impl Sync for RwLockReadGuard<'_, '_, T> {}
-unsafe impl Sync for RwLockWriteGuard<'_, '_, T> {}
-
-impl Deref for RwLockReadGuard<'_, '_, T> {
-    type Target = T;
-
-    fn deref(&self) -> &T {
-        unsafe { self.data.as_ref() }
-    }
-}
-
-impl Deref for RwLockWriteGuard<'_, '_, T> {
-    type Target = T;
-
-    fn deref(&self) -> &T {
-        unsafe { &*self.lock.data.get() }
-    }
-}
-
-impl DerefMut for RwLockWriteGuard<'_, '_, T> {
-    fn deref_mut(&mut self) -> &mut T {
-        unsafe { &mut *self.lock.data.get() }
-    }
-}
-
-impl Drop for RwLockReadGuard<'_, '_, T> {
-    fn drop(&mut self) -> () {
-        let res = unsafe { libc::pthread_rwlock_unlock(
-            self.lock.inner as *const _ as *mut _
-        ) };
-        debug_assert!(res == 0);
-    }
-}
-
-impl Drop for RwLockWriteGuard<'_, '_, T> {
-    fn drop(&mut self) -> () {
-        let res = unsafe { libc::pthread_rwlock_unlock(
-            self.lock.inner as *const _ as *mut _
-        ) };
-        debug_assert!(res == 0);
-    }
-}
-
-#[cfg(test)] -mod tests { - use std::sync::Arc; - - use super::*; -use RwLockError::*; - - #[test] - fn test_single_process() { - let mut lock = MaybeUninit::uninit(); - let wrapper = RwLock::new(&mut lock, 0); - let mut writer = wrapper.write().unwrap(); - assert!(matches!(wrapper.try_write(), Err(Deadlock | WouldBlock))); - assert!(matches!(wrapper.try_read(), Err(Deadlock | WouldBlock))); - *writer = 5; - drop(writer); - let reader = wrapper.read().unwrap(); - assert!(matches!(wrapper.try_write(), Err(Deadlock | WouldBlock))); - assert!(matches!(wrapper.read(), Ok(_))); - assert_eq!(*reader, 5); - drop(reader); - assert!(matches!(wrapper.try_write(), Ok(_))); } - #[test] - fn test_multi_thread() { - let lock = Box::new(MaybeUninit::uninit()); - let wrapper = Arc::new(RwLock::new(Box::leak(lock), 0)); - let mut writer = wrapper.write().unwrap(); - let t1 = { - let wrapper = wrapper.clone(); - std::thread::spawn(move || { - let mut writer = wrapper.write().unwrap(); - *writer = 20; - }) - }; - assert_eq!(*writer, 0); - *writer = 10; - assert_eq!(*writer, 10); - drop(writer); - t1.join().unwrap(); - let mut writer = wrapper.write().unwrap(); - assert_eq!(*writer, 20); - drop(writer); - let mut handles = vec![]; - for _ in 0..5 { - handles.push({ - let wrapper = wrapper.clone(); - std::thread::spawn(move || { - let reader = wrapper.read().unwrap(); - assert_eq!(*reader, 20); - }) - }); + fn lock_exclusive(&self) { + unsafe { + let res = libc::pthread_rwlock_wrlock(self.inner().as_ptr()); + if res != 0 { + panic!("wrlock failed with {res}"); + } } - for h in handles { - h.join().unwrap(); - } - let writer = wrapper.write().unwrap(); - assert_eq!(*writer, 20); } - // // TODO(quantumish): Terrible time-based synchronization, fix me. - // #[test] - // fn test_multi_process() { - // let max_size = 100; - // let init_struct = crate::shmem::ShmemHandle::new("test_multi_process", 0, max_size).unwrap(); - // let ptr = init_struct.data_ptr.as_ptr(); - // let lock: &mut _ = unsafe { ptr.add( - // ptr.align_offset(std::mem::align_of::>()) - // ).cast::>().as_mut().unwrap() } ; - // let wrapper = RwLock::new(lock, 0); + fn try_lock_exclusive(&self) -> bool { + unsafe { + let res = libc::pthread_rwlock_trywrlock(self.inner().as_ptr()); + match res { + 0 => true, + libc::EAGAIN => false, + o => panic!("try_wrlock failed with {o}") + } + } + } - // let fork_result = unsafe { nix::unistd::fork().unwrap() }; - - // if !fork_result.is_parent() { - // let mut writer = wrapper.write().unwrap(); - // std::thread::sleep(std::time::Duration::from_secs(5)); - // *writer = 2; - // } else { - // std::thread::sleep(std::time::Duration::from_secs(1)); - // assert!(matches!(wrapper.try_write(), Err(WouldBlock))); - // std::thread::sleep(std::time::Duration::from_secs(10)); - // let writer = wrapper.try_write().unwrap(); - // assert_eq!(*writer, 2); - // } - // } + unsafe fn unlock_exclusive(&self) { + unsafe { + let res = libc::pthread_rwlock_unlock(self.inner().as_ptr()); + if res != 0 { + panic!("unlock failed with {res}"); + } + } + } + unsafe fn unlock_shared(&self) { + unsafe { + let res = libc::pthread_rwlock_unlock(self.inner().as_ptr()); + if res != 0 { + panic!("unlock failed with {res}"); + } + } + } } From 2fe27f510df465ddd663086d0420f01e121886b4 Mon Sep 17 00:00:00 2001 From: David Freifeld Date: Wed, 2 Jul 2025 11:57:34 -0700 Subject: [PATCH 7/7] Make neon-shmem tests thread-safe and report errno in panics --- libs/neon-shmem/benches/hmap_resize.rs | 16 ++++++---------- libs/neon-shmem/src/hash/tests.rs | 
24 +++++++++++++----------- libs/neon-shmem/src/sync.rs | 14 ++++++++------ 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/libs/neon-shmem/benches/hmap_resize.rs b/libs/neon-shmem/benches/hmap_resize.rs index 30a3dca296..6b86e7ed27 100644 --- a/libs/neon-shmem/benches/hmap_resize.rs +++ b/libs/neon-shmem/benches/hmap_resize.rs @@ -1,9 +1,7 @@ -use std::hint::black_box; use criterion::{criterion_group, criterion_main, BatchSize, Criterion, BenchmarkId}; use neon_shmem::hash::HashMapAccess; use neon_shmem::hash::HashMapInit; use neon_shmem::hash::entry::Entry; -use neon_shmem::shmem::ShmemHandle; use rand::prelude::*; use rand::distr::{Distribution, StandardUniform}; use std::hash::BuildHasher; @@ -65,14 +63,13 @@ fn apply_op( op: TestOp, map: &mut HashMapAccess, ) { - let hash = map.get_hash_value(&op.0); - let entry = map.entry_with_hash(op.0, hash); + let entry = map.entry(op.0); match op.1 { Some(new) => { match entry { Entry::Occupied(mut e) => Some(e.insert(new)), - Entry::Vacant(e) => { e.insert(new).unwrap(); None }, + Entry::Vacant(e) => { _ = e.insert(new).unwrap(); None }, } }, None => { @@ -184,15 +181,14 @@ fn real_benchs(c: &mut Criterion) { let mut rng = rand::rng(); b.iter_batched( || HashMapInit::new_resizeable(size, size * 2).attach_writer(), - |mut writer| { - for i in 0..ideal_filled { + |writer| { + for _ in 0..ideal_filled { let key: FileCacheKey = rng.random(); let val = FileCacheEntry::dummy(); - let hash = writer.get_hash_value(&key); - let entry = writer.entry_with_hash(key, hash); + let entry = writer.entry(key); std::hint::black_box(match entry { Entry::Occupied(mut e) => { e.insert(val); }, - Entry::Vacant(e) => { e.insert(val).unwrap(); }, + Entry::Vacant(e) => { _ = e.insert(val).unwrap(); }, }) } }, diff --git a/libs/neon-shmem/src/hash/tests.rs b/libs/neon-shmem/src/hash/tests.rs index 0c760c56b7..d7b8d19050 100644 --- a/libs/neon-shmem/src/hash/tests.rs +++ b/libs/neon-shmem/src/hash/tests.rs @@ -114,7 +114,7 @@ fn apply_op( Some(new) => { match entry { Entry::Occupied(mut e) => Some(e.insert(new)), - Entry::Vacant(e) => { e.insert(new).unwrap(); None }, + Entry::Vacant(e) => { _ = e.insert(new).unwrap(); None }, } }, None => { @@ -277,13 +277,13 @@ fn test_idx_get() { do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng); for _ in 0..100 { let idx = (rng.next_u32() % 1500) as usize; - if let Some(mut e) = writer.entry_at_bucket(idx) { + if let Some(pair) = writer.get_at_bucket(idx) { { - let v: *const usize = e.get(); + let v: *const usize = &pair.1; assert_eq!(writer.get_bucket_for_value(v), idx); } { - let v: *const usize = e.get_mut(); + let v: *const usize = &pair.1; assert_eq!(writer.get_bucket_for_value(v), idx); } } @@ -339,7 +339,7 @@ fn test_bucket_ops() { ).attach_writer(); match writer.entry(1.into()) { Entry::Occupied(mut e) => { e.insert(2); }, - Entry::Vacant(e) => { e.insert(2).unwrap(); }, + Entry::Vacant(e) => { _ = e.insert(2).unwrap(); }, } assert_eq!(writer.get_num_buckets_in_use(), 1); assert_eq!(writer.get_num_buckets(), 1000); @@ -348,14 +348,16 @@ fn test_bucket_ops() { Entry::Occupied(e) => { assert_eq!(e._key, 1.into()); let pos = e.bucket_pos as usize; - assert_eq!(writer.entry_at_bucket(pos).unwrap()._key, 1.into()); - assert_eq!(*writer.get_at_bucket(pos).unwrap(), (1.into(), 2)); pos }, Entry::Vacant(_) => { panic!("Insert didn't affect entry"); }, }; - let ptr: *const usize = &*writer.get(&1.into()).unwrap(); - assert_eq!(writer.get_bucket_for_value(ptr), pos); + 
assert_eq!(writer.entry_at_bucket(pos).unwrap()._key, 1.into()); + assert_eq!(*writer.get_at_bucket(pos).unwrap(), (1.into(), 2)); + { + let ptr: *const usize = &*writer.get(&1.into()).unwrap(); + assert_eq!(writer.get_bucket_for_value(ptr), pos); + } writer.remove(&1.into()); assert!(writer.get(&1.into()).is_none()); } @@ -390,7 +392,7 @@ fn test_shrink_zero() { #[test] #[should_panic] fn test_grow_oom() { - let mut writer = HashMapInit::::new_resizeable_named( + let writer = HashMapInit::::new_resizeable_named( 1500, 2000, "test_grow_oom" ).attach_writer(); writer.grow(20000).unwrap(); @@ -408,7 +410,7 @@ fn test_shrink_bigger() { #[test] #[should_panic] fn test_shrink_early_finish() { - let mut writer = HashMapInit::::new_resizeable_named( + let writer = HashMapInit::::new_resizeable_named( 1500, 2500, "test_shrink_early_finish" ).attach_writer(); writer.finish_shrink().unwrap(); diff --git a/libs/neon-shmem/src/sync.rs b/libs/neon-shmem/src/sync.rs index 8887299a92..fc39df9100 100644 --- a/libs/neon-shmem/src/sync.rs +++ b/libs/neon-shmem/src/sync.rs @@ -3,6 +3,8 @@ use std::mem::MaybeUninit; use std::ptr::NonNull; +use nix::errno::Errno; + pub type RwLock = lock_api::RwLock; pub(crate) type RwLockReadGuard<'a, T> = lock_api::RwLockReadGuard<'a, PthreadRwLock, T>; pub type RwLockWriteGuard<'a, T> = lock_api::RwLockWriteGuard<'a, PthreadRwLock, T>; @@ -48,7 +50,7 @@ unsafe impl lock_api::RawRwLock for PthreadRwLock { unsafe { let res = libc::pthread_rwlock_rdlock(self.inner().as_ptr()); if res != 0 { - panic!("rdlock failed with {res}"); + panic!("rdlock failed with {}", Errno::from_raw(res)); } } } @@ -59,7 +61,7 @@ unsafe impl lock_api::RawRwLock for PthreadRwLock { match res { 0 => true, libc::EAGAIN => false, - o => panic!("try_rdlock failed with {o}") + o => panic!("try_rdlock failed with {}", Errno::from_raw(res)), } } } @@ -68,7 +70,7 @@ unsafe impl lock_api::RawRwLock for PthreadRwLock { unsafe { let res = libc::pthread_rwlock_wrlock(self.inner().as_ptr()); if res != 0 { - panic!("wrlock failed with {res}"); + panic!("wrlock failed with {}", Errno::from_raw(res)); } } } @@ -79,7 +81,7 @@ unsafe impl lock_api::RawRwLock for PthreadRwLock { match res { 0 => true, libc::EAGAIN => false, - o => panic!("try_wrlock failed with {o}") + o => panic!("try_wrlock failed with {}", Errno::from_raw(res)), } } } @@ -88,7 +90,7 @@ unsafe impl lock_api::RawRwLock for PthreadRwLock { unsafe { let res = libc::pthread_rwlock_unlock(self.inner().as_ptr()); if res != 0 { - panic!("unlock failed with {res}"); + panic!("unlock failed with {}", Errno::from_raw(res)); } } } @@ -96,7 +98,7 @@ unsafe impl lock_api::RawRwLock for PthreadRwLock { unsafe { let res = libc::pthread_rwlock_unlock(self.inner().as_ptr()); if res != 0 { - panic!("unlock failed with {res}"); + panic!("unlock failed with {}", Errno::from_raw(res)); } } }
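
Note for reviewers: a minimal sketch of how the lock_api-backed primitives from sync.rs are intended to compose. It assumes the sync module is publicly reachable as `neon_shmem::sync`, backs the raw `pthread_rwlock_t` with leaked heap storage purely for illustration (in the crate the lock lives inside the shared-memory mapping), and uses `lock_api::RwLock::const_new` to hand over the already-initialized raw lock, since `RawRwLock::INIT` is deliberately `Self(None)` and panics on first use.

    use std::mem::MaybeUninit;

    use neon_shmem::sync::PthreadRwLock;

    fn main() {
        // Illustration only: back the raw pthread lock with leaked heap storage.
        // In the crate this storage would sit inside the ShmemHandle mapping.
        let storage: &'static mut MaybeUninit<libc::pthread_rwlock_t> =
            Box::leak(Box::new(MaybeUninit::uninit()));

        // PthreadRwLock::new() runs pthread_rwlock_init with PTHREAD_PROCESS_SHARED
        // and remembers the pointer; it does not take ownership of the storage.
        let raw = PthreadRwLock::new(storage.as_mut_ptr());

        // Supply the initialized raw lock explicitly; this matches the crate's
        // `RwLock<T>` alias, whereas RwLock::new() would use INIT and panic.
        let lock: lock_api::RwLock<PthreadRwLock, u64> = lock_api::RwLock::const_new(raw, 0);

        *lock.write() += 1;
        assert_eq!(*lock.read(), 1);
    }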
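
A similarly hedged sketch of the guard-returning hash map API exercised by the updated tests and benches; the key/value types, capacities, and map name below are arbitrary stand-ins (the real tests use `TestKey`).

    use neon_shmem::hash::HashMapInit;
    use neon_shmem::hash::entry::Entry;

    fn main() {
        // Arbitrary capacities and name; any K: Clone + Hash + Eq should work.
        let writer = HashMapInit::<u64, u64>::new_resizeable_named(1024, 2048, "entry_example")
            .attach_writer();

        match writer.entry(7) {
            // Occupied: overwrite the value in place.
            Entry::Occupied(mut e) => { e.insert(1); }
            // Vacant: insert() now yields a ValueWriteGuard rather than &mut V,
            // so discard the guard explicitly when the value is not needed.
            Entry::Vacant(e) => { _ = e.insert(1).unwrap(); }
        }

        // Reads also return guards now, so compare through Deref instead of by reference.
        assert_eq!(*writer.get(&7).unwrap(), 1);
        writer.remove(&7);
        assert!(writer.get(&7).is_none());
    }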