Heavily WIP rewrite of hashmap for concurrency + perf

This commit is contained in:
David Freifeld
2025-07-10 17:34:30 -07:00
parent da6f4dbafe
commit 7f655ddb9a
8 changed files with 1026 additions and 790 deletions

29
Cargo.lock generated
View File

@@ -247,6 +247,15 @@ dependencies = [
"syn 2.0.100",
]
[[package]]
name = "atomic"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340"
dependencies = [
"bytemuck",
]
[[package]]
name = "atomic-take"
version = "1.1.0"
@@ -1087,9 +1096,23 @@ checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
[[package]]
name = "bytemuck"
version = "1.16.3"
version = "1.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "102087e286b4677862ea56cf8fc58bb2cdfa8725c40ffb80fe3a008eb7f2fc83"
checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422"
dependencies = [
"bytemuck_derive",
]
[[package]]
name = "bytemuck_derive"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ecc273b49b3205b83d648f0690daa588925572cc5063745bfe547fe7ec8e1a1"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
]
[[package]]
name = "byteorder"
@@ -3951,6 +3974,8 @@ name = "neon-shmem"
version = "0.1.0"
dependencies = [
"ahash",
"atomic",
"bytemuck",
"criterion",
"foldhash",
"hashbrown 0.15.4",

View File

@@ -12,6 +12,8 @@ rustc-hash = { version = "2.1.1" }
rand = "0.9.1"
libc.workspace = true
lock_api = "0.4.13"
atomic = "0.6.1"
bytemuck = { version = "1.23.1", features = ["derive"] }
[dev-dependencies]
criterion = { workspace = true, features = ["html_reports"] }

View File

@@ -1,51 +1,43 @@
//! Resizable hash table implementation on top of byte-level storage (either a [`ShmemHandle`] or a
//! fixed byte array).
//!
//! This hash table has two major components: the bucket array and the dictionary. Each bucket
//! within the bucket array contains a `Option<(K, V)>` and an index of another bucket. In this way
//! there is both an implicit freelist within the bucket array (`None` buckets point to other `None`
//! entries) and various hash chains within the bucket array (a Some bucket will point to other Some
//! buckets that had the same hash).
//!
//! Buckets are never moved unless they are within a region that is being shrunk, and so the actual
//! hash- dependent component is done with the dictionary. When a new key is inserted into the map,
//! a position within the dictionary is decided based on its hash, the data is inserted into an
//! empty bucket based off of the freelist, and then the index of said bucket is placed in the
//! dictionary.
//!
//! This map is resizable (if initialized on top of a [`ShmemHandle`]). Both growing and shrinking
//! happen in-place and are at a high level achieved by expanding/reducing the bucket array and
//! rebuilding the dictionary by rehashing all keys.
use std::hash::{BuildHasher, Hash};
use std::mem::MaybeUninit;
use std::ptr::NonNull;
use std::sync::atomic::Ordering;
use crate::{shmem, sync::{
PthreadRwLock, RwLock, RwLockReadGuard, RwLockWriteGuard, ValueReadGuard
}};
use crate::shmem::ShmemHandle;
use crate::{shmem, sync::*};
mod core;
mod bucket;
pub mod entry;
#[cfg(test)]
mod tests;
use core::{Bucket, CoreHashMap, INVALID_POS};
use entry::{Entry, OccupiedEntry, PrevPos, VacantEntry};
use core::{
CoreHashMap, DictShard, EntryKey, EntryType,
FullError, MaybeUninitDictShard
};
use bucket::{Bucket, BucketIdx};
use entry::Entry;
/// Wrapper struct around multiple [`ShmemHandle`]s.
struct HashMapHandles {
keys_shmem: ShmemHandle,
idxs_shmem: ShmemHandle,
vals_shmem: ShmemHandle,
}
/// This represents a hash table that (possibly) lives in shared memory.
/// If a new process is launched with fork(), the child process inherits
/// this struct.
#[must_use]
pub struct HashMapInit<'a, K, V, S = rustc_hash::FxBuildHasher> {
shmem_handle: Option<ShmemHandle>,
shmem_handles: Option<HashMapHandles>,
shared_ptr: *mut HashMapShared<'a, K, V>,
shared_size: usize,
hasher: S,
num_buckets: u32,
num_buckets: usize,
num_shards: usize,
resize_lock: Mutex<()>,
}
/// This is a per-process handle to a hash table that (possibly) lives in shared memory.
@@ -55,9 +47,10 @@ pub struct HashMapInit<'a, K, V, S = rustc_hash::FxBuildHasher> {
/// XXX: We're not making use of it at the moment, but this struct could
/// hold process-local information in the future.
pub struct HashMapAccess<'a, K, V, S = rustc_hash::FxBuildHasher> {
shmem_handle: Option<ShmemHandle>,
shmem_handles: Option<HashMapHandles>,
shared_ptr: *mut HashMapShared<'a, K, V>,
hasher: S,
resize_lock: Mutex<()>,
}
unsafe impl<K: Sync, V: Sync, S> Sync for HashMapAccess<'_, K, V, S> {}
@@ -70,79 +63,104 @@ impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> {
/// before inserting any entries and before calling attach_writer/reader.
/// Otherwise different accessors could be using different hash function,
/// with confusing results.
///
/// TODO(quantumish): consider splitting out into a separate builder type?
pub fn with_hasher<T: BuildHasher>(self, hasher: T) -> HashMapInit<'a, K, V, T> {
HashMapInit {
hasher,
shmem_handle: self.shmem_handle,
shmem_handles: self.shmem_handles,
shared_ptr: self.shared_ptr,
shared_size: self.shared_size,
num_buckets: self.num_buckets,
num_shards: self.num_shards,
resize_lock: self.resize_lock,
}
}
/// Loosely (over)estimate the size needed to store a hash table with `num_buckets` buckets.
pub fn estimate_size(num_buckets: u32) -> usize {
// add some margin to cover alignment etc.
CoreHashMap::<K, V>::estimate_size(num_buckets) + size_of::<HashMapShared<K, V>>() + 1000
}
pub fn estimate_sizes(num_buckets: usize, num_shards: usize) -> (usize, usize, usize) {
(
(size_of::<EntryKey<K>>() * num_buckets)
+ (size_of::<libc::pthread_rwlock_t>() * num_shards)
+ (size_of::<RwLock<DictShard<'_, K>>>() * num_shards)
+ size_of::<HashMapShared<K, V>>()
+ 1000,
(size_of::<BucketIdx>() * num_buckets)+ 1000,
(size_of::<Bucket<V>>() * num_buckets) + 1000
)
}
fn carve_space<T>(ptr: &mut *mut u8, amount: usize) -> *mut T {
*ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<T>())) };
let out = ptr.cast();
*ptr = unsafe { ptr.add(size_of::<T>() * amount) };
out
}
fn new(
num_buckets: u32,
shmem_handle: Option<ShmemHandle>,
area_ptr: *mut u8,
area_size: usize,
num_buckets: usize,
num_shards: usize,
mut keys_ptr: *mut u8,
mut idxs_ptr: *mut u8,
mut vals_ptr: *mut u8,
shmem_handles: Option<HashMapHandles>,
hasher: S,
) -> Self {
let mut ptr: *mut u8 = area_ptr;
let end_ptr: *mut u8 = unsafe { ptr.add(area_size) };
// carve out area for the One Big Lock (TM) and the HashMapShared.
ptr = unsafe { ptr.add(ptr.align_offset(align_of::<libc::pthread_rwlock_t>())) };
let raw_lock_ptr = ptr;
ptr = unsafe { ptr.add(size_of::<libc::pthread_rwlock_t>()) };
ptr = unsafe { ptr.add(ptr.align_offset(align_of::<HashMapShared<K, V>>())) };
let shared_ptr: *mut HashMapShared<K, V> = ptr.cast();
ptr = unsafe { ptr.add(size_of::<HashMapShared<K, V>>()) };
// carve out the buckets
ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<core::Bucket<K, V>>())) };
let buckets_ptr = ptr;
ptr = unsafe { ptr.add(size_of::<core::Bucket<K, V>>() * num_buckets as usize) };
// use remaining space for the dictionary
ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<u32>())) };
assert!(ptr.addr() < end_ptr.addr());
let dictionary_ptr = ptr;
let dictionary_size = unsafe { end_ptr.byte_offset_from(ptr) / size_of::<u32>() as isize };
assert!(dictionary_size > 0);
// Set up the main area: hashmap info at front, keys at back
let mutex_ptr = Self::carve_space::<libc::pthread_mutex_t>(&mut keys_ptr, 1);
let shared_ptr = Self::carve_space::<HashMapShared<K, V>>(&mut keys_ptr, 1);
let shards_ptr = Self::carve_space::<RwLock<DictShard<'_, K>>>(&mut keys_ptr, num_shards);
let locks_ptr = Self::carve_space::<libc::pthread_rwlock_t>(&mut keys_ptr, num_shards);
let keys_ptr = Self::carve_space::<EntryKey<K>>(&mut keys_ptr, num_buckets);
// Set up the area of bucket idxs and the area of buckets. Not much to do!
let idxs_ptr = Self::carve_space::<BucketIdx>(&mut idxs_ptr, num_buckets);
let vals_ptr = Self::carve_space::<Bucket<V>>(&mut vals_ptr, num_buckets);
// Initialize the shards.
let shards_uninit: &mut [MaybeUninit<RwLock<MaybeUninitDictShard<'_, K>>>] =
unsafe { std::slice::from_raw_parts_mut(shards_ptr.cast(), num_shards) };
let shard_size = num_buckets / num_shards;
for i in 0..num_shards {
let size = ((i + 1) * shard_size).min(num_buckets) - (i * shard_size);
unsafe {
shards_uninit[i].write(RwLock::from_raw(
PthreadRwLock::new(NonNull::new_unchecked(locks_ptr.add(i))),
MaybeUninitDictShard {
keys: std::slice::from_raw_parts_mut(keys_ptr.add(i * shard_size).cast(), size),
idxs: std::slice::from_raw_parts_mut(idxs_ptr.add(i * shard_size).cast(), size)
}
));
};
}
let shards: &mut [RwLock<MaybeUninitDictShard<'_, K>>] =
unsafe { std::slice::from_raw_parts_mut(shards_ptr.cast(), num_shards) };
let buckets =
unsafe { std::slice::from_raw_parts_mut(buckets_ptr.cast(), num_buckets as usize) };
let dictionary = unsafe {
std::slice::from_raw_parts_mut(dictionary_ptr.cast(), dictionary_size as usize)
};
unsafe { std::slice::from_raw_parts_mut(vals_ptr.cast(), num_buckets) };
let hashmap = CoreHashMap::new(buckets, dictionary);
let lock = RwLock::from_raw(PthreadRwLock::new(raw_lock_ptr.cast()), hashmap);
unsafe {
std::ptr::write(shared_ptr, lock);
}
let hashmap = CoreHashMap::new(buckets, shards);
unsafe { std::ptr::write(shared_ptr, hashmap); }
let resize_lock = Mutex::from_raw(
unsafe { PthreadMutex::new(NonNull::new_unchecked(mutex_ptr)) }, ()
);
Self {
num_shards,
num_buckets,
shmem_handle,
shmem_handles,
shared_ptr,
shared_size: area_size,
hasher,
resize_lock,
}
}
/// Attach to a hash table for writing.
pub fn attach_writer(self) -> HashMapAccess<'a, K, V, S> {
HashMapAccess {
shmem_handle: self.shmem_handle,
shmem_handles: self.shmem_handles,
shared_ptr: self.shared_ptr,
hasher: self.hasher,
resize_lock: self.resize_lock,
}
}
@@ -152,31 +170,27 @@ impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> {
}
}
/// Hash table data that is actually stored in the shared memory area.
///
/// NOTE: We carve out the parts from a contiguous chunk. Growing and shrinking the hash table
/// relies on the memory layout! The data structures are laid out in the contiguous shared memory
/// area as follows:
///
/// [`libc::pthread_rwlock_t`]
/// [`HashMapShared`]
/// [buckets]
/// [dictionary]
///
/// In between the above parts, there can be padding bytes to align the parts correctly.
type HashMapShared<'a, K, V> = RwLock<CoreHashMap<'a, K, V>>;
type HashMapShared<'a, K, V> = CoreHashMap<'a, K, V>;
impl<'a, K, V> HashMapInit<'a, K, V, rustc_hash::FxBuildHasher>
where
K: Clone + Hash + Eq,
{
/// Place the hash table within a user-supplied fixed memory area.
pub fn with_fixed(num_buckets: u32, area: &'a mut [MaybeUninit<u8>]) -> Self {
pub fn with_fixed(
num_buckets: usize,
num_shards: usize,
area: &'a mut [MaybeUninit<u8>]
) -> Self {
let (keys_size, idxs_size, _) = Self::estimate_sizes(num_buckets, num_shards);
let ptr = area.as_mut_ptr().cast();
Self::new(
num_buckets,
num_shards,
ptr,
unsafe { ptr.add(keys_size) },
unsafe { ptr.add(keys_size).add(idxs_size) },
None,
area.as_mut_ptr().cast(),
area.len(),
rustc_hash::FxBuildHasher,
)
}
@@ -185,45 +199,65 @@ where
///
/// # Panics
/// Will panic on failure to resize area to expected map size.
pub fn with_shmem(num_buckets: u32, shmem: ShmemHandle) -> Self {
let size = Self::estimate_size(num_buckets);
shmem
.set_size(size)
.expect("could not resize shared memory area");
let ptr = shmem.data_ptr.as_ptr().cast();
pub fn with_shmems(
num_buckets: usize,
num_shards: usize,
keys_shmem: ShmemHandle,
idxs_shmem: ShmemHandle,
vals_shmem: ShmemHandle,
) -> Self {
let (keys_size, idxs_size, vals_size) = Self::estimate_sizes(num_buckets, num_shards);
keys_shmem.set_size(keys_size).expect("could not resize shared memory area");
idxs_shmem.set_size(idxs_size).expect("could not resize shared memory area");
vals_shmem.set_size(vals_size).expect("could not resize shared memory area");
Self::new(
num_buckets,
Some(shmem),
ptr,
size,
num_shards,
keys_shmem.data_ptr.as_ptr().cast(),
idxs_shmem.data_ptr.as_ptr().cast(),
vals_shmem.data_ptr.as_ptr().cast(),
Some(HashMapHandles { keys_shmem, idxs_shmem, vals_shmem }),
rustc_hash::FxBuildHasher,
)
}
/// Make a resizable hash map within a new shared memory area with the given name.
pub fn new_resizeable_named(num_buckets: u32, max_buckets: u32, name: &str) -> Self {
let size = Self::estimate_size(num_buckets);
let max_size = Self::estimate_size(max_buckets);
let shmem =
ShmemHandle::new(name, size, max_size).expect("failed to make shared memory area");
let ptr = shmem.data_ptr.as_ptr().cast();
pub fn new_resizeable_named(
num_buckets: usize,
max_buckets: usize,
num_shards: usize,
name: &str
) -> Self {
let (keys_size, idxs_size, vals_size) = Self::estimate_sizes(num_buckets, num_shards);
let (keys_max, idxs_max, vals_max) = Self::estimate_sizes(max_buckets, num_shards);
let keys_shmem = ShmemHandle::new(&format!("{name}_keys"), keys_size, keys_max)
.expect("failed to make shared memory area");
let idxs_shmem = ShmemHandle::new(&format!("{name}_idxs"), idxs_size, idxs_max)
.expect("failed to make shared memory area");
let vals_shmem = ShmemHandle::new(&format!("{name}_vals"), vals_size, vals_max)
.expect("failed to make shared memory area");
Self::new(
num_buckets,
Some(shmem),
ptr,
size,
num_shards,
keys_shmem.data_ptr.as_ptr().cast(),
idxs_shmem.data_ptr.as_ptr().cast(),
vals_shmem.data_ptr.as_ptr().cast(),
Some(HashMapHandles { keys_shmem, idxs_shmem, vals_shmem }),
rustc_hash::FxBuildHasher,
)
}
/// Make a resizable hash map within a new anonymous shared memory area.
pub fn new_resizeable(num_buckets: u32, max_buckets: u32) -> Self {
pub fn new_resizeable(
num_buckets: usize,
max_buckets: usize,
num_shards: usize,
) -> Self {
use std::sync::atomic::{AtomicUsize, Ordering};
static COUNTER: AtomicUsize = AtomicUsize::new(0);
let val = COUNTER.fetch_add(1, Ordering::Relaxed);
let name = format!("neon_shmem_hmap{val}");
Self::new_resizeable_named(num_buckets, max_buckets, &name)
Self::new_resizeable_named(num_buckets, max_buckets, num_shards, &name)
}
}
@@ -237,66 +271,27 @@ where
self.hasher.hash_one(key)
}
fn entry_with_hash(&self, key: K, hash: u64) -> Entry<'a, '_, K, V> {
let mut map = unsafe { self.shared_ptr.as_ref() }.unwrap().write();
let dict_pos = hash as usize % map.dictionary.len();
let first = map.dictionary[dict_pos];
if first == INVALID_POS {
// no existing entry
return Entry::Vacant(VacantEntry {
map,
key,
dict_pos: dict_pos as u32,
});
}
let mut prev_pos = PrevPos::First(dict_pos as u32);
let mut next = first;
loop {
let bucket = &mut map.buckets[next as usize];
let (bucket_key, _bucket_value) = bucket.inner.as_mut().expect("entry is in use");
if *bucket_key == key {
// found existing entry
return Entry::Occupied(OccupiedEntry {
map,
_key: key,
prev_pos,
bucket_pos: next,
});
}
if bucket.next == INVALID_POS {
// No existing entry
return Entry::Vacant(VacantEntry {
map,
key,
dict_pos: dict_pos as u32,
});
}
prev_pos = PrevPos::Chained(next);
next = bucket.next;
}
}
/// Get a reference to the corresponding value for a key.
pub fn get<'e>(&'e self, key: &K) -> Option<ValueReadGuard<'e, V>> {
let hash = self.get_hash_value(key);
let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
RwLockReadGuard::try_map(map, |m| m.get_with_hash(key, hash)).ok()
let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
map.get_with_hash(key, hash)
}
/// Get a reference to the entry containing a key.
pub fn entry(&self, key: K) -> Entry<'a, '_, K, V> {
pub fn entry(&self, key: K) -> Result<Entry<'a, K, V>, FullError> {
let hash = self.get_hash_value(&key);
self.entry_with_hash(key, hash)
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
map.entry_with_hash(key, hash)
}
/// Remove a key given its hash. Returns the associated value if it existed.
pub fn remove(&self, key: &K) -> Option<V> {
let hash = self.get_hash_value(key);
match self.entry_with_hash(key.clone(), hash) {
Entry::Occupied(e) => Some(e.remove()),
Entry::Vacant(_) => None,
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
match map.entry_with_hash(key.clone(), hash) {
Ok(Entry::Occupied(mut e)) => Some(e.remove()),
_ => None,
}
}
@@ -306,154 +301,110 @@ where
/// Will return [`core::FullError`] if there is no more space left in the map.
pub fn insert(&self, key: K, value: V) -> Result<Option<V>, core::FullError> {
let hash = self.get_hash_value(&key);
match self.entry_with_hash(key.clone(), hash) {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
match map.entry_with_hash(key.clone(), hash)? {
Entry::Occupied(mut e) => Ok(Some(e.insert(value))),
Entry::Vacant(e) => {
_ = e.insert(value)?;
_ = e.insert(value);
Ok(None)
}
}
}
/// Optionally return the entry for a bucket at a given index if it exists.
///
/// Has more overhead than one would intuitively expect: performs both a clone of the key
/// due to the [`OccupiedEntry`] type owning the key and also a hash of the key in order
/// to enable repairing the hash chain if the entry is removed.
pub fn entry_at_bucket(&self, pos: usize) -> Option<OccupiedEntry<'a, '_, K, V>> {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
if pos >= map.buckets.len() {
/// Optionally return reference to a bucket at a given index if it exists.
pub fn get_at_bucket(&self, pos: usize) -> Option<&V> {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
if pos >= map.bucket_arr.buckets.len() {
return None;
}
let entry = map.buckets[pos].inner.as_ref();
match entry {
Some((key, _)) => Some(OccupiedEntry {
_key: key.clone(),
bucket_pos: pos as u32,
prev_pos: entry::PrevPos::Unknown(
self.get_hash_value(key)
),
map,
}),
_ => None,
}
todo!("safely check if a given bucket is empty? always mark?");
}
/// Returns the number of buckets in the table.
pub fn get_num_buckets(&self) -> usize {
let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
map.get_num_buckets()
}
/// Return the key and value stored in bucket with given index. This can be used to
/// iterate through the hash map.
// TODO: An Iterator might be nicer. The communicator's clock algorithm needs to
// _slowly_ iterate through all buckets with its clock hand, without holding a lock.
// If we switch to an Iterator, it must not hold the lock.
pub fn get_at_bucket(&self, pos: usize) -> Option<ValueReadGuard<(K, V)>> {
let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
if pos >= map.buckets.len() {
return None;
}
RwLockReadGuard::try_map(map, |m| m.buckets[pos].inner.as_ref()).ok()
}
/// Returns the index of the bucket a given value corresponds to.
pub fn get_bucket_for_value(&self, val_ptr: *const V) -> usize {
let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
let origin = map.buckets.as_ptr();
let idx = (val_ptr as usize - origin as usize) / size_of::<Bucket<K, V>>();
assert!(idx < map.buckets.len());
let origin = map.bucket_arr.buckets.as_ptr();
let idx = (val_ptr as usize - origin as usize) / size_of::<Bucket<V>>();
assert!(idx < map.bucket_arr.buckets.len());
idx
}
/// Returns the number of occupied buckets in the table.
pub fn get_num_buckets_in_use(&self) -> usize {
let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
map.buckets_in_use as usize
let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
map.bucket_arr.buckets_in_use.load(Ordering::Relaxed)
}
/// Clears all entries in a table. Does not reset any shrinking operations.
pub fn clear(&self) {
let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
map.clear();
}
/// Perform an in-place rehash of some region (0..`rehash_buckets`) of the table and reset
/// the `buckets` and `dictionary` slices to be as long as `num_buckets`. Resets the freelist
/// in the process.
fn rehash_dict(
pub fn rehash(
&self,
inner: &mut RwLockWriteGuard<'_, CoreHashMap<'a, K, V>>,
buckets_ptr: *mut core::Bucket<K, V>,
end_ptr: *mut u8,
num_buckets: u32,
rehash_buckets: u32,
shards: &mut Vec<RwLockWriteGuard<'_, DictShard<'_, K>>>,
rehash_buckets: usize
) {
inner.free_head = INVALID_POS;
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
assert!(rehash_buckets <= map.get_num_buckets(), "rehashing subset of buckets");
shards.iter_mut().for_each(|x| x.keys.iter_mut().for_each(|key| {
if let EntryType::Occupied = key.tag {
key.tag = EntryType::Rehash;
}
}));
let buckets;
let dictionary;
unsafe {
let buckets_end_ptr = buckets_ptr.add(num_buckets as usize);
let dictionary_ptr: *mut u32 = buckets_end_ptr
.byte_add(buckets_end_ptr.align_offset(align_of::<u32>()))
.cast();
let dictionary_size: usize =
end_ptr.byte_offset_from(buckets_end_ptr) as usize / size_of::<u32>();
buckets = std::slice::from_raw_parts_mut(buckets_ptr, num_buckets as usize);
dictionary = std::slice::from_raw_parts_mut(dictionary_ptr, dictionary_size);
}
for e in dictionary.iter_mut() {
*e = INVALID_POS;
}
for (i, bucket) in buckets.iter_mut().enumerate().take(rehash_buckets as usize) {
if bucket.inner.is_none() {
bucket.next = inner.free_head;
inner.free_head = i as u32;
continue;
}
let hash = self.hasher.hash_one(&bucket.inner.as_ref().unwrap().0);
let pos: usize = (hash % dictionary.len() as u64) as usize;
bucket.next = dictionary[pos];
dictionary[pos] = i as u32;
}
inner.dictionary = dictionary;
inner.buckets = buckets;
todo!("solution with no memory allocation: split out metadata?")
}
/// Rehash the map without growing or shrinking.
pub fn shuffle(&self) {
let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
let num_buckets = map.get_num_buckets() as u32;
let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);
let end_ptr: *mut u8 = unsafe { self.shared_ptr.byte_add(size_bytes).cast() };
let buckets_ptr = map.buckets.as_mut_ptr();
self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, num_buckets);
pub fn shuffle(&self) {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
let mut shards: Vec<_> = map.dict_shards.iter().map(|x| x.write()).collect();
self.rehash(&mut shards, map.get_num_buckets());
}
fn reshard(&self, shards: &mut Vec<RwLockWriteGuard<'_, DictShard<'_, K>>>, num_buckets: usize) {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
let shard_size = num_buckets / map.dict_shards.len();
for i in 0..map.dict_shards.len() {
let size = ((i + 1) * shard_size).min(num_buckets) - (i * shard_size);
unsafe {
shards[i].keys = std::slice::from_raw_parts_mut(shards[i].keys.as_mut_ptr(), size);
shards[i].idxs = std::slice::from_raw_parts_mut(shards[i].idxs.as_mut_ptr(), size);
}
}
}
/// Grow the number of buckets within the table.
///
/// 1. Grows the underlying shared memory area
/// 2. Initializes new buckets and overwrites the current dictionary
/// 3. Rehashes the dictionary
///
/// # Panics
/// Panics if called on a map initialized with [`HashMapInit::with_fixed`].
///
/// # Errors
/// Returns an [`shmem::Error`] if any errors occur resizing the memory region.
pub fn grow(&self, num_buckets: u32) -> Result<(), shmem::Error> {
let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
let old_num_buckets = map.buckets.len() as u32;
fn resize_shmem(&self, num_buckets: usize) -> Result<(), shmem::Error> {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
let shmem_handles = self
.shmem_handles
.as_ref()
.expect("grow called on a fixed-size hash table");
let (keys_size, idxs_size, vals_size) =
HashMapInit::<K, V, S>::estimate_sizes(num_buckets, map.dict_shards.len());
shmem_handles.keys_shmem.set_size(keys_size)?;
shmem_handles.idxs_shmem.set_size(idxs_size)?;
shmem_handles.vals_shmem.set_size(vals_size)?;
Ok(())
}
pub fn grow(&self, num_buckets: usize) -> Result<(), shmem::Error> {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
let _resize_guard = self.resize_lock.lock();
let mut shards: Vec<_> = map.dict_shards.iter().map(|x| x.write()).collect();
let old_num_buckets = map.bucket_arr.buckets.len();
assert!(
num_buckets >= old_num_buckets,
"grow called with a smaller number of buckets"
@@ -461,128 +412,114 @@ where
if num_buckets == old_num_buckets {
return Ok(());
}
let shmem_handle = self
.shmem_handle
.as_ref()
.expect("grow called on a fixed-size hash table");
let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);
shmem_handle.set_size(size_bytes)?;
let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) };
// Initialize new buckets. The new buckets are linked to the free list.
// NB: This overwrites the dictionary!
let buckets_ptr = map.buckets.as_mut_ptr();
// Grow memory areas and initialize each of them.
self.resize_shmem(num_buckets)?;
unsafe {
let buckets_ptr = map.bucket_arr.buckets.as_mut_ptr();
for i in old_num_buckets..num_buckets {
let bucket = buckets_ptr.add(i as usize);
bucket.write(core::Bucket {
next: if i < num_buckets - 1 {
i + 1
let bucket = buckets_ptr.add(i);
bucket.write(Bucket::empty(
if i < num_buckets - 1 {
BucketIdx::new(i + 1)
} else {
map.free_head
},
inner: None,
});
map.bucket_arr.free_head.load(Ordering::Relaxed)
}
));
}
// TODO(quantumish) a bit questionable to use pointers here
let first_shard = &mut shards[0];
let keys_ptr = first_shard.keys.as_mut_ptr();
for i in old_num_buckets..num_buckets {
let key = keys_ptr.add(i);
key.write(EntryKey {
tag: EntryType::Empty,
val: MaybeUninit::uninit(),
});
}
let idxs_ptr = first_shard.idxs.as_mut_ptr();
for i in old_num_buckets..num_buckets {
let idx = idxs_ptr.add(i);
idx.write(BucketIdx::invalid());
}
}
self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, old_num_buckets);
map.free_head = old_num_buckets;
self.reshard(&mut shards, num_buckets);
self.rehash(&mut shards, old_num_buckets);
map.bucket_arr.free_head.store(
BucketIdx::new(old_num_buckets), Ordering::Relaxed
);
Ok(())
}
/// Begin a shrink, limiting all new allocations to be in buckets with index below `num_buckets`.
///
/// # Panics
/// Panics if called on a map initialized with [`HashMapInit::with_fixed`] or if `num_buckets` is
/// greater than the number of buckets in the map.
pub fn begin_shrink(&mut self, num_buckets: u32) {
let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
pub fn begin_shrink(&mut self, num_buckets: usize) {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
let _resize_guard = self.resize_lock.lock();
assert!(
num_buckets <= map.get_num_buckets() as u32,
num_buckets <= map.get_num_buckets(),
"shrink called with a larger number of buckets"
);
_ = self
.shmem_handle
.shmem_handles
.as_ref()
.expect("shrink called on a fixed-size hash table");
map.alloc_limit = num_buckets;
map.bucket_arr.alloc_limit.store(
BucketIdx::new(num_buckets), Ordering::SeqCst
);
}
/// If a shrink operation is underway, returns the target size of the map. Otherwise, returns None.
// TODO(quantumish): Safety? Maybe replace this with expanded version of finish_shrink?
pub fn shrink_goal(&self) -> Option<usize> {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap().read();
let goal = map.alloc_limit;
if goal == INVALID_POS { None } else { Some(goal as usize) }
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
let goal = map.bucket_arr.alloc_limit.load(Ordering::Relaxed);
goal.pos_checked()
}
/// Complete a shrink after caller has evicted entries, removing the unused buckets and rehashing.
///
/// # Panics
/// The following two cases result in a panic:
/// - Calling this function on a map initialized with [`HashMapInit::with_fixed`].
/// - Calling this function on a map when no shrink operation is in progress.
/// there are more buckets in use than the value returned by [`HashMapAccess::shrink_goal`].
///
/// # Errors
/// Returns an [`shmem::Error`] if any errors occur resizing the memory region.
pub fn finish_shrink(&self) -> Result<(), shmem::Error> {
let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
assert!(
map.alloc_limit != INVALID_POS,
"called finish_shrink when no shrink is in progress"
);
/// Complete a shrink after caller has evicted entries, removing the unused buckets and rehashing.
///
/// # Panics
/// The following cases result in a panic:
/// - Calling this function on a map initialized with [`HashMapInit::with_fixed`].
/// - Calling this function on a map when no shrink operation is in progress.
/// - Calling this function on a map with `shrink_mode` set to [`HashMapShrinkMode::Remap`] and
/// there are more buckets in use than the value returned by [`HashMapAccess::shrink_goal`].
///
/// # Errors
/// Returns an [`shmem::Error`] if any errors occur resizing the memory region.
pub fn finish_shrink(&self) -> Result<(), shmem::Error> {
let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
assert!(
map.alloc_limit != INVALID_POS,
"called finish_shrink when no shrink is in progress"
);
let num_buckets = map.alloc_limit;
if map.get_num_buckets() == num_buckets as usize {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
let _resize_guard = self.resize_lock.lock();
let mut shards: Vec<_> = map.dict_shards.iter().map(|x| x.write()).collect();
let num_buckets = map.bucket_arr.alloc_limit
.load(Ordering::Relaxed)
.pos_checked()
.expect("called finish_shrink when no shrink is in progress");
if map.get_num_buckets() == num_buckets {
return Ok(());
}
assert!(
map.buckets_in_use <= num_buckets,
map.bucket_arr.buckets_in_use.load(Ordering::Relaxed) <= num_buckets,
"called finish_shrink before enough entries were removed"
);
for i in (num_buckets as usize)..map.buckets.len() {
if let Some((k, v)) = map.buckets[i].inner.take() {
// alloc_bucket increases count, so need to decrease since we're just moving
map.buckets_in_use -= 1;
map.alloc_bucket(k, v).unwrap();
}
}
// let shard_size = shards[0].len();
// for i in (num_buckets as usize)..map.buckets.len() {
// let shard_start = num_buckets / shard_size;
// let shard = shards[shard_start];
// let entry_start = num_buckets % shard_size;
// for entry_idx in entry_start..shard.len() {
// }
// if let EntryKey::Occupied(v) = map.[i].inner.take() {
// // alloc_bucket increases count, so need to decrease since we're just moving
// map.buckets_in_use.fetch_sub(1, Ordering::Relaxed);
// map.alloc_bucket(k, v).unwrap();
// }
// }
let shmem_handle = self
.shmem_handle
.as_ref()
.expect("shrink called on a fixed-size hash table");
todo!("dry way to handle reinsertion");
let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);
shmem_handle.set_size(size_bytes)?;
let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) };
let buckets_ptr = map.buckets.as_mut_ptr();
self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, num_buckets);
map.alloc_limit = INVALID_POS;
self.resize_shmem(num_buckets)?;
self.reshard(&mut shards, num_buckets);
self.rehash(&mut shards, num_buckets);
map.bucket_arr.alloc_limit.store(BucketIdx::invalid(), Ordering::Relaxed);
Ok(())
}

View File

@@ -0,0 +1,232 @@
use std::{mem::MaybeUninit, sync::atomic::{AtomicUsize, Ordering}};
use atomic::Atomic;
#[derive(bytemuck::NoUninit, Clone, Copy, PartialEq, Eq)]
#[repr(transparent)]
pub(crate) struct BucketIdx(pub(super) u32);
impl BucketIdx {
pub const INVALID: u32 = 0x7FFFFFFF;
pub const MARK_TAG: u32 = 0x80000000;
pub(super) fn is_marked(&self) -> bool {
self.0 & Self::MARK_TAG != 0
}
pub(super) fn is_invalid(self) -> bool {
self.0 & Self::INVALID == Self::INVALID
}
pub(super) fn as_marked(self) -> Self {
Self(self.0 | Self::MARK_TAG)
}
pub(super) fn get_unmarked(self) -> Self {
Self(self.0 & Self::INVALID)
}
pub fn new(val: usize) -> Self {
Self(val as u32)
}
pub fn invalid() -> Self {
Self(Self::INVALID)
}
pub fn pos_checked(&self) -> Option<usize> {
if self.0 == Self::INVALID || self.is_marked() {
None
} else {
Some(self.0 as usize)
}
}
}
/// Fundamental storage unit within the hash table. Either empty or contains a key-value pair.
/// Always part of a chain of some kind (either a freelist if empty or a hash chain if full).
pub(crate) struct Bucket<V> {
pub val: MaybeUninit<V>,
pub next: Atomic<BucketIdx>,
}
impl<V> Bucket<V> {
pub fn empty(next: BucketIdx) -> Self {
Self {
val: MaybeUninit::uninit(),
next: Atomic::new(next)
}
}
pub fn full(val: V) -> Self {
Self {
val: MaybeUninit::new(val),
next: Atomic::new(BucketIdx::invalid())
}
}
// pub is_full
pub fn as_ref(&self) -> &V {
unsafe { self.val.assume_init_ref() }
}
pub fn as_mut(&mut self) -> &mut V {
unsafe { self.val.assume_init_mut() }
}
pub fn replace(&mut self, new_val: V) -> V {
unsafe { std::mem::replace(self.val.assume_init_mut(), new_val) }
}
}
pub(crate) struct BucketArray<'a, V> {
/// Buckets containing values.
pub(crate) buckets: &'a mut [Bucket<V>],
/// Head of the freelist.
pub(crate) free_head: Atomic<BucketIdx>,
/// Maximum index of a bucket allowed to be allocated.
pub(crate) alloc_limit: Atomic<BucketIdx>,
/// The number of currently occupied buckets.
pub(crate) buckets_in_use: AtomicUsize,
// Unclear what the purpose of this is.
pub(crate) _user_list_head: Atomic<BucketIdx>,
}
impl<'a, V> BucketArray<'a, V> {
pub fn new(buckets: &'a mut [Bucket<V>]) -> Self {
debug_assert!(Atomic::<BucketIdx>::is_lock_free());
Self {
buckets,
free_head: Atomic::new(BucketIdx(0)),
_user_list_head: Atomic::new(BucketIdx(0)),
alloc_limit: Atomic::new(BucketIdx::invalid()),
buckets_in_use: 0.into(),
}
}
pub fn dealloc_bucket(&mut self, pos: usize) -> V {
let bucket = &mut self.buckets[pos];
let pos = BucketIdx::new(pos);
loop {
let free = self.free_head.load(Ordering::Relaxed);
bucket.next = Atomic::new(free);
if self.free_head.compare_exchange_weak(
free, pos, Ordering::Relaxed, Ordering::Relaxed
).is_ok() {
self.buckets_in_use.fetch_sub(1, Ordering::Relaxed);
return unsafe { bucket.val.assume_init_read() };
}
}
}
#[allow(unused_assignments)]
fn find_bucket(&self) -> (BucketIdx, BucketIdx) {
let mut left_node = BucketIdx::invalid();
let mut right_node = BucketIdx::invalid();
let mut left_node_next = BucketIdx::invalid();
loop {
let mut t = BucketIdx::invalid();
let mut t_next = self.free_head.load(Ordering::Relaxed);
let alloc_limit = self.alloc_limit.load(Ordering::Relaxed).pos_checked();
while t_next.is_marked() || t.pos_checked()
.map_or(true, |v| alloc_limit.map_or(false, |l| v > l))
{
if t_next.is_marked() {
left_node = t;
left_node_next = t_next;
}
t = t_next.get_unmarked();
if t.is_invalid() { break }
t_next = self.buckets[t.0 as usize].next.load(Ordering::Relaxed);
}
right_node = t;
if left_node_next == right_node {
if !right_node.is_invalid() && self.buckets[right_node.0 as usize]
.next.load(Ordering::Relaxed).is_marked()
{
continue;
} else {
return (left_node, right_node);
}
}
let left_ref = if !left_node.is_invalid() {
&self.buckets[left_node.0 as usize].next
} else { &self.free_head };
if left_ref.compare_exchange_weak(
left_node_next, right_node, Ordering::Relaxed, Ordering::Relaxed
).is_ok() {
if !right_node.is_invalid() && self.buckets[right_node.0 as usize]
.next.load(Ordering::Relaxed).is_marked()
{
continue;
} else {
return (left_node, right_node);
}
}
}
}
#[allow(unused_assignments)]
pub(crate) fn alloc_bucket(&mut self, value: V) -> Option<BucketIdx> {
let mut right_node_next = BucketIdx::invalid();
let mut left_idx = BucketIdx::invalid();
let mut right_idx = BucketIdx::invalid();
loop {
(left_idx, right_idx) = self.find_bucket();
if right_idx.is_invalid() {
return None;
}
let right = &self.buckets[right_idx.0 as usize];
right_node_next = right.next.load(Ordering::Relaxed);
if !right_node_next.is_marked() {
if right.next.compare_exchange_weak(
right_node_next, right_node_next.as_marked(),
Ordering::Relaxed, Ordering::Relaxed
).is_ok() {
break;
}
}
}
let left_ref = if !left_idx.is_invalid() {
&self.buckets[left_idx.0 as usize].next
} else {
&self.free_head
};
if left_ref.compare_exchange_weak(
right_idx, right_node_next,
Ordering::Relaxed, Ordering::Relaxed
).is_err() {
todo!()
}
self.buckets_in_use.fetch_add(1, Ordering::Relaxed);
self.buckets[right_idx.0 as usize].val.write(value);
Some(right_idx)
}
pub fn clear(&mut self) {
for i in 0..self.buckets.len() {
self.buckets[i] = Bucket::empty(
if i < self.buckets.len() - 1 {
BucketIdx::new(i + 1)
} else {
BucketIdx::invalid()
}
);
}
self.free_head.store(BucketIdx(0), Ordering::Relaxed);
self.buckets_in_use.store(0, Ordering::Relaxed);
}
}

View File

@@ -3,35 +3,47 @@
use std::hash::Hash;
use std::mem::MaybeUninit;
use crate::hash::entry::*;
use crate::sync::*;
use crate::hash::{
entry::*,
bucket::{BucketArray, Bucket, BucketIdx}
};
/// Invalid position within the map (either within the dictionary or bucket array).
pub(crate) const INVALID_POS: u32 = u32::MAX;
#[derive(PartialEq, Eq)]
pub(crate) enum EntryType {
Occupied,
Rehash,
Tombstone,
RehashTombstone,
Empty,
}
/// Fundamental storage unit within the hash table. Either empty or contains a key-value pair.
/// Always part of a chain of some kind (either a freelist if empty or a hash chain if full).
pub(crate) struct Bucket<K, V> {
/// Index of next bucket in the chain.
pub(crate) next: u32,
/// Key-value pair contained within bucket.
pub(crate) inner: Option<(K, V)>,
pub(crate) struct EntryKey<K> {
pub(crate) tag: EntryType,
pub(crate) val: MaybeUninit<K>,
}
pub(crate) struct DictShard<'a, K> {
pub(crate) keys: &'a mut [EntryKey<K>],
pub(crate) idxs: &'a mut [BucketIdx],
}
impl<'a, K> DictShard<'a, K> {
fn len(&self) -> usize {
self.keys.len()
}
}
pub(crate) struct MaybeUninitDictShard<'a, K> {
pub(crate) keys: &'a mut [MaybeUninit<EntryKey<K>>],
pub(crate) idxs: &'a mut [MaybeUninit<BucketIdx>],
}
/// Core hash table implementation.
pub(crate) struct CoreHashMap<'a, K, V> {
/// Dictionary used to map hashes to bucket indices.
pub(crate) dictionary: &'a mut [u32],
/// Buckets containing key-value pairs.
pub(crate) buckets: &'a mut [Bucket<K, V>],
/// Head of the freelist.
pub(crate) free_head: u32,
/// Maximum index of a bucket allowed to be allocated. [`INVALID_POS`] if no limit.
pub(crate) alloc_limit: u32,
/// The number of currently occupied buckets.
pub(crate) buckets_in_use: u32,
// pub(crate) lock: libc::pthread_mutex_t,
// Unclear what the purpose of this is.
pub(crate) _user_list_head: u32,
/// Dictionary used to map hashes to bucket indices.
pub(crate) dict_shards: &'a mut [RwLock<DictShard<'a, K>>],
pub(crate) bucket_arr: BucketArray<'a, V>,
}
/// Error for when there are no empty buckets left but one is needed.
@@ -39,140 +51,170 @@ pub(crate) struct CoreHashMap<'a, K, V> {
pub struct FullError();
impl<'a, K: Clone + Hash + Eq, V> CoreHashMap<'a, K, V> {
const FILL_FACTOR: f32 = 0.60;
/// Estimate the size of data contained within the the hash map.
pub fn estimate_size(num_buckets: u32) -> usize {
let mut size = 0;
// buckets
size += size_of::<Bucket<K, V>>() * num_buckets as usize;
// dictionary
size += (f32::ceil((size_of::<u32>() * num_buckets as usize) as f32 / Self::FILL_FACTOR))
as usize;
size
}
pub fn new(
buckets: &'a mut [MaybeUninit<Bucket<K, V>>],
dictionary: &'a mut [MaybeUninit<u32>],
buckets: &'a mut [MaybeUninit<Bucket<V>>],
dict_shards: &'a mut [RwLock<MaybeUninitDictShard<'a, K>>],
) -> Self {
// Initialize the buckets
for i in 0..buckets.len() {
buckets[i].write(Bucket {
next: if i < buckets.len() - 1 {
i as u32 + 1
} else {
INVALID_POS
},
inner: None,
});
for i in 0..buckets.len() {
buckets[i].write(Bucket::empty(
if i < buckets.len() - 1 {
BucketIdx::new(i + 1)
} else {
BucketIdx::invalid()
})
);
}
// Initialize the dictionary
for e in dictionary.iter_mut() {
e.write(INVALID_POS);
}
for shard in dict_shards.iter_mut() {
let mut dicts = shard.write();
for e in dicts.keys.iter_mut() {
e.write(EntryKey {
tag: EntryType::Empty,
val: MaybeUninit::uninit(),
});
}
for e in dicts.idxs.iter_mut() {
e.write(BucketIdx::invalid());
}
}
// TODO: use std::slice::assume_init_mut() once it stabilizes
let buckets =
unsafe { std::slice::from_raw_parts_mut(buckets.as_mut_ptr().cast(), buckets.len()) };
let dictionary = unsafe {
std::slice::from_raw_parts_mut(dictionary.as_mut_ptr().cast(), dictionary.len())
unsafe { std::slice::from_raw_parts_mut(buckets.as_mut_ptr().cast(),
buckets.len()) };
let dict_shards = unsafe {
std::slice::from_raw_parts_mut(dict_shards.as_mut_ptr().cast(),
dict_shards.len())
};
Self {
dictionary,
buckets,
free_head: 0,
buckets_in_use: 0,
_user_list_head: INVALID_POS,
alloc_limit: INVALID_POS,
dict_shards,
bucket_arr: BucketArray::new(buckets),
}
}
/// Get the value associated with a key (if it exists) given its hash.
pub fn get_with_hash(&self, key: &K, hash: u64) -> Option<&V> {
let mut next = self.dictionary[hash as usize % self.dictionary.len()];
loop {
if next == INVALID_POS {
return None;
}
pub fn get_with_hash(&'a self, key: &K, hash: u64) -> Option<ValueReadGuard<'a, V>> {
let num_buckets = self.get_num_buckets();
let shard_size = num_buckets / self.dict_shards.len();
let bucket_pos = hash as usize % num_buckets;
let shard_start = bucket_pos / shard_size;
for off in 0..self.dict_shards.len() {
let shard_idx = (shard_start + off) % self.dict_shards.len();
let shard = self.dict_shards[shard_idx].read();
let entry_start = if off == 0 { bucket_pos % shard_size } else { 0 };
for entry_idx in entry_start..shard.len() {
match shard.keys[entry_idx].tag {
EntryType::Empty => return None,
EntryType::Tombstone => continue,
EntryType::Occupied => {
let cand_key = unsafe { shard.keys[entry_idx].val.assume_init_ref() };
if cand_key == key {
let bucket_idx = shard.idxs[entry_idx].pos_checked().expect("position is valid");
return Some(RwLockReadGuard::map(
shard, |_| self.bucket_arr.buckets[bucket_idx].as_ref()
));
}
},
_ => unreachable!(),
}
}
}
None
}
let bucket = &self.buckets[next as usize];
let (bucket_key, bucket_value) = bucket.inner.as_ref().expect("entry is in use");
if bucket_key == key {
return Some(bucket_value);
}
next = bucket.next;
}
}
pub fn entry_with_hash(&'a mut self, key: K, hash: u64) -> Result<Entry<'a, K, V>, FullError> {
// We need to keep holding on the locks for each shard we process since if we don't find the
// key anywhere, we want to insert it at the earliest possible position (which may be several
// shards away). Ideally cross-shard chains are quite rare, so this shouldn't be a big deal.
let mut shards = Vec::new();
let mut insert_pos = None;
let mut insert_shard = None;
let num_buckets = self.get_num_buckets();
let shard_size = num_buckets / self.dict_shards.len();
let bucket_pos = hash as usize % num_buckets;
let shard_start = bucket_pos / shard_size;
for off in 0..self.dict_shards.len() {
let shard_idx = (shard_start + off) % self.dict_shards.len();
let shard = self.dict_shards[shard_idx].write();
let mut inserted = false;
let entry_start = if off == 0 { bucket_pos % shard_size } else { 0 };
for entry_idx in entry_start..shard.len() {
match shard.keys[entry_idx].tag {
EntryType::Empty => {
let (shard, shard_pos) = match (insert_shard, insert_pos) {
(Some(s), Some(p)) => (s, p),
(None, Some(p)) => (shard, p),
(None, None) => (shard, entry_idx),
_ => unreachable!()
};
return Ok(Entry::Vacant(VacantEntry {
_key: key,
shard,
shard_pos,
bucket_arr: &mut self.bucket_arr,
}))
},
EntryType::Tombstone => {
if insert_pos.is_none() {
insert_pos = Some(entry_idx);
inserted = true;
}
},
EntryType::Occupied => {
let cand_key = unsafe { shard.keys[entry_idx].val.assume_init_ref() };
if *cand_key == key {
let bucket_pos = shard.idxs[entry_idx].pos_checked().unwrap();
return Ok(Entry::Occupied(OccupiedEntry {
_key: key,
shard,
shard_pos: entry_idx,
bucket_pos,
bucket_arr: &mut self.bucket_arr,
}));
}
}
_ => unreachable!(),
}
}
if inserted {
insert_shard = Some(shard)
} else {
shards.push(shard);
}
}
if let (Some(shard), Some(shard_pos)) = (insert_shard, insert_pos) {
Ok(Entry::Vacant(VacantEntry {
_key: key,
shard,
shard_pos,
bucket_arr: &mut self.bucket_arr,
}))
} else {
Err(FullError{})
}
}
/// Get number of buckets in map.
pub fn get_num_buckets(&self) -> usize {
self.buckets.len()
self.bucket_arr.buckets.len()
}
/// Clears all entries from the hashmap.
///
/// Does not reset any allocation limits, but does clear any entries beyond them.
pub fn clear(&mut self) {
for i in 0..self.buckets.len() {
self.buckets[i] = Bucket {
next: if i < self.buckets.len() - 1 {
i as u32 + 1
} else {
INVALID_POS
},
inner: None,
}
}
for i in 0..self.dictionary.len() {
self.dictionary[i] = INVALID_POS;
}
let mut shards: Vec<_> = self.dict_shards.iter().map(|x| x.write()).collect();
for shard in shards.iter_mut() {
for e in shard.keys.iter_mut() {
e.tag = EntryType::Empty;
}
for e in shard.idxs.iter_mut() {
*e = BucketIdx::invalid();
}
}
self.free_head = 0;
self.buckets_in_use = 0;
}
/// Find the position of an unused bucket via the freelist and initialize it.
pub(crate) fn alloc_bucket(&mut self, key: K, value: V) -> Result<u32, FullError> {
let mut pos = self.free_head;
// Find the first bucket we're *allowed* to use.
let mut prev = PrevPos::First(self.free_head);
while pos != INVALID_POS && pos >= self.alloc_limit {
let bucket = &mut self.buckets[pos as usize];
prev = PrevPos::Chained(pos);
pos = bucket.next;
}
if pos == INVALID_POS {
return Err(FullError());
}
// Repair the freelist.
match prev {
PrevPos::First(_) => {
let next_pos = self.buckets[pos as usize].next;
self.free_head = next_pos;
}
PrevPos::Chained(p) => {
if p != INVALID_POS {
let next_pos = self.buckets[pos as usize].next;
self.buckets[p as usize].next = next_pos;
}
}
_ => unreachable!(),
}
// Initialize the bucket.
let bucket = &mut self.buckets[pos as usize];
self.buckets_in_use += 1;
bucket.next = INVALID_POS;
bucket.inner = Some((key, value));
Ok(pos)
self.bucket_arr.clear();
}
}

View File

@@ -1,138 +1,76 @@
//! Equivalent of [`std::collections::hash_map::Entry`] for this hashmap.
use crate::hash::core::{CoreHashMap, FullError, INVALID_POS};
use crate::hash::{
core::{DictShard, EntryType},
bucket::{BucketArray, BucketIdx}
};
use crate::sync::{RwLockWriteGuard, ValueWriteGuard};
use std::hash::Hash;
use std::mem;
pub enum Entry<'a, 'b, K, V> {
Occupied(OccupiedEntry<'a, 'b, K, V>),
Vacant(VacantEntry<'a, 'b, K, V>),
pub enum Entry<'a, K, V> {
Occupied(OccupiedEntry<'a, K, V>),
Vacant(VacantEntry<'a, K, V>),
}
/// Enum representing the previous position within a chain.
#[derive(Clone, Copy)]
pub(crate) enum PrevPos {
/// Starting index within the dictionary.
First(u32),
/// Regular index within the buckets.
Chained(u32),
/// Unknown - e.g. the associated entry was retrieved by index instead of chain.
Unknown(u64),
}
pub struct OccupiedEntry<'a, 'b, K, V> {
/// Mutable reference to the map containing this entry.
pub(crate) map: RwLockWriteGuard<'b, CoreHashMap<'a, K, V>>,
pub struct OccupiedEntry<'a, K, V> {
/// The key of the occupied entry
pub(crate) _key: K,
/// The index of the previous entry in the chain.
pub(crate) prev_pos: PrevPos,
/// Mutable reference to the shard of the map the entry is in.
pub(crate) shard: RwLockWriteGuard<'a, DictShard<'a, K>>,
/// The position of the entry in the map.
pub(crate) shard_pos: usize,
/// Mutable reference to the bucket array containing entry.
pub(crate) bucket_arr: &'a mut BucketArray<'a, V>,
/// The position of the bucket in the [`CoreHashMap`] bucket array.
pub(crate) bucket_pos: u32,
pub(crate) bucket_pos: usize,
}
impl<K, V> OccupiedEntry<'_, '_, K, V> {
impl<K, V> OccupiedEntry<'_, K, V> {
pub fn get(&self) -> &V {
&self.map.buckets[self.bucket_pos as usize]
.inner
.as_ref()
.unwrap()
.1
self.bucket_arr.buckets[self.bucket_pos].as_ref()
}
pub fn get_mut(&mut self) -> &mut V {
&mut self.map.buckets[self.bucket_pos as usize]
.inner
.as_mut()
.unwrap()
.1
self.bucket_arr.buckets[self.bucket_pos].as_mut()
}
/// Inserts a value into the entry, replacing (and returning) the existing value.
pub fn insert(&mut self, value: V) -> V {
let bucket = &mut self.map.buckets[self.bucket_pos as usize];
// This assumes inner is Some, which it must be for an OccupiedEntry
mem::replace(&mut bucket.inner.as_mut().unwrap().1, value)
self.bucket_arr.buckets[self.bucket_pos].replace(value)
}
/// Removes the entry from the hash map, returning the value originally stored within it.
///
/// This may result in multiple bucket accesses if the entry was obtained by index as the
/// previous chain entry needs to be discovered in this case.
///
/// # Panics
/// Panics if the `prev_pos` field is equal to [`PrevPos::Unknown`]. In practice, this means
/// the entry was obtained via calling something like [`CoreHashMap::entry_at_bucket`].
pub fn remove(mut self) -> V {
// If this bucket was queried by index, go ahead and follow its chain from the start.
let prev = if let PrevPos::Unknown(hash) = self.prev_pos {
let dict_idx = hash as usize % self.map.dictionary.len();
let mut prev = PrevPos::First(dict_idx as u32);
let mut curr = self.map.dictionary[dict_idx];
while curr != self.bucket_pos {
assert!(curr != INVALID_POS);
prev = PrevPos::Chained(curr);
curr = self.map.buckets[curr as usize].next;
}
prev
} else {
self.prev_pos
};
// CoreHashMap::remove returns Option<(K, V)>. We know it's Some for an OccupiedEntry.
let bucket = &mut self.map.buckets[self.bucket_pos as usize];
// unlink it from the chain
match prev {
PrevPos::First(dict_pos) => {
self.map.dictionary[dict_pos as usize] = bucket.next;
}
PrevPos::Chained(bucket_pos) => {
// println!("we think prev of {} is {bucket_pos}", self.bucket_pos);
self.map.buckets[bucket_pos as usize].next = bucket.next;
}
_ => unreachable!(),
}
// and add it to the freelist
let free = self.map.free_head;
let bucket = &mut self.map.buckets[self.bucket_pos as usize];
let old_value = bucket.inner.take();
bucket.next = free;
self.map.free_head = self.bucket_pos;
self.map.buckets_in_use -= 1;
old_value.unwrap().1
pub fn remove(&mut self) -> V {
self.shard.idxs[self.shard_pos] = BucketIdx::invalid();
self.shard.keys[self.shard_pos].tag = EntryType::Tombstone;
self.bucket_arr.dealloc_bucket(self.bucket_pos)
}
}
/// An abstract view into a vacant entry within the map.
pub struct VacantEntry<'a, 'b, K, V> {
/// Mutable reference to the map containing this entry.
pub(crate) map: RwLockWriteGuard<'b, CoreHashMap<'a, K, V>>,
/// The key to be inserted into this entry.
pub(crate) key: K,
/// The position within the dictionary corresponding to the key's hash.
pub(crate) dict_pos: u32,
pub struct VacantEntry<'a, K, V> {
/// The key of the occupied entry
pub(crate) _key: K,
/// Mutable reference to the shard of the map the entry is in.
pub(crate) shard: RwLockWriteGuard<'a, DictShard<'a, K>>,
/// The position of the entry in the map.
pub(crate) shard_pos: usize,
/// Mutable reference to the bucket array containing entry.
pub(crate) bucket_arr: &'a mut BucketArray<'a, V>,
}
impl<'b, K: Clone + Hash + Eq, V> VacantEntry<'_, 'b, K, V> {
impl<'a, K: Clone + Hash + Eq, V> VacantEntry<'a, K, V> {
/// Insert a value into the vacant entry, finding and populating an empty bucket in the process.
///
/// # Errors
/// Will return [`FullError`] if there are no unoccupied buckets in the map.
pub fn insert(mut self, value: V) -> Result<ValueWriteGuard<'b, V>, FullError> {
let pos = self.map.alloc_bucket(self.key, value)?;
if pos == INVALID_POS {
return Err(FullError());
}
self.map.buckets[pos as usize].next = self.map.dictionary[self.dict_pos as usize];
self.map.dictionary[self.dict_pos as usize] = pos;
pub fn insert(mut self, value: V) -> ValueWriteGuard<'a, V> {
let pos = self.bucket_arr.alloc_bucket(value).expect("bucket is available if entry is");
self.shard.keys[self.shard_pos].tag = EntryType::Occupied;
self.shard.keys[self.shard_pos].val.write(self._key);
let idx = pos.pos_checked().expect("position is valid");
self.shard.idxs[self.shard_pos] = pos;
Ok(RwLockWriteGuard::map(self.map, |m| {
&mut m.buckets[pos as usize].inner.as_mut().unwrap().1
}))
RwLockWriteGuard::map(self.shard, |_| {
self.bucket_arr.buckets[idx].as_mut()
})
}
}

View File

@@ -36,18 +36,17 @@ impl<'a> From<&'a [u8]> for TestKey {
}
fn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {
let w = HashMapInit::<TestKey, usize>::new_resizeable_named(100000, 120000, "test_inserts")
let w = HashMapInit::<TestKey, usize>::new_resizeable_named(100000, 120000, 100, "test_inserts")
.attach_writer();
for (idx, k) in keys.iter().enumerate() {
let res = w.entry((*k).into());
match res {
match res.unwrap() {
Entry::Occupied(mut e) => {
e.insert(idx);
}
Entry::Vacant(e) => {
let res = e.insert(idx);
assert!(res.is_ok());
_ = e.insert(idx);
}
};
}
@@ -112,15 +111,15 @@ fn apply_op(
let entry = map.entry(op.0);
let hash_existing = match op.1 {
Some(new) => match entry {
Some(new) => match entry.unwrap() {
Entry::Occupied(mut e) => Some(e.insert(new)),
Entry::Vacant(e) => {
_ = e.insert(new).unwrap();
_ = e.insert(new);
None
}
},
None => match entry {
Entry::Occupied(e) => Some(e.remove()),
None => match entry.unwrap() {
Entry::Occupied(mut e) => Some(e.remove()),
Entry::Vacant(_) => None,
},
};
@@ -164,15 +163,15 @@ fn do_deletes(
fn do_shrink(
writer: &mut HashMapAccess<TestKey, usize>,
shadow: &mut BTreeMap<TestKey, usize>,
to: u32,
to: usize,
) {
assert!(writer.shrink_goal().is_none());
writer.begin_shrink(to);
assert_eq!(writer.shrink_goal(), Some(to as usize));
while writer.get_num_buckets_in_use() > to as usize {
let (k, _) = shadow.pop_first().unwrap();
let entry = writer.entry(k);
if let Entry::Occupied(e) = entry {
let entry = writer.entry(k).unwrap();
if let Entry::Occupied(mut e) = entry {
e.remove();
}
}
@@ -185,7 +184,7 @@ fn do_shrink(
#[test]
fn random_ops() {
let mut writer =
HashMapInit::<TestKey, usize>::new_resizeable_named(100000, 120000, "test_random")
HashMapInit::<TestKey, usize>::new_resizeable_named(100000, 120000, 10, "test_random")
.attach_writer();
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
@@ -200,153 +199,153 @@ fn random_ops() {
}
}
#[test]
fn test_shuffle() {
let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 1200, "test_shuf")
.attach_writer();
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
let mut rng = rand::rng();
// #[test]
// fn test_shuffle() {
// let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 1200, 10, "test_shuf")
// .attach_writer();
// let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
// let mut rng = rand::rng();
do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
writer.shuffle();
do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
}
// do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
// writer.shuffle();
// do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
// }
#[test]
fn test_grow() {
let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 2000, "test_grow")
.attach_writer();
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
let mut rng = rand::rng();
// #[test]
// fn test_grow() {
// let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 2000, 10, "test_grow")
// .attach_writer();
// let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
// let mut rng = rand::rng();
do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
let old_usage = writer.get_num_buckets_in_use();
writer.grow(1500).unwrap();
assert_eq!(writer.get_num_buckets_in_use(), old_usage);
assert_eq!(writer.get_num_buckets(), 1500);
do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
}
// do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
// let old_usage = writer.get_num_buckets_in_use();
// writer.grow(1500).unwrap();
// assert_eq!(writer.get_num_buckets_in_use(), old_usage);
// assert_eq!(writer.get_num_buckets(), 1500);
// do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
// }
#[test]
fn test_clear() {
let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_clear")
let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, 10, "test_clear")
.attach_writer();
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
let mut rng = rand::rng();
do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
writer.clear();
assert_eq!(writer.get_num_buckets_in_use(), 0);
assert_eq!(writer.get_num_buckets(), 1500);
while let Some((key, _)) = shadow.pop_first() {
assert!(writer.get(&key).is_none());
}
do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
for i in 0..(1500 - writer.get_num_buckets_in_use()) {
writer.insert((1500 + i as u128).into(), 0).unwrap();
}
assert_eq!(writer.insert(5000.into(), 0), Err(FullError {}));
writer.clear();
assert!(writer.insert(5000.into(), 0).is_ok());
// let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
// let mut rng = rand::rng();
// do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
// writer.clear();
// assert_eq!(writer.get_num_buckets_in_use(), 0);
// assert_eq!(writer.get_num_buckets(), 1500);
// while let Some((key, _)) = shadow.pop_first() {
// assert!(writer.get(&key).is_none());
// }
// do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
// for i in 0..(1500 - writer.get_num_buckets_in_use()) {
// writer.insert((1500 + i as u128).into(), 0).unwrap();
// }
// assert_eq!(writer.insert(5000.into(), 0), Err(FullError {}));
// writer.clear();
// assert!(writer.insert(5000.into(), 0).is_ok());
}
#[test]
fn test_idx_remove() {
let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_clear")
.attach_writer();
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
let mut rng = rand::rng();
do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
for _ in 0..100 {
let idx = (rng.next_u32() % 1500) as usize;
if let Some(e) = writer.entry_at_bucket(idx) {
shadow.remove(&e._key);
e.remove();
}
}
while let Some((key, val)) = shadow.pop_first() {
assert_eq!(*writer.get(&key).unwrap(), val);
}
}
// #[test]
// fn test_idx_remove() {
// let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, 10, "test_clear")
// .attach_writer();
// let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
// let mut rng = rand::rng();
// do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
// for _ in 0..100 {
// let idx = (rng.next_u32() % 1500) as usize;
// if let Some(e) = writer.entry_at_bucket(idx) {
// shadow.remove(&e._key);
// e.remove();
// }
// }
// while let Some((key, val)) = shadow.pop_first() {
// assert_eq!(*writer.get(&key).unwrap(), val);
// }
// }
#[test]
fn test_idx_get() {
let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_clear")
.attach_writer();
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
let mut rng = rand::rng();
do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
for _ in 0..100 {
let idx = (rng.next_u32() % 1500) as usize;
if let Some(pair) = writer.get_at_bucket(idx) {
{
let v: *const usize = &pair.1;
assert_eq!(writer.get_bucket_for_value(v), idx);
}
{
let v: *const usize = &pair.1;
assert_eq!(writer.get_bucket_for_value(v), idx);
}
}
}
}
// #[test]
// fn test_idx_get() {
// let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_clear")
// .attach_writer();
// let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
// let mut rng = rand::rng();
// do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
// for _ in 0..100 {
// let idx = (rng.next_u32() % 1500) as usize;
// if let Some(pair) = writer.get_at_bucket(idx) {
// {
// let v: *const usize = &pair.1;
// assert_eq!(writer.get_bucket_for_value(v), idx);
// }
// {
// let v: *const usize = &pair.1;
// assert_eq!(writer.get_bucket_for_value(v), idx);
// }
// }
// }
// }
#[test]
fn test_shrink() {
let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_shrink")
.attach_writer();
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
let mut rng = rand::rng();
// #[test]
// fn test_shrink() {
// let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_shrink")
// .attach_writer();
// let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
// let mut rng = rand::rng();
do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
do_shrink(&mut writer, &mut shadow, 1000);
assert_eq!(writer.get_num_buckets(), 1000);
do_deletes(500, &mut writer, &mut shadow);
do_random_ops(10000, 500, 0.75, &mut writer, &mut shadow, &mut rng);
assert!(writer.get_num_buckets_in_use() <= 1000);
}
// do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
// do_shrink(&mut writer, &mut shadow, 1000);
// assert_eq!(writer.get_num_buckets(), 1000);
// do_deletes(500, &mut writer, &mut shadow);
// do_random_ops(10000, 500, 0.75, &mut writer, &mut shadow, &mut rng);
// assert!(writer.get_num_buckets_in_use() <= 1000);
// }
#[test]
fn test_shrink_grow_seq() {
let mut writer =
HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 20000, "test_grow_seq")
.attach_writer();
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
let mut rng = rand::rng();
// #[test]
// fn test_shrink_grow_seq() {
// let mut writer =
// HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 20000, "test_grow_seq")
// .attach_writer();
// let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
// let mut rng = rand::rng();
do_random_ops(500, 1000, 0.1, &mut writer, &mut shadow, &mut rng);
eprintln!("Shrinking to 750");
do_shrink(&mut writer, &mut shadow, 750);
do_random_ops(200, 1000, 0.5, &mut writer, &mut shadow, &mut rng);
eprintln!("Growing to 1500");
writer.grow(1500).unwrap();
do_random_ops(600, 1500, 0.1, &mut writer, &mut shadow, &mut rng);
eprintln!("Shrinking to 200");
while shadow.len() > 100 {
do_deletes(1, &mut writer, &mut shadow);
}
do_shrink(&mut writer, &mut shadow, 200);
do_random_ops(50, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
eprintln!("Growing to 10k");
writer.grow(10000).unwrap();
do_random_ops(10000, 5000, 0.25, &mut writer, &mut shadow, &mut rng);
}
// do_random_ops(500, 1000, 0.1, &mut writer, &mut shadow, &mut rng);
// eprintln!("Shrinking to 750");
// do_shrink(&mut writer, &mut shadow, 750);
// do_random_ops(200, 1000, 0.5, &mut writer, &mut shadow, &mut rng);
// eprintln!("Growing to 1500");
// writer.grow(1500).unwrap();
// do_random_ops(600, 1500, 0.1, &mut writer, &mut shadow, &mut rng);
// eprintln!("Shrinking to 200");
// while shadow.len() > 100 {
// do_deletes(1, &mut writer, &mut shadow);
// }
// do_shrink(&mut writer, &mut shadow, 200);
// do_random_ops(50, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
// eprintln!("Growing to 10k");
// writer.grow(10000).unwrap();
// do_random_ops(10000, 5000, 0.25, &mut writer, &mut shadow, &mut rng);
// }
#[test]
fn test_bucket_ops() {
let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 1200, "test_bucket_ops")
let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 1200, 10, "test_bucket_ops")
.attach_writer();
match writer.entry(1.into()) {
match writer.entry(1.into()).unwrap() {
Entry::Occupied(mut e) => {
e.insert(2);
}
Entry::Vacant(e) => {
_ = e.insert(2).unwrap();
}
_ = e.insert(2);
},
}
assert_eq!(writer.get_num_buckets_in_use(), 1);
assert_eq!(writer.get_num_buckets(), 1000);
assert_eq!(*writer.get(&1.into()).unwrap(), 2);
let pos = match writer.entry(1.into()) {
let pos = match writer.entry(1.into()).unwrap() {
Entry::Occupied(e) => {
assert_eq!(e._key, 1.into());
let pos = e.bucket_pos as usize;
@@ -356,8 +355,7 @@ fn test_bucket_ops() {
panic!("Insert didn't affect entry");
}
};
assert_eq!(writer.entry_at_bucket(pos).unwrap()._key, 1.into());
assert_eq!(*writer.get_at_bucket(pos).unwrap(), (1.into(), 2));
assert_eq!(writer.get_at_bucket(pos).unwrap(), &2);
{
let ptr: *const usize = &*writer.get(&1.into()).unwrap();
assert_eq!(writer.get_bucket_for_value(ptr), pos);
@@ -366,64 +364,64 @@ fn test_bucket_ops() {
assert!(writer.get(&1.into()).is_none());
}
#[test]
fn test_shrink_zero() {
let mut writer =
HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_shrink_zero")
.attach_writer();
writer.begin_shrink(0);
for i in 0..1500 {
writer.entry_at_bucket(i).map(|x| x.remove());
}
writer.finish_shrink().unwrap();
assert_eq!(writer.get_num_buckets_in_use(), 0);
let entry = writer.entry(1.into());
if let Entry::Vacant(v) = entry {
assert!(v.insert(2).is_err());
} else {
panic!("Somehow got non-vacant entry in empty map.")
}
writer.grow(50).unwrap();
let entry = writer.entry(1.into());
if let Entry::Vacant(v) = entry {
assert!(v.insert(2).is_ok());
} else {
panic!("Somehow got non-vacant entry in empty map.")
}
assert_eq!(writer.get_num_buckets_in_use(), 1);
}
// #[test]
// fn test_shrink_zero() {
// let mut writer =
// HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_shrink_zero")
// .attach_writer();
// writer.begin_shrink(0);
// for i in 0..1500 {
// writer.entry_at_bucket(i).map(|x| x.remove());
// }
// writer.finish_shrink().unwrap();
// assert_eq!(writer.get_num_buckets_in_use(), 0);
// let entry = writer.entry(1.into());
// if let Entry::Vacant(v) = entry {
// assert!(v.insert(2).is_err());
// } else {
// panic!("Somehow got non-vacant entry in empty map.")
// }
// writer.grow(50).unwrap();
// let entry = writer.entry(1.into());
// if let Entry::Vacant(v) = entry {
// assert!(v.insert(2).is_ok());
// } else {
// panic!("Somehow got non-vacant entry in empty map.")
// }
// assert_eq!(writer.get_num_buckets_in_use(), 1);
// }
#[test]
#[should_panic]
fn test_grow_oom() {
let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_grow_oom")
.attach_writer();
writer.grow(20000).unwrap();
}
// #[test]
// #[should_panic]
// fn test_grow_oom() {
// let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_grow_oom")
// .attach_writer();
// writer.grow(20000).unwrap();
// }
#[test]
#[should_panic]
fn test_shrink_bigger() {
let mut writer =
HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2500, "test_shrink_bigger")
.attach_writer();
writer.begin_shrink(2000);
}
// #[test]
// #[should_panic]
// fn test_shrink_bigger() {
// let mut writer =
// HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2500, "test_shrink_bigger")
// .attach_writer();
// writer.begin_shrink(2000);
// }
#[test]
#[should_panic]
fn test_shrink_early_finish() {
let writer =
HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2500, "test_shrink_early_finish")
.attach_writer();
writer.finish_shrink().unwrap();
}
// #[test]
// #[should_panic]
// fn test_shrink_early_finish() {
// let writer =
// HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2500, "test_shrink_early_finish")
// .attach_writer();
// writer.finish_shrink().unwrap();
// }
#[test]
#[should_panic]
fn test_shrink_fixed_size() {
let mut area = [MaybeUninit::uninit(); 10000];
let init_struct = HashMapInit::<TestKey, usize>::with_fixed(3, &mut area);
let mut writer = init_struct.attach_writer();
writer.begin_shrink(1);
}
// #[test]
// #[should_panic]
// fn test_shrink_fixed_size() {
// let mut area = [MaybeUninit::uninit(); 10000];
// let init_struct = HashMapInit::<TestKey, usize>::with_fixed(3, &mut area);
// let mut writer = init_struct.attach_writer();
// writer.begin_shrink(1);
// }

View File

@@ -6,6 +6,7 @@ use std::ptr::NonNull;
use nix::errno::Errno;
pub type RwLock<T> = lock_api::RwLock<PthreadRwLock, T>;
pub type Mutex<T> = lock_api::Mutex<PthreadMutex, T>;
pub(crate) type RwLockReadGuard<'a, T> = lock_api::RwLockReadGuard<'a, PthreadRwLock, T>;
pub type RwLockWriteGuard<'a, T> = lock_api::RwLockWriteGuard<'a, PthreadRwLock, T>;
pub type ValueReadGuard<'a, T> = lock_api::MappedRwLockReadGuard<'a, PthreadRwLock, T>;
@@ -43,7 +44,7 @@ impl PthreadRwLock {
fn inner(&self) -> NonNull<libc::pthread_rwlock_t> {
self.0.unwrap_or_else(
|| panic!("PthreadRwLock constructed badly - something likely used RawMutex::INIT")
|| panic!("PthreadRwLock constructed badly - something likely used RawRwLock::INIT")
)
}
@@ -105,3 +106,64 @@ unsafe impl lock_api::RawRwLock for PthreadRwLock {
self.unlock();
}
}
pub struct PthreadMutex(Option<NonNull<libc::pthread_mutex_t>>);
impl PthreadMutex {
pub fn new(lock: NonNull<libc::pthread_mutex_t>) -> Self {
unsafe {
let mut attrs = MaybeUninit::uninit();
// Ignoring return value here - only possible error is OOM.
libc::pthread_mutexattr_init(attrs.as_mut_ptr());
libc::pthread_mutexattr_setpshared(
attrs.as_mut_ptr(),
libc::PTHREAD_PROCESS_SHARED
);
libc::pthread_mutex_init(lock.as_ptr(), attrs.as_mut_ptr());
// Safety: POSIX specifies that "any function affecting the attributes
// object (including destruction) shall not affect any previously
// initialized read-write locks".
libc::pthread_mutexattr_destroy(attrs.as_mut_ptr());
Self(Some(lock))
}
}
fn inner(&self) -> NonNull<libc::pthread_mutex_t> {
self.0.unwrap_or_else(
|| panic!("PthreadMutex constructed badly - something likely used RawMutex::INIT")
)
}
}
unsafe impl lock_api::RawMutex for PthreadMutex {
type GuardMarker = lock_api::GuardSend;
/// *DO NOT USE THIS.* See [`PthreadRwLock`] for the full explanation.
const INIT: Self = Self(None);
fn lock(&self) {
unsafe {
let res = libc::pthread_mutex_lock(self.inner().as_ptr());
assert!(res == 0, "lock failed with {}", Errno::from_raw(res));
}
}
fn try_lock(&self) -> bool {
unsafe {
let res = libc::pthread_mutex_trylock(self.inner().as_ptr());
match res {
0 => true,
libc::EAGAIN => false,
o => panic!("try_rdlock failed with {}", Errno::from_raw(o)),
}
}
}
unsafe fn unlock(&self) {
unsafe {
let res = libc::pthread_mutex_unlock(self.inner().as_ptr());
assert!(res == 0, "unlock failed with {}", Errno::from_raw(res));
}
}
}