Merge branch 'quantumish/lfc-resizable-map' into communicator-rewrite

David Freifeld
2025-06-25 19:24:17 -07:00
11 changed files with 2865 additions and 1743 deletions

Cargo.lock generated (2998 changes)

File diff suppressed because it is too large

View File

@@ -7,12 +7,23 @@ license.workspace = true
[dependencies]
thiserror.workspace = true
nix.workspace = true
spin.workspace = true
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
rustc-hash = { version = "2.1.1" }
rand = "0.9.1"
[dev-dependencies]
rand = "0.9.1"
criterion = { workspace = true, features = ["html_reports"] }
rand_distr = "0.5.1"
xxhash-rust = { version = "0.8.15", features = ["xxh3"] }
ahash.workspace = true
twox-hash = { version = "2.1.1" }
seahash = "4.1.0"
hashbrown = { git = "https://github.com/quantumish/hashbrown.git", rev = "6610e6d" }
foldhash = "0.1.5"
[target.'cfg(target_os = "macos")'.dependencies]
tempfile = "3.14.0"
[[bench]]
name = "hmap_resize"
harness = false
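# Assuming this manifest belongs to the `neon-shmem` crate (the bench file imports
# `neon_shmem`), the suite above can presumably be run with:
#   cargo bench --bench hmap_resize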

View File

@@ -0,0 +1,286 @@
use std::hint::black_box;
use criterion::{criterion_group, criterion_main, BatchSize, Criterion, BenchmarkId};
use neon_shmem::hash::HashMapAccess;
use neon_shmem::hash::HashMapInit;
use neon_shmem::hash::entry::Entry;
use neon_shmem::shmem::ShmemHandle;
use rand::prelude::*;
use rand::distr::{Distribution, StandardUniform};
use std::hash::BuildHasher;
use std::default::Default;
// Taken from bindings to C code
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
#[repr(C)]
pub struct FileCacheKey {
pub _spc_id: u32,
pub _db_id: u32,
pub _rel_number: u32,
pub _fork_num: u32,
pub _block_num: u32,
}
impl Distribution<FileCacheKey> for StandardUniform {
// questionable, but doesn't need to be good randomness
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> FileCacheKey {
FileCacheKey {
_spc_id: rng.random(),
_db_id: rng.random(),
_rel_number: rng.random(),
_fork_num: rng.random(),
_block_num: rng.random()
}
}
}
#[derive(Clone, Debug)]
#[repr(C)]
pub struct FileCacheEntry {
pub _offset: u32,
pub _access_count: u32,
pub _prev: *mut FileCacheEntry,
pub _next: *mut FileCacheEntry,
pub _state: [u32; 8],
}
impl FileCacheEntry {
fn dummy() -> Self {
Self {
_offset: 0,
_access_count: 0,
_prev: std::ptr::null_mut(),
_next: std::ptr::null_mut(),
_state: [0; 8]
}
}
}
// Utilities for applying operations.
#[derive(Clone, Debug)]
struct TestOp<K,V>(K, Option<V>);
fn apply_op<K: Clone + std::hash::Hash + Eq, V, S: std::hash::BuildHasher>(
op: TestOp<K,V>,
map: &mut HashMapAccess<K,V,S>,
) {
let hash = map.get_hash_value(&op.0);
let entry = map.entry_with_hash(op.0, hash);
match op.1 {
Some(new) => {
match entry {
Entry::Occupied(mut e) => Some(e.insert(new)),
Entry::Vacant(e) => { e.insert(new).unwrap(); None },
}
},
None => {
match entry {
Entry::Occupied(e) => Some(e.remove()),
Entry::Vacant(_) => None,
}
},
};
}
// Hash utilities
struct SeaRandomState {
k1: u64,
k2: u64,
k3: u64,
k4: u64
}
impl std::hash::BuildHasher for SeaRandomState {
type Hasher = seahash::SeaHasher;
fn build_hasher(&self) -> Self::Hasher {
seahash::SeaHasher::with_seeds(self.k1, self.k2, self.k3, self.k4)
}
}
impl SeaRandomState {
fn new() -> Self {
let mut rng = rand::rng();
Self { k1: rng.random(), k2: rng.random(), k3: rng.random(), k4: rng.random() }
}
}
fn small_benchs(c: &mut Criterion) {
let mut group = c.benchmark_group("Small maps");
group.sample_size(10);
group.bench_function("small_rehash", |b| {
let ideal_filled = 4_000_000;
let size = 5_000_000;
let mut writer = HashMapInit::new_resizeable(size, size * 2).attach_writer();
let mut rng = rand::rng();
while writer.get_num_buckets_in_use() < ideal_filled as usize {
let key: FileCacheKey = rng.random();
let val = FileCacheEntry::dummy();
apply_op(TestOp(key, Some(val)), &mut writer);
}
b.iter(|| writer.shuffle());
});
group.bench_function("small_rehash_xxhash", |b| {
let ideal_filled = 4_000_000;
let size = 5_000_000;
let mut writer = HashMapInit::new_resizeable(size, size * 2)
.with_hasher(twox_hash::xxhash64::RandomState::default())
.attach_writer();
let mut rng = rand::rng();
while writer.get_num_buckets_in_use() < ideal_filled as usize {
let key: FileCacheKey = rng.random();
let val = FileCacheEntry::dummy();
apply_op(TestOp(key, Some(val)), &mut writer);
}
b.iter(|| writer.shuffle());
});
group.bench_function("small_rehash_ahash", |b| {
let ideal_filled = 4_000_000;
let size = 5_000_000;
let mut writer = HashMapInit::new_resizeable(size, size * 2)
.with_hasher(ahash::RandomState::default())
.attach_writer();
let mut rng = rand::rng();
while writer.get_num_buckets_in_use() < ideal_filled as usize {
let key: FileCacheKey = rng.random();
let val = FileCacheEntry::dummy();
apply_op(TestOp(key, Some(val)), &mut writer);
}
b.iter(|| writer.shuffle());
});
group.bench_function("small_rehash_seahash", |b| {
let ideal_filled = 4_000_000;
let size = 5_000_000;
let mut writer = HashMapInit::new_resizeable(size, size * 2)
.with_hasher(SeaRandomState::new())
.attach_writer();
let mut rng = rand::rng();
while writer.get_num_buckets_in_use() < ideal_filled as usize {
let key: FileCacheKey = rng.random();
let val = FileCacheEntry::dummy();
apply_op(TestOp(key, Some(val)), &mut writer);
}
b.iter(|| writer.shuffle());
});
group.finish();
}
fn real_benchs(c: &mut Criterion) {
let mut group = c.benchmark_group("Realistic workloads");
group.sample_size(10);
group.bench_function("real_bulk_insert", |b| {
let size = 125_000_000;
let ideal_filled = 100_000_000;
let mut rng = rand::rng();
b.iter_batched(
|| HashMapInit::new_resizeable(size, size * 2).attach_writer(),
|mut writer| {
for _ in 0..ideal_filled {
let key: FileCacheKey = rng.random();
let val = FileCacheEntry::dummy();
let hash = writer.get_hash_value(&key);
let entry = writer.entry_with_hash(key, hash);
black_box(match entry {
Entry::Occupied(mut e) => { e.insert(val); },
Entry::Vacant(e) => { e.insert(val).unwrap(); },
})
}
},
BatchSize::SmallInput,
)
});
group.bench_function("real_rehash", |b| {
let size = 125_000_000;
let ideal_filled = 100_000_000;
let mut writer = HashMapInit::new_resizeable(size, size).attach_writer();
let mut rng = rand::rng();
while writer.get_num_buckets_in_use() < ideal_filled {
let key: FileCacheKey = rng.random();
let val = FileCacheEntry::dummy();
apply_op(TestOp(key, Some(val)), &mut writer);
}
b.iter(|| writer.shuffle());
});
group.bench_function("real_rehash_hashbrown", |b| {
let size = 125_000_000;
let ideal_filled = 100_000_000;
let mut writer = hashbrown::raw::RawTable::new();
let mut rng = rand::rng();
let hasher = rustc_hash::FxBuildHasher::default();
unsafe {
writer.resize(size, |(k,_)| hasher.hash_one(&k),
hashbrown::raw::Fallibility::Infallible).unwrap();
}
while writer.len() < ideal_filled as usize {
let key: FileCacheKey = rng.random();
let val = FileCacheEntry::dummy();
writer.insert(hasher.hash_one(&key), (key, val), |(k,_)| hasher.hash_one(&k));
}
b.iter(|| unsafe { writer.table.rehash_in_place(
&|table, index| hasher.hash_one(&table.bucket::<(FileCacheKey, FileCacheEntry)>(index).as_ref().0),
std::mem::size_of::<(FileCacheKey, FileCacheEntry)>(),
if std::mem::needs_drop::<(FileCacheKey, FileCacheEntry)>() {
Some(|ptr| std::ptr::drop_in_place(ptr as *mut (FileCacheKey, FileCacheEntry)))
} else {
None
},
) });
});
for elems in [2, 4, 8, 16, 32, 64, 96, 112] {
group.bench_with_input(BenchmarkId::new("real_rehash_varied", elems), &elems, |b, &size| {
let ideal_filled = size * 1_000_000;
let size = 125_000_000;
let mut writer = HashMapInit::new_resizeable(size, size).attach_writer();
let mut rng = rand::rng();
while writer.get_num_buckets_in_use() < ideal_filled as usize {
let key: FileCacheKey = rng.random();
let val = FileCacheEntry::dummy();
apply_op(TestOp(key, Some(val)), &mut writer);
}
b.iter(|| writer.shuffle());
});
group.bench_with_input(BenchmarkId::new("real_rehash_varied_hashbrown", elems), &elems, |b, &size| {
let ideal_filled = size * 1_000_000;
let size = 125_000_000;
let mut writer = hashbrown::raw::RawTable::new();
let mut rng = rand::rng();
let hasher = rustc_hash::FxBuildHasher::default();
unsafe {
writer.resize(size, |(k,_)| hasher.hash_one(&k),
hashbrown::raw::Fallibility::Infallible).unwrap();
}
while writer.len() < ideal_filled as usize {
let key: FileCacheKey = rng.random();
let val = FileCacheEntry::dummy();
writer.insert(hasher.hash_one(&key), (key, val), |(k,_)| hasher.hash_one(&k));
}
b.iter(|| unsafe { writer.table.rehash_in_place(
&|table, index| hasher.hash_one(&table.bucket::<(FileCacheKey, FileCacheEntry)>(index).as_ref().0),
std::mem::size_of::<(FileCacheKey, FileCacheEntry)>(),
if std::mem::needs_drop::<(FileCacheKey, FileCacheEntry)>() {
Some(|ptr| std::ptr::drop_in_place(ptr as *mut (FileCacheKey, FileCacheEntry)))
} else {
None
},
) });
});
}
group.finish();
}
criterion_group!(benches, small_benchs, real_benchs);
criterion_main!(benches);

View File

@@ -1,273 +1,339 @@
//! Hash table implementation on top of 'shmem'
//! Resizable hash table implementation on top of byte-level storage (either `shmem` or fixed byte array).
//!
//! Features required in the long run by the communicator project:
//! This hash table has two major components: the bucket array and the dictionary. Each bucket within the
//! bucket array contains an `Option<(K, V)>` and the index of another bucket. This gives both an
//! implicit freelist within the bucket array (empty buckets point to other empty buckets) and a set of
//! hash chains within the bucket array (an occupied bucket points to other occupied buckets whose keys
//! map to the same dictionary slot).
//!
//! [X] Accessible from both Postgres processes and rust threads in the communicator process
//! [X] Low latency
//! [ ] Scalable to lots of concurrent accesses (currently uses a single spinlock)
//! [ ] Resizable
//! Buckets are never moved unless they fall within a region that is being shrunk, so all hash-dependent
//! lookup goes through the dictionary. When a new key is inserted into the map, a position within the
//! dictionary is chosen from its hash, the data is written into an empty bucket taken from the freelist,
//! and the index of that bucket is stored in the dictionary.
//!
//! This map is resizable (if initialized on top of a `ShmemHandle`). Both growing and shrinking happen
//! in-place and are at a high level achieved by expanding/reducing the bucket array and rebuilding the
//! dictionary by rehashing all keys.
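//!
//! As a rough sketch of a lookup (field names as in `core`; not literal code from this module):
//!
//! let slot = (hash % dictionary.len() as u64) as usize;
//! let mut idx = dictionary[slot];
//! let mut found = None;
//! while idx != INVALID_POS {
//!     let bucket = &buckets[idx as usize];
//!     match &bucket.inner {
//!         Some((k, v)) if *k == key => { found = Some(v); break; }
//!         _ => idx = bucket.next,
//!     }
//! }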
use std::fmt::Debug;
use std::hash::Hash;
use std::hash::{Hash, BuildHasher};
use std::mem::MaybeUninit;
use std::ops::Deref;
use crate::shmem::ShmemHandle;
use spin;
mod core;
pub mod entry;
#[cfg(test)]
mod tests;
use core::CoreHashMap;
use core::{Bucket, CoreHashMap, INVALID_POS};
use entry::{Entry, OccupiedEntry};
pub enum UpdateAction<V> {
Nothing,
Insert(V),
Remove,
}
#[derive(Debug)]
pub struct OutOfMemoryError();
pub struct HashMapInit<'a, K, V> {
// Hash table can be allocated in a fixed memory area, or in a resizeable ShmemHandle.
/// Builder for a `HashMapAccess`.
pub struct HashMapInit<'a, K, V, S = rustc_hash::FxBuildHasher> {
shmem_handle: Option<ShmemHandle>,
shared_ptr: *mut HashMapShared<'a, K, V>,
shared_size: usize,
hasher: S,
num_buckets: u32,
}
pub struct HashMapAccess<'a, K, V> {
/// Accessor for a hash table.
pub struct HashMapAccess<'a, K, V, S = rustc_hash::FxBuildHasher> {
shmem_handle: Option<ShmemHandle>,
shared_ptr: *mut HashMapShared<'a, K, V>,
hasher: S,
}
unsafe impl<'a, K: Sync, V: Sync> Sync for HashMapAccess<'a, K, V> {}
unsafe impl<'a, K: Send, V: Send> Send for HashMapAccess<'a, K, V> {}
unsafe impl<'a, K: Sync, V: Sync, S> Sync for HashMapAccess<'a, K, V, S> {}
unsafe impl<'a, K: Send, V: Send, S> Send for HashMapAccess<'a, K, V, S> {}
impl<'a, K, V> HashMapInit<'a, K, V> {
pub fn attach_writer(self) -> HashMapAccess<'a, K, V> {
HashMapAccess {
shmem_handle: self.shmem_handle,
shared_ptr: self.shared_ptr,
}
}
impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> {
pub fn with_hasher<T: BuildHasher>(self, hasher: T) -> HashMapInit<'a, K, V, T> {
HashMapInit {
hasher,
shmem_handle: self.shmem_handle,
shared_ptr: self.shared_ptr,
shared_size: self.shared_size,
num_buckets: self.num_buckets,
}
}
pub fn attach_reader(self) -> HashMapAccess<'a, K, V> {
// no difference to attach_writer currently
self.attach_writer()
}
}
// This is stored in the shared memory area
struct HashMapShared<'a, K, V> {
inner: spin::RwLock<CoreHashMap<'a, K, V>>,
}
impl<'a, K, V> HashMapInit<'a, K, V>
where
K: Clone + Hash + Eq,
{
pub fn estimate_size(num_buckets: u32) -> usize {
/// Loosely (over)estimate the size needed to store a hash table with `num_buckets` buckets.
pub fn estimate_size(num_buckets: u32) -> usize {
// add some margin to cover alignment etc.
CoreHashMap::<K, V>::estimate_size(num_buckets) + size_of::<HashMapShared<K, V>>() + 1000
}
pub fn init_in_fixed_area(
num_buckets: u32,
area: &'a mut [MaybeUninit<u8>],
) -> HashMapInit<'a, K, V> {
Self::init_common(num_buckets, None, area.as_mut_ptr().cast(), area.len())
}
/// Initialize a new hash map in the given shared memory area
pub fn init_in_shmem(num_buckets: u32, mut shmem: ShmemHandle) -> HashMapInit<'a, K, V> {
let size = Self::estimate_size(num_buckets);
shmem
.set_size(size)
.expect("could not resize shared memory area");
let ptr = unsafe { shmem.data_ptr.as_mut() };
Self::init_common(num_buckets, Some(shmem), ptr, size)
}
fn init_common(
num_buckets: u32,
shmem_handle: Option<ShmemHandle>,
area_ptr: *mut u8,
area_len: usize,
) -> HashMapInit<'a, K, V> {
// carve out HashMapShared from the area. This does not include the hashmap's dictionary
// and buckets.
let mut ptr: *mut u8 = area_ptr;
/// Initialize a table for writing.
pub fn attach_writer(self) -> HashMapAccess<'a, K, V, S> {
// carve out the HashMapShared struct from the area.
let mut ptr: *mut u8 = self.shared_ptr.cast();
let end_ptr: *mut u8 = unsafe { ptr.add(self.shared_size) };
ptr = unsafe { ptr.add(ptr.align_offset(align_of::<HashMapShared<K, V>>())) };
let shared_ptr: *mut HashMapShared<K, V> = ptr.cast();
ptr = unsafe { ptr.add(size_of::<HashMapShared<K, V>>()) };
// the rest of the space is given to the hash map's dictionary and buckets
let remaining_area = unsafe {
std::slice::from_raw_parts_mut(ptr, area_len - ptr.offset_from(area_ptr) as usize)
// carve out the buckets
ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<core::Bucket<K, V>>())) };
let buckets_ptr = ptr;
ptr = unsafe { ptr.add(size_of::<core::Bucket<K, V>>() * self.num_buckets as usize) };
// use remaining space for the dictionary
ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<u32>())) };
assert!(ptr.addr() < end_ptr.addr());
let dictionary_ptr = ptr;
let dictionary_size = unsafe { end_ptr.byte_offset_from(ptr) / size_of::<u32>() as isize };
assert!(dictionary_size > 0);
let buckets =
unsafe { std::slice::from_raw_parts_mut(buckets_ptr.cast(), self.num_buckets as usize) };
let dictionary = unsafe {
std::slice::from_raw_parts_mut(dictionary_ptr.cast(), dictionary_size as usize)
};
let hashmap = CoreHashMap::new(num_buckets, remaining_area);
let hashmap = CoreHashMap::new(buckets, dictionary);
unsafe {
std::ptr::write(
shared_ptr,
HashMapShared {
inner: spin::RwLock::new(hashmap),
},
);
std::ptr::write(shared_ptr, HashMapShared { inner: hashmap });
}
HashMapAccess {
shmem_handle: self.shmem_handle,
shared_ptr: self.shared_ptr,
hasher: self.hasher,
}
}
HashMapInit {
shmem_handle: shmem_handle,
shared_ptr,
}
/// Initialize a table for reading. Currently identical to `attach_writer`.
pub fn attach_reader(self) -> HashMapAccess<'a, K, V, S> {
self.attach_writer()
}
}
impl<'a, K, V> HashMapAccess<'a, K, V>
/// Hash table data that is actually stored in the shared memory area.
///
/// NOTE: We carve out the parts from a contiguous chunk. Growing and shrinking the hash table
/// relies on the memory layout! The data structures are laid out in the contiguous shared memory
/// area as follows:
///
/// HashMapShared
/// [buckets]
/// [dictionary]
///
/// In between the above parts, there can be padding bytes to align the parts correctly.
struct HashMapShared<'a, K, V> {
inner: CoreHashMap<'a, K, V>
}
impl<'a, K, V> HashMapInit<'a, K, V, rustc_hash::FxBuildHasher>
where
K: Clone + Hash + Eq
{
/// Place the hash table within a user-supplied fixed memory area.
pub fn with_fixed(
num_buckets: u32,
area: &'a mut [MaybeUninit<u8>],
) -> HashMapInit<'a, K, V> {
Self {
num_buckets,
shmem_handle: None,
shared_ptr: area.as_mut_ptr().cast(),
shared_size: area.len(),
hasher: rustc_hash::FxBuildHasher::default(),
}
}
/// Place a new hash map in the given shared memory area
pub fn with_shmem(num_buckets: u32, shmem: ShmemHandle) -> HashMapInit<'a, K, V> {
let size = Self::estimate_size(num_buckets);
shmem
.set_size(size)
.expect("could not resize shared memory area");
Self {
num_buckets,
shared_ptr: shmem.data_ptr.as_ptr().cast(),
shmem_handle: Some(shmem),
shared_size: size,
hasher: rustc_hash::FxBuildHasher::default()
}
}
/// Make a resizable hash map within a new shared memory area with the given name.
pub fn new_resizeable_named(num_buckets: u32, max_buckets: u32, name: &str) -> HashMapInit<'a, K, V> {
let size = Self::estimate_size(num_buckets);
let max_size = Self::estimate_size(max_buckets);
let shmem = ShmemHandle::new(name, size, max_size)
.expect("failed to make shared memory area");
Self {
num_buckets,
shared_ptr: shmem.data_ptr.as_ptr().cast(),
shmem_handle: Some(shmem),
shared_size: size,
hasher: rustc_hash::FxBuildHasher::default()
}
}
/// Make a resizable hash map within a new shared memory area with an auto-generated name.
pub fn new_resizeable(num_buckets: u32, max_buckets: u32) -> HashMapInit<'a, K, V> {
use std::sync::atomic::{AtomicUsize, Ordering};
static COUNTER: AtomicUsize = AtomicUsize::new(0);
let val = COUNTER.fetch_add(1, Ordering::Relaxed);
let name = format!("neon_shmem_hmap{}", val);
Self::new_resizeable_named(num_buckets, max_buckets, &name)
}
}
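// A minimal usage sketch (identifiers and sizes are illustrative, mirroring the tests):
// build a resizeable map backed by a fresh shared memory area and upsert one entry.
fn _example_usage() {
    let mut writer = HashMapInit::<u64, u64>::new_resizeable(1024, 4096).attach_writer();
    let key = 42u64;
    let hash = writer.get_hash_value(&key);
    match writer.entry_with_hash(key, hash) {
        Entry::Occupied(mut e) => { e.insert(1); }
        Entry::Vacant(e) => { e.insert(1).expect("map is full"); }
    }
    assert_eq!(writer.get_with_hash(&key, hash), Some(&1));
}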
impl<'a, K, V, S: BuildHasher> HashMapAccess<'a, K, V, S>
where
K: Clone + Hash + Eq,
{
pub fn get<'e>(&'e self, key: &K) -> Option<ValueReadGuard<'e, K, V>> {
/// Hash a key using the map's hasher.
pub fn get_hash_value(&self, key: &K) -> u64 {
self.hasher.hash_one(key)
}
/// Get a reference to the corresponding value for a key given its hash.
pub fn get_with_hash<'e>(&'e self, key: &K, hash: u64) -> Option<&'e V> {
let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
let lock_guard = map.inner.read();
match lock_guard.get(key) {
None => None,
Some(val_ref) => {
let val_ptr = std::ptr::from_ref(val_ref);
Some(ValueReadGuard {
_lock_guard: lock_guard,
value: val_ptr,
})
}
}
map.inner.get_with_hash(key, hash)
}
/// Insert a value
pub fn insert(&self, key: &K, value: V) -> Result<bool, OutOfMemoryError> {
let mut success = None;
/// Get the entry for a key given its hash. The entry may be occupied or vacant.
pub fn entry_with_hash(&mut self, key: K, hash: u64) -> Entry<'a, '_, K, V> {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
self.update_with_fn(key, |existing| {
if let Some(_) = existing {
success = Some(false);
UpdateAction::Nothing
} else {
success = Some(true);
UpdateAction::Insert(value)
}
})?;
Ok(success.expect("value_fn not called"))
map.inner.entry_with_hash(key, hash)
}
/// Remove value. Returns true if it existed
pub fn remove(&self, key: &K) -> bool {
let mut result = false;
self.update_with_fn(key, |existing| match existing {
Some(_) => {
result = true;
UpdateAction::Remove
/// Remove a key given its hash. Does nothing if the key is not present.
pub fn remove_with_hash(&mut self, key: &K, hash: u64) {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
match map.inner.entry_with_hash(key.clone(), hash) {
Entry::Occupied(e) => {
e.remove();
}
None => UpdateAction::Nothing,
})
.expect("out of memory while removing");
result
Entry::Vacant(_) => {}
};
}
/// Update key using the given function. All the other modifying operations are based on this.
pub fn update_with_fn<F>(&self, key: &K, value_fn: F) -> Result<(), OutOfMemoryError>
where
F: FnOnce(Option<&V>) -> UpdateAction<V>,
{
let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
let mut lock_guard = map.inner.write();
let old_val = lock_guard.get(key);
let action = value_fn(old_val);
match (old_val, action) {
(_, UpdateAction::Nothing) => {}
(_, UpdateAction::Insert(new_val)) => {
let _ = lock_guard.insert(key, new_val);
}
(None, UpdateAction::Remove) => panic!("Remove action with no old value"),
(Some(_), UpdateAction::Remove) => {
let _ = lock_guard.remove(key);
}
}
Ok(())
}
/// Update key using the given function. All the other modifying operations are based on this.
pub fn update_with_fn_at_bucket<F>(
&self,
pos: usize,
value_fn: F,
) -> Result<(), OutOfMemoryError>
where
F: FnOnce(Option<&V>) -> UpdateAction<V>,
{
let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
let mut lock_guard = map.inner.write();
let old_val = lock_guard.get_bucket(pos);
let action = value_fn(old_val.map(|(_k, v)| v));
match (old_val, action) {
(_, UpdateAction::Nothing) => {}
(_, UpdateAction::Insert(_new_val)) => panic!("cannot insert without key"),
(None, UpdateAction::Remove) => panic!("Remove action with no old value"),
(Some((key, _value)), UpdateAction::Remove) => {
let key = key.clone();
let _ = lock_guard.remove(&key);
}
}
Ok(())
/// Return the entry at the given bucket index, if that bucket is occupied.
pub fn entry_at_bucket(&mut self, pos: usize) -> Option<OccupiedEntry<'a, '_, K, V>> {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
map.inner.entry_at_bucket(pos)
}
/// Returns the number of buckets in the table.
pub fn get_num_buckets(&self) -> usize {
let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
map.inner.read().get_num_buckets()
map.inner.get_num_buckets()
}
/// Return the key and value stored in bucket with given index. This can be used to
/// iterate through the hash map. (An Iterator might be nicer. The communicator's
/// clock algorithm needs to _slowly_ iterate through all buckets with its clock hand,
/// without holding a lock. If we switch to an Iterator, it must not hold the lock.)
pub fn get_bucket<'e>(&'e self, pos: usize) -> Option<ValueReadGuard<'e, K, V>> {
/// iterate through the hash map.
// TODO: An Iterator might be nicer. The communicator's clock algorithm needs to
// _slowly_ iterate through all buckets with its clock hand, without holding a lock.
// If we switch to an Iterator, it must not hold the lock.
pub fn get_at_bucket(&self, pos: usize) -> Option<&(K, V)> {
let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
let lock_guard = map.inner.read();
match lock_guard.get_bucket(pos) {
None => None,
Some((_key, val_ref)) => {
let val_ptr = std::ptr::from_ref(val_ref);
Some(ValueReadGuard {
_lock_guard: lock_guard,
value: val_ptr,
})
}
if pos >= map.inner.buckets.len() {
return None;
}
let bucket = &map.inner.buckets[pos];
bucket.inner.as_ref()
}
// for metrics
/// Returns the index of the bucket a given value corresponds to.
pub fn get_bucket_for_value(&self, val_ptr: *const V) -> usize {
let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
let origin = map.inner.buckets.as_ptr();
let idx = (val_ptr as usize - origin as usize) / (size_of::<Bucket<K, V>>() as usize);
assert!(idx < map.inner.buckets.len());
idx
}
/// Returns the number of occupied buckets in the table.
pub fn get_num_buckets_in_use(&self) -> usize {
let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
map.inner.read().buckets_in_use as usize
map.inner.buckets_in_use as usize
}
/// Grow
/// Clears all entries in a table. Does not reset any shrinking operations.
pub fn clear(&mut self) {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
let inner = &mut map.inner;
inner.clear()
}
/// Perform an in-place rehash of the region `0..rehash_buckets` of the table and rebuild the
/// `buckets` and `dictionary` slices for a table with `num_buckets` buckets. The freelist is
/// rebuilt in the process.
fn rehash_dict(
&mut self,
inner: &mut CoreHashMap<'a, K, V>,
buckets_ptr: *mut core::Bucket<K, V>,
end_ptr: *mut u8,
num_buckets: u32,
rehash_buckets: u32,
) {
inner.free_head = INVALID_POS;
let buckets;
let dictionary;
unsafe {
let buckets_end_ptr = buckets_ptr.add(num_buckets as usize);
let dictionary_ptr: *mut u32 = buckets_end_ptr
.byte_add(buckets_end_ptr.align_offset(align_of::<u32>()))
.cast();
let dictionary_size: usize =
end_ptr.byte_offset_from(buckets_end_ptr) as usize / size_of::<u32>();
buckets = std::slice::from_raw_parts_mut(buckets_ptr, num_buckets as usize);
dictionary = std::slice::from_raw_parts_mut(dictionary_ptr, dictionary_size);
}
for i in 0..dictionary.len() {
dictionary[i] = INVALID_POS;
}
for i in 0..rehash_buckets as usize {
if buckets[i].inner.is_none() {
buckets[i].next = inner.free_head;
inner.free_head = i as u32;
continue;
}
let hash = self.hasher.hash_one(&buckets[i].inner.as_ref().unwrap().0);
let pos: usize = (hash % dictionary.len() as u64) as usize;
buckets[i].next = dictionary[pos];
dictionary[pos] = i as u32;
}
inner.dictionary = dictionary;
inner.buckets = buckets;
}
/// Rehash the map without growing or shrinking.
pub fn shuffle(&mut self) {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
let inner = &mut map.inner;
let num_buckets = inner.get_num_buckets() as u32;
let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);
let end_ptr: *mut u8 = unsafe { (self.shared_ptr as *mut u8).add(size_bytes) };
let buckets_ptr = inner.buckets.as_mut_ptr();
self.rehash_dict(inner, buckets_ptr, end_ptr, num_buckets, num_buckets);
}
/// Grow the number of buckets within the table.
///
/// 1. grow the underlying shared memory area
/// 2. Initialize new buckets. This overwrites the current dictionary
/// 3. Recalculate the dictionary
pub fn grow(&self, num_buckets: u32) -> Result<(), crate::shmem::Error> {
let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
let mut lock_guard = map.inner.write();
let inner = &mut *lock_guard;
/// 1. Grows the underlying shared memory area
/// 2. Initializes new buckets and overwrites the current dictionary
/// 3. Rehashes the dictionary
pub fn grow(&mut self, num_buckets: u32) -> Result<(), crate::shmem::Error> {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
let inner = &mut map.inner;
let old_num_buckets = inner.buckets.len() as u32;
if num_buckets < old_num_buckets {
@@ -281,7 +347,7 @@ where
.as_ref()
.expect("grow called on a fixed-size hash table");
let size_bytes = HashMapInit::<K, V>::estimate_size(num_buckets);
let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);
shmem_handle.set_size(size_bytes)?;
let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) };
@@ -292,8 +358,7 @@ where
for i in old_num_buckets..num_buckets {
let bucket_ptr = buckets_ptr.add(i as usize);
bucket_ptr.write(core::Bucket {
hash: 0,
next: if i < num_buckets {
next: if i < num_buckets-1 {
i as u32 + 1
} else {
inner.free_head
@@ -303,64 +368,81 @@ where
}
}
// Recalculate the dictionary
let buckets;
let dictionary;
unsafe {
let buckets_end_ptr = buckets_ptr.add(num_buckets as usize);
let dictionary_ptr: *mut u32 = buckets_end_ptr
.byte_add(buckets_end_ptr.align_offset(align_of::<u32>()))
.cast();
let dictionary_size: usize =
end_ptr.byte_offset_from(buckets_end_ptr) as usize / size_of::<u32>();
buckets = std::slice::from_raw_parts_mut(buckets_ptr, num_buckets as usize);
dictionary = std::slice::from_raw_parts_mut(dictionary_ptr, dictionary_size);
}
for i in 0..dictionary.len() {
dictionary[i] = core::INVALID_POS;
}
for i in 0..old_num_buckets as usize {
if buckets[i].inner.is_none() {
continue;
}
let pos: usize = (buckets[i].hash % dictionary.len() as u64) as usize;
buckets[i].next = dictionary[pos];
dictionary[pos] = i as u32;
}
// Finally, update the CoreHashMap struct
inner.dictionary = dictionary;
inner.buckets = buckets;
self.rehash_dict(inner, buckets_ptr, end_ptr, num_buckets, old_num_buckets);
inner.free_head = old_num_buckets;
Ok(())
}
// TODO: Shrinking is a multi-step process that requires co-operation from the caller
//
// 1. The caller must first call begin_shrink(). That forbids allocation of higher-numbered
// buckets.
//
// 2. Next, the caller must evict all entries in higher-numbered buckets.
//
// 3. Finally, call finish_shrink(). This recomputes the dictionary and shrinks the underlying
// shmem area
}
/// Begin a shrink, limiting all new allocations to be in buckets with index below `num_buckets`.
pub fn begin_shrink(&mut self, num_buckets: u32) {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
if num_buckets > map.inner.get_num_buckets() as u32 {
panic!("shrink called with a larger number of buckets");
}
_ = self
.shmem_handle
.as_ref()
.expect("shrink called on a fixed-size hash table");
map.inner.alloc_limit = num_buckets;
}
pub struct ValueReadGuard<'a, K, V> {
_lock_guard: spin::RwLockReadGuard<'a, CoreHashMap<'a, K, V>>,
value: *const V,
}
impl<'a, K, V> Deref for ValueReadGuard<'a, K, V> {
type Target = V;
fn deref(&self) -> &Self::Target {
// SAFETY: The `lock_guard` ensures that the underlying map (and thus the value pointed to
// by `value`) remains valid for the lifetime `'a`. The `value` has been obtained from a
// valid reference within the map.
unsafe { &*self.value }
/// Returns whether a shrink operation is currently in progress.
pub fn is_shrinking(&self) -> bool {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
map.inner.is_shrinking()
}
/// Returns how many entries need to be evicted before shrink can complete.
pub fn shrink_remaining(&self) -> usize {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
let inner = &mut map.inner;
if !inner.is_shrinking() {
panic!("shrink_remaining called when no ongoing shrink")
} else {
inner.buckets_in_use
.checked_sub(inner.alloc_limit)
.unwrap_or(0)
as usize
}
}
/// Complete a shrink after the caller has evicted enough entries, removing the unused buckets and rehashing.
pub fn finish_shrink(&mut self) -> Result<(), crate::shmem::Error> {
let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
let inner = &mut map.inner;
if !inner.is_shrinking() {
panic!("called finish_shrink when no shrink is in progress");
}
let num_buckets = inner.alloc_limit;
if inner.get_num_buckets() == num_buckets as usize {
return Ok(());
} else if inner.buckets_in_use > num_buckets {
panic!("called finish_shrink before enough entries were removed");
}
for i in (num_buckets as usize)..inner.buckets.len() {
if let Some((k, v)) = inner.buckets[i].inner.take() {
// alloc_bucket increments buckets_in_use, so decrement it first since we are only moving an existing entry
inner.buckets_in_use -= 1;
inner.alloc_bucket(k, v).unwrap();
}
}
let shmem_handle = self
.shmem_handle
.as_ref()
.expect("shrink called on a fixed-size hash table");
let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);
shmem_handle.set_size(size_bytes)?;
let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) };
let buckets_ptr = inner.buckets.as_mut_ptr();
self.rehash_dict(inner, buckets_ptr, end_ptr, num_buckets, num_buckets);
inner.alloc_limit = INVALID_POS;
Ok(())
}
}
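// A minimal sketch of the caller-driven shrink protocol described above; the eviction
// policy here (sweep buckets in index order) is purely illustrative.
fn _example_shrink(writer: &mut HashMapAccess<'_, u64, u64>) -> Result<(), crate::shmem::Error> {
    writer.begin_shrink(512);
    let mut pos = 0;
    // Evict entries until the remaining ones fit under the new limit.
    while writer.shrink_remaining() > 0 {
        if let Some(e) = writer.entry_at_bucket(pos) {
            e.remove();
        }
        pos += 1;
    }
    // Rehash and release the now-unused memory.
    writer.finish_shrink()
}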

View File

@@ -1,37 +1,48 @@
//! Simple hash table with chaining
//!
//! # Resizing
//!
//! Simple hash table with chaining.
use std::hash::{DefaultHasher, Hash, Hasher};
use std::hash::Hash;
use std::mem::MaybeUninit;
use crate::hash::entry::{Entry, OccupiedEntry, PrevPos, VacantEntry};
pub(crate) const INVALID_POS: u32 = u32::MAX;
// Bucket
/// Fundamental storage unit within the hash table. Either empty or contains a key-value pair.
/// Always part of a chain of some kind (either a freelist if empty or a hash chain if full).
pub(crate) struct Bucket<K, V> {
pub(crate) hash: u64,
pub(crate) next: u32,
/// Index of next bucket in the chain.
pub(crate) next: u32,
/// Key-value pair contained within bucket.
pub(crate) inner: Option<(K, V)>,
}
/// Core hash table implementation.
pub(crate) struct CoreHashMap<'a, K, V> {
/// Dictionary used to map hashes to bucket indices.
pub(crate) dictionary: &'a mut [u32],
/// Buckets containing key-value pairs.
pub(crate) buckets: &'a mut [Bucket<K, V>],
/// Head of the freelist.
pub(crate) free_head: u32,
// metrics
/// Exclusive upper bound on bucket indices that may be allocated. INVALID_POS if there is no limit.
pub(crate) alloc_limit: u32,
/// The number of currently occupied buckets.
pub(crate) buckets_in_use: u32,
// Unclear what the purpose of this is.
pub(crate) _user_list_head: u32,
}
/// Error for when there are no empty buckets left but one is needed.
#[derive(Debug)]
pub struct FullError();
impl<'a, K, V> CoreHashMap<'a, K, V>
impl<'a, K: Hash + Eq, V> CoreHashMap<'a, K, V>
where
K: Clone + Hash + Eq,
{
const FILL_FACTOR: f32 = 0.60;
/// Estimate the size of the data contained within the hash map.
pub fn estimate_size(num_buckets: u32) -> usize {
let mut size = 0;
@@ -43,60 +54,34 @@ where
as usize;
size
}
pub fn new(num_buckets: u32, area: &'a mut [u8]) -> CoreHashMap<'a, K, V> {
let len = area.len();
let mut ptr: *mut u8 = area.as_mut_ptr();
let end_ptr: *mut u8 = unsafe { area.as_mut_ptr().add(len) };
// carve out the buckets
ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<Bucket<K, V>>())) };
let buckets_ptr = ptr;
ptr = unsafe { ptr.add(size_of::<Bucket<K, V>>() * num_buckets as usize) };
// use remaining space for the dictionary
ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<u32>())) };
let dictionary_ptr = ptr;
assert!(ptr.addr() < end_ptr.addr());
let dictionary_size = unsafe { end_ptr.byte_offset_from(ptr) / size_of::<u32>() as isize };
assert!(dictionary_size > 0);
}
pub fn new(
buckets: &'a mut [MaybeUninit<Bucket<K, V>>],
dictionary: &'a mut [MaybeUninit<u32>],
) -> CoreHashMap<'a, K, V> {
// Initialize the buckets
let buckets = {
let buckets_ptr: *mut MaybeUninit<Bucket<K, V>> = buckets_ptr.cast();
let buckets =
unsafe { std::slice::from_raw_parts_mut(buckets_ptr, num_buckets as usize) };
for i in 0..buckets.len() {
buckets[i].write(Bucket {
hash: 0,
next: if i < buckets.len() - 1 {
i as u32 + 1
} else {
INVALID_POS
},
inner: None,
});
}
// TODO: use std::slice::assume_init_mut() once it stabilizes
unsafe { std::slice::from_raw_parts_mut(buckets_ptr.cast(), num_buckets as usize) }
};
for i in 0..buckets.len() {
buckets[i].write(Bucket {
next: if i < buckets.len() - 1 {
i as u32 + 1
} else {
INVALID_POS
},
inner: None,
});
}
// Initialize the dictionary
let dictionary = {
let dictionary_ptr: *mut MaybeUninit<u32> = dictionary_ptr.cast();
let dictionary =
unsafe { std::slice::from_raw_parts_mut(dictionary_ptr, dictionary_size as usize) };
for i in 0..dictionary.len() {
dictionary[i].write(INVALID_POS);
}
for i in 0..dictionary.len() {
dictionary[i].write(INVALID_POS);
}
// TODO: use std::slice::assume_init_mut() once it stabilizes
unsafe {
std::slice::from_raw_parts_mut(dictionary_ptr.cast(), dictionary_size as usize)
}
// TODO: use MaybeUninit::slice_assume_init_mut() once it stabilizes
let buckets =
unsafe { std::slice::from_raw_parts_mut(buckets.as_mut_ptr().cast(), buckets.len()) };
let dictionary = unsafe {
std::slice::from_raw_parts_mut(dictionary.as_mut_ptr().cast(), dictionary.len())
};
CoreHashMap {
@@ -104,14 +89,13 @@ where
buckets,
free_head: 0,
buckets_in_use: 0,
_user_list_head: INVALID_POS,
alloc_limit: INVALID_POS,
}
}
pub fn get(&self, key: &K) -> Option<&V> {
let mut hasher = DefaultHasher::new();
key.hash(&mut hasher);
let hash = hasher.finish();
/// Get the value associated with a key (if it exists) given its hash.
pub fn get_with_hash(&self, key: &K, hash: u64) -> Option<&V> {
let mut next = self.dictionary[hash as usize % self.dictionary.len()];
loop {
if next == INVALID_POS {
@@ -127,107 +111,132 @@ where
}
}
pub fn insert(&mut self, key: &K, value: V) -> Result<(), FullError> {
let mut hasher = DefaultHasher::new();
key.hash(&mut hasher);
let hash = hasher.finish();
let first = self.dictionary[hash as usize % self.dictionary.len()];
/// Get the `Entry` associated with a key, given its hash. This should be used for updates and inserts.
pub fn entry_with_hash(&mut self, key: K, hash: u64) -> Entry<'a, '_, K, V> {
let dict_pos = hash as usize % self.dictionary.len();
let first = self.dictionary[dict_pos];
if first == INVALID_POS {
// no existing entry
let pos = self.alloc_bucket(key.clone(), value, hash)?;
if pos == INVALID_POS {
return Err(FullError());
}
self.dictionary[hash as usize % self.dictionary.len()] = pos;
return Ok(());
return Entry::Vacant(VacantEntry {
map: self,
key,
dict_pos: dict_pos as u32,
});
}
let mut prev_pos = PrevPos::First(dict_pos as u32);
let mut next = first;
loop {
let bucket = &mut self.buckets[next as usize];
let (bucket_key, bucket_value) = bucket.inner.as_mut().expect("entry is in use");
if bucket_key == key {
// found existing entry, update its value
*bucket_value = value;
return Ok(());
let (bucket_key, _bucket_value) = bucket.inner.as_mut().expect("entry is in use");
if *bucket_key == key {
// found existing entry
return Entry::Occupied(OccupiedEntry {
map: self,
_key: key,
prev_pos,
bucket_pos: next,
});
}
if bucket.next == INVALID_POS {
// No existing entry found. Append to the chain
let pos = self.alloc_bucket(key.clone(), value, hash)?;
if pos == INVALID_POS {
return Err(FullError());
}
self.buckets[next as usize].next = pos;
return Ok(());
// No existing entry
return Entry::Vacant(VacantEntry {
map: self,
key,
dict_pos: dict_pos as u32,
});
}
prev_pos = PrevPos::Chained(next);
next = bucket.next;
}
}
pub fn remove(&mut self, key: &K) -> Result<(), FullError> {
let mut hasher = DefaultHasher::new();
key.hash(&mut hasher);
let hash = hasher.finish();
let mut next = self.dictionary[hash as usize % self.dictionary.len()];
let mut prev_pos: u32 = INVALID_POS;
loop {
if next == INVALID_POS {
// no existing entry
return Ok(());
}
let bucket = &mut self.buckets[next as usize];
let (bucket_key, _) = bucket.inner.as_mut().expect("entry is in use");
if bucket_key == key {
// found existing entry, unlink it from the chain
if prev_pos == INVALID_POS {
self.dictionary[hash as usize % self.dictionary.len()] = bucket.next;
} else {
self.buckets[prev_pos as usize].next = bucket.next;
}
// and add it to the freelist
let bucket = &mut self.buckets[next as usize];
bucket.hash = 0;
bucket.inner = None;
bucket.next = self.free_head;
self.free_head = next;
self.buckets_in_use -= 1;
return Ok(());
}
prev_pos = next;
next = bucket.next;
}
}
/// Get the number of buckets in the map.
pub fn get_num_buckets(&self) -> usize {
self.buckets.len()
}
pub fn get_bucket(&self, pos: usize) -> Option<&(K, V)> {
if pos >= self.buckets.len() {
return None;
/// Returns whether there is an ongoing shrink operation.
pub fn is_shrinking(&self) -> bool {
self.alloc_limit != INVALID_POS
}
/// Clears all entries from the hashmap.
///
/// Does not reset any allocation limits, but does clear any entries beyond them.
pub fn clear(&mut self) {
for i in 0..self.buckets.len() {
self.buckets[i] = Bucket {
next: if i < self.buckets.len() - 1 {
i as u32 + 1
} else {
INVALID_POS
},
inner: None,
}
}
self.buckets[pos].inner.as_ref()
for i in 0..self.dictionary.len() {
self.dictionary[i] = INVALID_POS;
}
self.buckets_in_use = 0;
}
/// Optionally gets the entry at an index if it is occupied.
pub fn entry_at_bucket(&mut self, pos: usize) -> Option<OccupiedEntry<'a, '_, K, V>> {
if pos >= self.buckets.len() {
return None;
}
let entry = self.buckets[pos].inner.as_ref();
match entry {
Some((key, _)) => Some(OccupiedEntry {
_key: key.clone(),
bucket_pos: pos as u32,
prev_pos: PrevPos::Unknown,
map: self,
}),
_ => None,
}
}
fn alloc_bucket(&mut self, key: K, value: V, hash: u64) -> Result<u32, FullError> {
let pos = self.free_head;
if pos == INVALID_POS {
return Err(FullError());
}
/// Find the position of an unused bucket via the freelist and initialize it.
pub(crate) fn alloc_bucket(&mut self, key: K, value: V) -> Result<u32, FullError> {
let mut pos = self.free_head;
let bucket = &mut self.buckets[pos as usize];
self.free_head = bucket.next;
self.buckets_in_use += 1;
// Find the first bucket we're *allowed* to use.
let mut prev = PrevPos::First(self.free_head);
while pos != INVALID_POS && pos >= self.alloc_limit {
let bucket = &mut self.buckets[pos as usize];
prev = PrevPos::Chained(pos);
pos = bucket.next;
}
if pos == INVALID_POS {
return Err(FullError());
}
bucket.hash = hash;
// Repair the freelist.
match prev {
PrevPos::First(_) => {
let next_pos = self.buckets[pos as usize].next;
self.free_head = next_pos;
}
PrevPos::Chained(p) => if p != INVALID_POS {
let next_pos = self.buckets[pos as usize].next;
self.buckets[p as usize].next = next_pos;
},
PrevPos::Unknown => unreachable!()
}
// Initialize the bucket.
let bucket = &mut self.buckets[pos as usize];
self.buckets_in_use += 1;
bucket.next = INVALID_POS;
bucket.inner = Some((key, value));
return Ok(pos);
}
}

View File

@@ -0,0 +1,111 @@
//! Entry API, analogous to `std::collections::hash_map::Entry`.
use crate::hash::core::{CoreHashMap, FullError, INVALID_POS};
use std::hash::Hash;
use std::mem;
/// View into an entry in the map (either vacant or occupied).
pub enum Entry<'a, 'b, K, V> {
Occupied(OccupiedEntry<'a, 'b, K, V>),
Vacant(VacantEntry<'a, 'b, K, V>),
}
/// Enum representing the previous position within a chain.
#[derive(Clone, Copy)]
pub(crate) enum PrevPos {
/// Starting index within the dictionary.
First(u32),
/// Regular index within the buckets.
Chained(u32),
/// Unknown - e.g. the associated entry was retrieved by index instead of chain.
Unknown,
}
/// View into an occupied entry within the map.
pub struct OccupiedEntry<'a, 'b, K, V> {
/// Mutable reference to the map containing this entry.
pub(crate) map: &'b mut CoreHashMap<'a, K, V>,
/// The key of the occupied entry
pub(crate) _key: K,
/// The index of the previous entry in the chain.
pub(crate) prev_pos: PrevPos,
/// The position of the bucket in the CoreHashMap's buckets array.
pub(crate) bucket_pos: u32,
}
impl<'a, 'b, K, V> OccupiedEntry<'a, 'b, K, V> {
pub fn get(&self) -> &V {
&self.map.buckets[self.bucket_pos as usize]
.inner
.as_ref()
.unwrap()
.1
}
pub fn get_mut(&mut self) -> &mut V {
&mut self.map.buckets[self.bucket_pos as usize]
.inner
.as_mut()
.unwrap()
.1
}
/// Inserts a value into the entry, replacing (and returning) the existing value.
pub fn insert(&mut self, value: V) -> V {
let bucket = &mut self.map.buckets[self.bucket_pos as usize];
// This assumes inner is Some, which it must be for an OccupiedEntry
let old_value = mem::replace(&mut bucket.inner.as_mut().unwrap().1, value);
old_value
}
/// Removes the entry from the hash map, returning the value originally stored within it.
pub fn remove(self) -> V {
// The bucket's `inner` is always Some for an OccupiedEntry.
let bucket = &mut self.map.buckets[self.bucket_pos as usize];
// unlink it from the chain
match self.prev_pos {
PrevPos::First(dict_pos) => self.map.dictionary[dict_pos as usize] = bucket.next,
PrevPos::Chained(bucket_pos) => {
self.map.buckets[bucket_pos as usize].next = bucket.next
},
PrevPos::Unknown => panic!("can't safely remove entry with unknown previous entry"),
}
// and add it to the freelist
let bucket = &mut self.map.buckets[self.bucket_pos as usize];
let old_value = bucket.inner.take();
bucket.next = self.map.free_head;
self.map.free_head = self.bucket_pos;
self.map.buckets_in_use -= 1;
return old_value.unwrap().1;
}
}
/// An abstract view into a vacant entry within the map.
pub struct VacantEntry<'a, 'b, K, V> {
/// Mutable reference to the map containing this entry.
pub(crate) map: &'b mut CoreHashMap<'a, K, V>,
/// The key to be inserted into this entry.
pub(crate) key: K,
/// The position within the dictionary corresponding to the key's hash.
pub(crate) dict_pos: u32,
}
impl<'a, 'b, K: Clone + Hash + Eq, V> VacantEntry<'a, 'b, K, V> {
/// Insert a value into the vacant entry, finding and populating an empty bucket in the process.
pub fn insert(self, value: V) -> Result<&'b mut V, FullError> {
let pos = self.map.alloc_bucket(self.key, value)?;
if pos == INVALID_POS {
return Err(FullError());
}
let bucket = &mut self.map.buckets[pos as usize];
bucket.next = self.map.dictionary[self.dict_pos as usize];
self.map.dictionary[self.dict_pos as usize] = pos;
let result = &mut self.map.buckets[pos as usize].inner.as_mut().unwrap().1;
return Ok(result);
}
}
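// A small sketch of how these views compose (the counter semantics are illustrative):
// bump an existing value or insert a fresh one through the Entry returned by the map.
fn _example_counter(counts: &mut CoreHashMap<'_, u32, u64>, key: u32, hash: u64) {
    match counts.entry_with_hash(key, hash) {
        Entry::Occupied(mut e) => *e.get_mut() += 1,
        Entry::Vacant(e) => { e.insert(1).expect("no free buckets"); }
    }
}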

View File

@@ -1,11 +1,13 @@
use std::collections::BTreeMap;
use std::collections::HashSet;
use std::fmt::{Debug, Formatter};
use std::mem::uninitialized;
use std::mem::MaybeUninit;
use std::sync::atomic::{AtomicUsize, Ordering};
use crate::hash::HashMapAccess;
use crate::hash::HashMapInit;
use crate::hash::UpdateAction;
use crate::hash::Entry;
use crate::shmem::ShmemHandle;
use rand::seq::SliceRandom;
@@ -35,25 +37,29 @@ impl<'a> From<&'a [u8]> for TestKey {
}
}
fn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {
const MAX_MEM_SIZE: usize = 10000000;
let shmem = ShmemHandle::new("test_inserts", 0, MAX_MEM_SIZE).unwrap();
let init_struct = HashMapInit::<TestKey, usize>::init_in_shmem(100000, shmem);
let w = init_struct.attach_writer();
fn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {
let mut w = HashMapInit::<TestKey, usize>::new_resizeable_named(
100000, 120000, "test_inserts"
).attach_writer();
for (idx, k) in keys.iter().enumerate() {
let res = w.insert(&(*k).into(), idx);
assert!(res.is_ok());
let hash = w.get_hash_value(&(*k).into());
let res = w.entry_with_hash((*k).into(), hash);
match res {
Entry::Occupied(mut e) => { e.insert(idx); }
Entry::Vacant(e) => {
let res = e.insert(idx);
assert!(res.is_ok());
},
};
}
for (idx, k) in keys.iter().enumerate() {
let x = w.get(&(*k).into());
let hash = w.get_hash_value(&(*k).into());
let x = w.get_with_hash(&(*k).into(), hash);
let value = x.as_deref().copied();
assert_eq!(value, Some(idx));
}
//eprintln!("stats: {:?}", tree_writer.get_statistics());
}
#[test]
@@ -121,11 +127,9 @@ struct TestOp(TestKey, Option<usize>);
fn apply_op(
op: &TestOp,
sut: &HashMapAccess<TestKey, TestValue>,
map: &mut HashMapAccess<TestKey, usize>,
shadow: &mut BTreeMap<TestKey, usize>,
) {
eprintln!("applying op: {op:?}");
// apply the change to the shadow tree first
let shadow_existing = if let Some(v) = op.1 {
shadow.insert(op.0, v)
@@ -133,33 +137,78 @@ fn apply_op(
shadow.remove(&op.0)
};
// apply to Art tree
sut.update_with_fn(&op.0, |existing| {
assert_eq!(existing.map(TestValue::load), shadow_existing);
let hash = map.get_hash_value(&op.0);
let entry = map.entry_with_hash(op.0, hash);
let hash_existing = match op.1 {
Some(new) => {
match entry {
Entry::Occupied(mut e) => Some(e.insert(new)),
Entry::Vacant(e) => { e.insert(new).unwrap(); None },
}
},
None => {
match entry {
Entry::Occupied(e) => Some(e.remove()),
Entry::Vacant(_) => None,
}
},
};
match (existing, op.1) {
(None, None) => UpdateAction::Nothing,
(None, Some(new_val)) => UpdateAction::Insert(TestValue::new(new_val)),
(Some(_old_val), None) => UpdateAction::Remove,
(Some(old_val), Some(new_val)) => {
old_val.0.store(new_val, Ordering::Relaxed);
UpdateAction::Nothing
}
}
})
.expect("out of memory");
assert_eq!(shadow_existing, hash_existing);
}
fn do_random_ops(
num_ops: usize,
size: u32,
insert_prob: f64,
writer: &mut HashMapAccess<TestKey, usize>,
shadow: &mut BTreeMap<TestKey, usize>,
rng: &mut rand::rngs::ThreadRng,
) {
for i in 0..num_ops {
let key: TestKey = ((rng.next_u32() % size) as u128).into();
let op = TestOp(key, if rng.random_bool(insert_prob) { Some(i) } else { None });
apply_op(&op, writer, shadow);
}
}
fn do_deletes(
num_ops: usize,
writer: &mut HashMapAccess<TestKey, usize>,
shadow: &mut BTreeMap<TestKey, usize>,
) {
for _ in 0..num_ops {
let (k, _) = shadow.pop_first().unwrap();
let hash = writer.get_hash_value(&k);
writer.remove_with_hash(&k, hash);
}
}
fn do_shrink(
writer: &mut HashMapAccess<TestKey, usize>,
shadow: &mut BTreeMap<TestKey, usize>,
_from: u32,
to: u32
) {
writer.begin_shrink(to);
while writer.get_num_buckets_in_use() > to as usize {
let (k, _) = shadow.pop_first().unwrap();
let hash = writer.get_hash_value(&k);
let entry = writer.entry_with_hash(k, hash);
if let Entry::Occupied(mut e) = entry {
e.remove();
}
}
writer.finish_shrink().unwrap();
}
#[test]
fn random_ops() {
const MAX_MEM_SIZE: usize = 10000000;
let shmem = ShmemHandle::new("test_inserts", 0, MAX_MEM_SIZE).unwrap();
let init_struct = HashMapInit::<TestKey, TestValue>::init_in_shmem(100000, shmem);
let writer = init_struct.attach_writer();
let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
100000, 120000, "test_random"
).attach_writer();
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
let distribution = Zipf::new(u128::MAX as f64, 1.1).unwrap();
let mut rng = rand::rng();
for i in 0..100000 {
@@ -167,54 +216,169 @@ fn random_ops() {
let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
apply_op(&op, &writer, &mut shadow);
apply_op(&op, &mut writer, &mut shadow);
if i % 1000 == 0 {
eprintln!("{i} ops processed");
//eprintln!("stats: {:?}", tree_writer.get_statistics());
//test_iter(&tree_writer, &shadow);
}
}
}
#[test]
fn test_shuffle() {
let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
1000, 1200, "test_shuf"
).attach_writer();
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
let mut rng = rand::rng();
do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
writer.shuffle();
do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
}
#[test]
fn test_grow() {
const MEM_SIZE: usize = 10000000;
let shmem = ShmemHandle::new("test_grow", 0, MEM_SIZE).unwrap();
let init_struct = HashMapInit::<TestKey, TestValue>::init_in_shmem(1000, shmem);
let writer = init_struct.attach_writer();
let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
1000, 2000, "test_grow"
).attach_writer();
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
let mut rng = rand::rng();
for i in 0..10000 {
let key: TestKey = ((rng.next_u32() % 1000) as u128).into();
let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
apply_op(&op, &writer, &mut shadow);
if i % 1000 == 0 {
eprintln!("{i} ops processed");
//eprintln!("stats: {:?}", tree_writer.get_statistics());
//test_iter(&tree_writer, &shadow);
}
}
do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
writer.grow(1500).unwrap();
for i in 0..10000 {
let key: TestKey = ((rng.next_u32() % 1500) as u128).into();
let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
apply_op(&op, &writer, &mut shadow);
if i % 1000 == 0 {
eprintln!("{i} ops processed");
//eprintln!("stats: {:?}", tree_writer.get_statistics());
//test_iter(&tree_writer, &shadow);
}
}
do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
}
#[test]
fn test_shrink() {
let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
1500, 2000, "test_shrink"
).attach_writer();
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
let mut rng = rand::rng();
do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
do_shrink(&mut writer, &mut shadow, 1500, 1000);
do_deletes(500, &mut writer, &mut shadow);
do_random_ops(10000, 500, 0.75, &mut writer, &mut shadow, &mut rng);
assert!(writer.get_num_buckets_in_use() <= 1000);
}
#[test]
fn test_shrink_grow_seq() {
let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
1000, 20000, "test_grow_seq"
).attach_writer();
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
let mut rng = rand::rng();
do_random_ops(500, 1000, 0.1, &mut writer, &mut shadow, &mut rng);
eprintln!("Shrinking to 750");
do_shrink(&mut writer, &mut shadow, 1000, 750);
do_random_ops(200, 1000, 0.5, &mut writer, &mut shadow, &mut rng);
eprintln!("Growing to 1500");
writer.grow(1500).unwrap();
do_random_ops(600, 1500, 0.1, &mut writer, &mut shadow, &mut rng);
eprintln!("Shrinking to 200");
do_shrink(&mut writer, &mut shadow, 1500, 200);
do_deletes(100, &mut writer, &mut shadow);
do_random_ops(50, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
eprintln!("Growing to 10k");
writer.grow(10000).unwrap();
do_random_ops(10000, 5000, 0.25, &mut writer, &mut shadow, &mut rng);
}
#[test]
fn test_bucket_ops() {
let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
1000, 1200, "test_bucket_ops"
).attach_writer();
let hash = writer.get_hash_value(&1.into());
match writer.entry_with_hash(1.into(), hash) {
Entry::Occupied(mut e) => { e.insert(2); },
Entry::Vacant(e) => { e.insert(2).unwrap(); },
}
assert_eq!(writer.get_num_buckets_in_use(), 1);
assert_eq!(writer.get_num_buckets(), 1000);
assert_eq!(writer.get_with_hash(&1.into(), hash), Some(&2));
let pos = match writer.entry_with_hash(1.into(), hash) {
Entry::Occupied(e) => {
assert_eq!(e._key, 1.into());
let pos = e.bucket_pos as usize;
assert_eq!(writer.entry_at_bucket(pos).unwrap()._key, 1.into());
assert_eq!(writer.get_at_bucket(pos), Some(&(1.into(), 2)));
pos
},
Entry::Vacant(_) => { panic!("Insert didn't affect entry"); },
};
let ptr: *const usize = writer.get_with_hash(&1.into(), hash).unwrap();
assert_eq!(writer.get_bucket_for_value(ptr), pos);
writer.remove_with_hash(&1.into(), hash);
assert_eq!(writer.get_with_hash(&1.into(), hash), None);
}
#[test]
fn test_shrink_zero() {
let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
1500, 2000, "test_shrink_zero"
).attach_writer();
writer.begin_shrink(0);
for i in 0..1500 {
writer.entry_at_bucket(i).map(|x| x.remove());
}
writer.finish_shrink().unwrap();
assert_eq!(writer.get_num_buckets_in_use(), 0);
let hash = writer.get_hash_value(&1.into());
let entry = writer.entry_with_hash(1.into(), hash);
if let Entry::Vacant(v) = entry {
assert!(v.insert(2).is_err());
} else {
panic!("Somehow got non-vacant entry in empty map.")
}
writer.grow(50).unwrap();
let entry = writer.entry_with_hash(1.into(), hash);
if let Entry::Vacant(v) = entry {
assert!(v.insert(2).is_ok());
} else {
panic!("Somehow got non-vacant entry in empty map.")
}
assert_eq!(writer.get_num_buckets_in_use(), 1);
}
#[test]
#[should_panic]
fn test_grow_oom() {
let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
1500, 2000, "test_grow_oom"
).attach_writer();
writer.grow(20000).unwrap();
}
#[test]
#[should_panic]
fn test_shrink_bigger() {
let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
1500, 2500, "test_shrink_bigger"
).attach_writer();
writer.begin_shrink(2000);
}
#[test]
#[should_panic]
fn test_shrink_early_finish() {
let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(
1500, 2500, "test_shrink_early_finish"
).attach_writer();
writer.finish_shrink().unwrap();
}
#[test]
#[should_panic]
fn test_shrink_fixed_size() {
let mut area = [MaybeUninit::uninit(); 10000];
let init_struct = HashMapInit::<TestKey, usize>::with_fixed(3, &mut area);
let mut writer = init_struct.attach_writer();
writer.begin_shrink(1);
}

View File

@@ -1,6 +1,5 @@
# pgxs/neon/Makefile
MODULE_big = neon
OBJS = \
$(WIN32RES) \

View File

@@ -36,4 +36,4 @@ neon-shmem.workspace = true
utils.workspace = true
[build-dependencies]
cbindgen.workspace = true
cbindgen.workspace = true

View File

@@ -120,4 +120,3 @@ the upstream AIO worker processes do.
### Compute <-> pageserver protocol
The protocol between Compute and the pageserver is based on gRPC. See `protos/`.

View File

@@ -2248,4 +2248,3 @@ get_prewarm_info(PG_FUNCTION_ARGS)
PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
}