From fc35be0397fb26a9da0a290446c31330fbaee64d Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 23 Jul 2025 01:46:17 +0300 Subject: [PATCH] Remove the half-baked Adaptive Radix Tree implementation We are committed to using the resizeable hash table for now. ART is a great data structure, but it's too much for now. Maybe later. --- Cargo.lock | 14 - Cargo.toml | 1 - libs/neonart/Cargo.toml | 14 - libs/neonart/src/algorithm.rs | 599 --------- .../neonart/src/algorithm/lock_and_version.rs | 117 -- libs/neonart/src/algorithm/node_ptr.rs | 1099 ----------------- libs/neonart/src/algorithm/node_ref.rs | 349 ------ libs/neonart/src/allocator.rs | 156 --- libs/neonart/src/allocator/block.rs | 191 --- libs/neonart/src/allocator/multislab.rs | 33 - libs/neonart/src/allocator/slab.rs | 433 ------- libs/neonart/src/allocator/static.rs | 44 - libs/neonart/src/epoch.rs | 142 --- libs/neonart/src/lib.rs | 583 --------- libs/neonart/src/tests.rs | 236 ---- 15 files changed, 4011 deletions(-) delete mode 100644 libs/neonart/Cargo.toml delete mode 100644 libs/neonart/src/algorithm.rs delete mode 100644 libs/neonart/src/algorithm/lock_and_version.rs delete mode 100644 libs/neonart/src/algorithm/node_ptr.rs delete mode 100644 libs/neonart/src/algorithm/node_ref.rs delete mode 100644 libs/neonart/src/allocator.rs delete mode 100644 libs/neonart/src/allocator/block.rs delete mode 100644 libs/neonart/src/allocator/multislab.rs delete mode 100644 libs/neonart/src/allocator/slab.rs delete mode 100644 libs/neonart/src/allocator/static.rs delete mode 100644 libs/neonart/src/epoch.rs delete mode 100644 libs/neonart/src/lib.rs delete mode 100644 libs/neonart/src/tests.rs diff --git a/Cargo.lock b/Cargo.lock index 5e8d396519..01f78ddcbe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3942,17 +3942,6 @@ dependencies = [ "xxhash-rust", ] -[[package]] -name = "neonart" -version = "0.1.0" -dependencies = [ - "crossbeam-utils", - "rand 0.9.1", - "rand_distr", - "spin", - "tracing", -] - [[package]] name = "never-say-never" version = "6.6.666" @@ -6996,9 +6985,6 @@ name = "spin" version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" -dependencies = [ - "lock_api", -] [[package]] name = "spinning_top" diff --git a/Cargo.toml b/Cargo.toml index fba334d614..64d0a55196 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,6 @@ members = [ "libs/pq_proto", "libs/tenant_size_model", "libs/metrics", - "libs/neonart", "libs/postgres_connection", "libs/remote_storage", "libs/tracing-utils", diff --git a/libs/neonart/Cargo.toml b/libs/neonart/Cargo.toml deleted file mode 100644 index 915269e0cb..0000000000 --- a/libs/neonart/Cargo.toml +++ /dev/null @@ -1,14 +0,0 @@ -[package] -name = "neonart" -version = "0.1.0" -edition.workspace = true -license.workspace = true - -[dependencies] -crossbeam-utils.workspace = true -spin.workspace = true -tracing.workspace = true - -[dev-dependencies] -rand = "0.9.1" -rand_distr = "0.5.1" diff --git a/libs/neonart/src/algorithm.rs b/libs/neonart/src/algorithm.rs deleted file mode 100644 index 14cccfca6f..0000000000 --- a/libs/neonart/src/algorithm.rs +++ /dev/null @@ -1,599 +0,0 @@ -mod lock_and_version; -pub(crate) mod node_ptr; -mod node_ref; - -use std::vec::Vec; - -use crate::algorithm::lock_and_version::ConcurrentUpdateError; -use crate::algorithm::node_ptr::MAX_PREFIX_LEN; -use crate::algorithm::node_ref::{NewNodeRef, NodeRef, ReadLockedNodeRef, WriteLockedNodeRef}; -use 
crate::allocator::OutOfMemoryError; - -use crate::TreeWriteGuard; -use crate::UpdateAction; -use crate::allocator::ArtAllocator; -use crate::epoch::EpochPin; -use crate::{Key, Value}; - -pub(crate) type RootPtr = node_ptr::NodePtr; - -#[derive(Debug)] -pub enum ArtError { - ConcurrentUpdate, // need to retry - OutOfMemory, -} - -impl From for ArtError { - fn from(_: ConcurrentUpdateError) -> ArtError { - ArtError::ConcurrentUpdate - } -} - -impl From for ArtError { - fn from(_: OutOfMemoryError) -> ArtError { - ArtError::OutOfMemory - } -} - -pub fn new_root( - allocator: &impl ArtAllocator, -) -> Result, OutOfMemoryError> { - node_ptr::new_root(allocator) -} - -pub(crate) fn search<'e, K: Key, V: Value>( - key: &K, - root: RootPtr, - epoch_pin: &'e EpochPin, -) -> Option<&'e V> { - loop { - let root_ref = NodeRef::from_root_ptr(root); - if let Ok(result) = lookup_recurse(key.as_bytes(), root_ref, None, epoch_pin) { - break result; - } - // retry - } -} - -pub(crate) fn iter_next<'e, V: Value>( - key: &[u8], - root: RootPtr, - epoch_pin: &'e EpochPin, -) -> Option<(Vec, &'e V)> { - loop { - let mut path = Vec::new(); - let root_ref = NodeRef::from_root_ptr(root); - - match next_recurse(key, &mut path, root_ref, epoch_pin) { - Ok(Some(v)) => { - assert_eq!(path.len(), key.len()); - break Some((path, v)); - } - Ok(None) => break None, - Err(ConcurrentUpdateError()) => { - // retry - continue; - } - } - } -} - -pub(crate) fn update_fn<'e, 'g, K: Key, V: Value, A: ArtAllocator, F>( - key: &K, - value_fn: F, - root: RootPtr, - guard: &'g mut TreeWriteGuard<'e, K, V, A>, -) -> Result<(), OutOfMemoryError> -where - F: FnOnce(Option<&V>) -> UpdateAction, -{ - let value_fn_cell = std::cell::Cell::new(Some(value_fn)); - loop { - let root_ref = NodeRef::from_root_ptr(root); - let this_value_fn = |arg: Option<&V>| value_fn_cell.take().unwrap()(arg); - let key_bytes = key.as_bytes(); - - match update_recurse( - key_bytes, - this_value_fn, - root_ref, - None, - None, - guard, - 0, - key_bytes, - ) { - Ok(()) => break Ok(()), - Err(ArtError::ConcurrentUpdate) => { - continue; // retry - } - Err(ArtError::OutOfMemory) => break Err(OutOfMemoryError()), - } - } -} - -// Error means you must retry. -// -// This corresponds to the 'lookupOpt' function in the paper -#[allow(clippy::only_used_in_recursion)] -fn lookup_recurse<'e, V: Value>( - key: &[u8], - node: NodeRef<'e, V>, - parent: Option>, - epoch_pin: &'e EpochPin, -) -> Result, ConcurrentUpdateError> { - let rnode = node.read_lock_or_restart()?; - if let Some(parent) = parent { - parent.read_unlock_or_restart()?; - } - - // check if the prefix matches, may increment level - let prefix_len = if let Some(prefix_len) = rnode.prefix_matches(key) { - prefix_len - } else { - rnode.read_unlock_or_restart()?; - return Ok(None); - }; - - if rnode.is_leaf() { - assert_eq!(key.len(), prefix_len); - let vptr = rnode.get_leaf_value_ptr()?; - // safety: It's OK to return a ref of the pointer because we checked the version - // and the lifetime of 'epoch_pin' enforces that the reference is only accessible - // as long as the epoch is pinned. 
- let v = unsafe { vptr.as_ref().unwrap() }; - return Ok(Some(v)); - } - - let key = &key[prefix_len..]; - - // find child (or leaf value) - let next_node = rnode.find_child_or_restart(key[0])?; - - match next_node { - None => Ok(None), // key not found - Some(child) => lookup_recurse(&key[1..], child, Some(rnode), epoch_pin), - } -} - -#[allow(clippy::only_used_in_recursion)] -fn next_recurse<'e, V: Value>( - min_key: &[u8], - path: &mut Vec, - node: NodeRef<'e, V>, - epoch_pin: &'e EpochPin, -) -> Result, ConcurrentUpdateError> { - let rnode = node.read_lock_or_restart()?; - let prefix = rnode.get_prefix(); - if !prefix.is_empty() { - path.extend_from_slice(prefix); - } - - use std::cmp::Ordering; - let comparison = path.as_slice().cmp(&min_key[0..path.len()]); - if comparison == Ordering::Less { - rnode.read_unlock_or_restart()?; - return Ok(None); - } - - if rnode.is_leaf() { - assert_eq!(path.len(), min_key.len()); - let vptr = rnode.get_leaf_value_ptr()?; - // safety: It's OK to return a ref of the pointer because we checked the version - // and the lifetime of 'epoch_pin' enforces that the reference is only accessible - // as long as the epoch is pinned. - let v = unsafe { vptr.as_ref().unwrap() }; - return Ok(Some(v)); - } - - let mut min_key_byte = match comparison { - Ordering::Less => unreachable!(), // checked this above already - Ordering::Equal => min_key[path.len()], - Ordering::Greater => 0, - }; - - loop { - match rnode.find_next_child_or_restart(min_key_byte)? { - None => { - return Ok(None); - } - Some((key_byte, child_ref)) => { - let path_len = path.len(); - path.push(key_byte); - let result = next_recurse(min_key, path, child_ref, epoch_pin)?; - if result.is_some() { - return Ok(result); - } - if key_byte == u8::MAX { - return Ok(None); - } - path.truncate(path_len); - min_key_byte = key_byte + 1; - } - } - } -} - -// This corresponds to the 'insertOpt' function in the paper -#[allow(clippy::only_used_in_recursion)] -#[allow(clippy::too_many_arguments)] -pub(crate) fn update_recurse<'e, K: Key, V: Value, A: ArtAllocator, F>( - key: &[u8], - value_fn: F, - node: NodeRef<'e, V>, - rparent: Option<(ReadLockedNodeRef, u8)>, - rgrandparent: Option<(ReadLockedNodeRef, u8)>, - guard: &'_ mut TreeWriteGuard<'e, K, V, A>, - level: usize, - orig_key: &[u8], -) -> Result<(), ArtError> -where - F: FnOnce(Option<&V>) -> UpdateAction, -{ - let rnode = node.read_lock_or_restart()?; - - let prefix_match_len = rnode.prefix_matches(key); - if prefix_match_len.is_none() { - let (rparent, parent_key) = rparent.expect("direct children of the root have no prefix"); - let mut wparent = rparent.upgrade_to_write_lock_or_restart()?; - let mut wnode = rnode.upgrade_to_write_lock_or_restart()?; - - match value_fn(None) { - UpdateAction::Nothing => {} - UpdateAction::Insert(new_value) => { - insert_split_prefix(key, new_value, &mut wnode, &mut wparent, parent_key, guard)?; - } - UpdateAction::Remove => { - panic!("unexpected Remove action on insertion"); - } - } - wnode.write_unlock(); - wparent.write_unlock(); - return Ok(()); - } - let prefix_match_len = prefix_match_len.unwrap(); - let key = &key[prefix_match_len..]; - let level = level + prefix_match_len; - - if rnode.is_leaf() { - assert_eq!(key.len(), 0); - let (rparent, parent_key) = rparent.expect("root cannot be leaf"); - let mut wparent = rparent.upgrade_to_write_lock_or_restart()?; - let mut wnode = rnode.upgrade_to_write_lock_or_restart()?; - - // safety: Now that we have acquired the write lock, we have exclusive access to the - // 
value. XXX: There might be concurrent reads though? - let value_mut = wnode.get_leaf_value_mut(); - - match value_fn(Some(value_mut)) { - UpdateAction::Nothing => { - wparent.write_unlock(); - wnode.write_unlock(); - } - UpdateAction::Insert(_) => panic!("cannot insert over existing value"), - UpdateAction::Remove => { - guard.remember_obsolete_node(wnode.as_ptr()); - wparent.delete_child(parent_key); - wnode.write_unlock_obsolete(); - - if let Some(rgrandparent) = rgrandparent { - // FIXME: Ignore concurrency error. It doesn't lead to - // corruption, but it means we might leak something. Until - // another update cleans it up. - let _ = cleanup_parent(wparent, rgrandparent, guard); - } - } - } - - return Ok(()); - } - - let next_node = rnode.find_child_or_restart(key[0])?; - - if next_node.is_none() { - if rnode.is_full() { - let (rparent, parent_key) = rparent.expect("root node cannot become full"); - let mut wparent = rparent.upgrade_to_write_lock_or_restart()?; - let wnode = rnode.upgrade_to_write_lock_or_restart()?; - - match value_fn(None) { - UpdateAction::Nothing => { - wnode.write_unlock(); - wparent.write_unlock(); - } - UpdateAction::Insert(new_value) => { - insert_and_grow(key, new_value, wnode, &mut wparent, parent_key, guard)?; - wparent.write_unlock(); - } - UpdateAction::Remove => { - panic!("unexpected Remove action on insertion"); - } - }; - } else { - let mut wnode = rnode.upgrade_to_write_lock_or_restart()?; - if let Some((rparent, _)) = rparent { - rparent.read_unlock_or_restart()?; - } - match value_fn(None) { - UpdateAction::Nothing => {} - UpdateAction::Insert(new_value) => { - insert_to_node(&mut wnode, key, new_value, guard)?; - } - UpdateAction::Remove => { - panic!("unexpected Remove action on insertion"); - } - }; - wnode.write_unlock(); - } - Ok(()) - } else { - let next_child = next_node.unwrap(); // checked above it's not None - if let Some((ref rparent, _)) = rparent { - rparent.check_or_restart()?; - } - - // recurse to next level - update_recurse( - &key[1..], - value_fn, - next_child, - Some((rnode, key[0])), - rparent, - guard, - level + 1, - orig_key, - ) - } -} - -#[derive(Clone)] -enum PathElement { - Prefix(Vec), - KeyByte(u8), -} - -impl std::fmt::Debug for PathElement { - fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { - match self { - PathElement::Prefix(prefix) => write!(fmt, "{prefix:?}"), - PathElement::KeyByte(key_byte) => write!(fmt, "{key_byte}"), - } - } -} - -pub(crate) fn dump_tree( - root: RootPtr, - epoch_pin: &'_ EpochPin, - dst: &mut dyn std::io::Write, -) { - let root_ref = NodeRef::from_root_ptr(root); - - let _ = dump_recurse(&[], root_ref, epoch_pin, 0, dst); -} - -// TODO: return an Err if writeln!() returns error, instead of unwrapping -#[allow(clippy::only_used_in_recursion)] -fn dump_recurse<'e, V: Value + std::fmt::Debug>( - path: &[PathElement], - node: NodeRef<'e, V>, - epoch_pin: &'e EpochPin, - level: usize, - dst: &mut dyn std::io::Write, -) -> Result<(), ConcurrentUpdateError> { - let indent = str::repeat(" ", level); - - let rnode = node.read_lock_or_restart()?; - let mut path = Vec::from(path); - let prefix = rnode.get_prefix(); - if !prefix.is_empty() { - path.push(PathElement::Prefix(Vec::from(prefix))); - } - - if rnode.is_leaf() { - let vptr = rnode.get_leaf_value_ptr()?; - // safety: It's OK to return a ref of the pointer because we checked the version - // and the lifetime of 'epoch_pin' enforces that the reference is only accessible - // as long as the epoch is pinned. 
- let val = unsafe { vptr.as_ref().unwrap() }; - writeln!(dst, "{indent} {path:?}: {val:?}").unwrap(); - return Ok(()); - } - - for key_byte in 0..=u8::MAX { - match rnode.find_child_or_restart(key_byte)? { - None => continue, - Some(child_ref) => { - let rchild = child_ref.read_lock_or_restart()?; - writeln!( - dst, - "{} {:?}, {}: prefix {:?}", - indent, - &path, - key_byte, - rchild.get_prefix() - ) - .unwrap(); - - let mut child_path = path.clone(); - child_path.push(PathElement::KeyByte(key_byte)); - - dump_recurse(&child_path, child_ref, epoch_pin, level + 1, dst)?; - } - } - } - - Ok(()) -} - -///```text -/// [fooba]r -> value -/// -/// [foo]b -> [a]r -> value -/// e -> [ls]e -> value -///``` -fn insert_split_prefix>( - key: &[u8], - value: V, - node: &mut WriteLockedNodeRef, - parent: &mut WriteLockedNodeRef, - parent_key: u8, - guard: &'_ TreeWriteGuard, -) -> Result<(), OutOfMemoryError> { - let old_node = node; - let old_prefix = old_node.get_prefix(); - let common_prefix_len = common_prefix(key, old_prefix); - - // Allocate a node for the new value. - let new_value_node = allocate_node_for_value( - &key[common_prefix_len + 1..], - value, - guard.tree_writer.allocator, - )?; - - // Allocate a new internal node with the common prefix - // FIXME: deallocate 'new_value_node' on OOM - let mut prefix_node = - node_ref::new_internal(&key[..common_prefix_len], guard.tree_writer.allocator)?; - - // Add the old node and the new nodes to the new internal node - prefix_node.insert_old_child(old_prefix[common_prefix_len], old_node); - prefix_node.insert_new_child(key[common_prefix_len], new_value_node); - - // Modify the prefix of the old child in place - old_node.truncate_prefix(old_prefix.len() - common_prefix_len - 1); - - // replace the pointer in the parent - parent.replace_child(parent_key, prefix_node.into_ptr()); - - Ok(()) -} - -fn insert_to_node>( - wnode: &mut WriteLockedNodeRef, - key: &[u8], - value: V, - guard: &'_ TreeWriteGuard, -) -> Result<(), OutOfMemoryError> { - let value_child = allocate_node_for_value(&key[1..], value, guard.tree_writer.allocator)?; - wnode.insert_child(key[0], value_child.into_ptr()); - Ok(()) -} - -// On entry: 'parent' and 'node' are locked -fn insert_and_grow<'e, 'g, K: Key, V: Value, A: ArtAllocator>( - key: &[u8], - value: V, - wnode: WriteLockedNodeRef, - parent: &mut WriteLockedNodeRef, - parent_key_byte: u8, - guard: &'g mut TreeWriteGuard<'e, K, V, A>, -) -> Result<(), ArtError> { - let mut bigger_node = wnode.grow(guard.tree_writer.allocator)?; - - // FIXME: deallocate 'bigger_node' on OOM - let value_child = allocate_node_for_value(&key[1..], value, guard.tree_writer.allocator)?; - bigger_node.insert_new_child(key[0], value_child); - - // Replace the pointer in the parent - parent.replace_child(parent_key_byte, bigger_node.into_ptr()); - - guard.remember_obsolete_node(wnode.as_ptr()); - wnode.write_unlock_obsolete(); - - Ok(()) -} - -fn cleanup_parent<'e, 'g, K: Key, V: Value, A: ArtAllocator>( - wparent: WriteLockedNodeRef, - rgrandparent: (ReadLockedNodeRef, u8), - guard: &'g mut TreeWriteGuard<'e, K, V, A>, -) -> Result<(), ArtError> { - let (rgrandparent, grandparent_key_byte) = rgrandparent; - - // If the parent becomes completely empty after the deletion, remove the parent from the - // grandparent. (This case is possible because we reserve only 8 bytes for the prefix.) - // TODO: not implemented. - - // If the parent has only one child, replace the parent with the remaining child. 
(This is not - // possible if the child's prefix field cannot absorb the parent's) - if wparent.num_children() == 1 { - // Try to lock the remaining child. This can fail if the child is updated - // concurrently. - let (key_byte, remaining_child) = wparent.find_remaining_child(); - - let mut wremaining_child = remaining_child.write_lock_or_restart()?; - - if 1 + wremaining_child.get_prefix().len() + wparent.get_prefix().len() <= MAX_PREFIX_LEN { - let mut wgrandparent = rgrandparent.upgrade_to_write_lock_or_restart()?; - - // Ok, we have locked the leaf, the parent, the grandparent, and the parent's only - // remaining leaf. Proceed with the updates. - - // Update the prefix on the remaining leaf - wremaining_child.prepend_prefix(wparent.get_prefix(), key_byte); - - // Replace the pointer in the grandparent to point directly to the remaining leaf - wgrandparent.replace_child(grandparent_key_byte, wremaining_child.as_ptr()); - - // Mark the parent as deleted. - guard.remember_obsolete_node(wparent.as_ptr()); - wparent.write_unlock_obsolete(); - return Ok(()); - } - } - - // If the parent's children would fit on a smaller node type after the deletion, replace it with - // a smaller node. - if wparent.can_shrink() { - let mut wgrandparent = rgrandparent.upgrade_to_write_lock_or_restart()?; - let smaller_node = wparent.shrink(guard.tree_writer.allocator)?; - - // Replace the pointer in the grandparent - wgrandparent.replace_child(grandparent_key_byte, smaller_node.into_ptr()); - - guard.remember_obsolete_node(wparent.as_ptr()); - wparent.write_unlock_obsolete(); - return Ok(()); - } - - // nothing to do - wparent.write_unlock(); - Ok(()) -} - -// Allocate a new leaf node to hold 'value'. If the key is long, we -// may need to allocate new internal nodes to hold it too -fn allocate_node_for_value<'a, V: Value, A: ArtAllocator>( - key: &[u8], - value: V, - allocator: &'a A, -) -> Result, OutOfMemoryError> { - let mut prefix_off = key.len().saturating_sub(MAX_PREFIX_LEN); - - let leaf_node = node_ref::new_leaf(&key[prefix_off..key.len()], value, allocator)?; - - let mut node = leaf_node; - while prefix_off > 0 { - // Need another internal node - let remain_prefix = &key[0..prefix_off]; - - prefix_off = remain_prefix.len().saturating_sub(MAX_PREFIX_LEN + 1); - let mut internal_node = node_ref::new_internal( - &remain_prefix[prefix_off..remain_prefix.len() - 1], - allocator, - )?; - internal_node.insert_new_child(*remain_prefix.last().unwrap(), node); - node = internal_node; - } - - Ok(node) -} - -fn common_prefix(a: &[u8], b: &[u8]) -> usize { - for i in 0..MAX_PREFIX_LEN { - if a[i] != b[i] { - return i; - } - } - panic!("prefixes are equal"); -} diff --git a/libs/neonart/src/algorithm/lock_and_version.rs b/libs/neonart/src/algorithm/lock_and_version.rs deleted file mode 100644 index 025897864c..0000000000 --- a/libs/neonart/src/algorithm/lock_and_version.rs +++ /dev/null @@ -1,117 +0,0 @@ -//! Each node in the tree has contains one atomic word that stores three things: -//! -//! Bit 0: set if the node is "obsolete". An obsolete node has been removed from the tree, -//! but might still be accessed by concurrent readers until the epoch expires. -//! Bit 1: set if the node is currently write-locked. Used as a spinlock. -//! Bits 2-63: Version number, incremented every time the node is modified. -//! -//! AtomicLockAndVersion represents that. 
- -use std::sync::atomic::{AtomicU64, Ordering}; - -pub(crate) struct ConcurrentUpdateError(); - -pub(crate) struct AtomicLockAndVersion { - inner: AtomicU64, -} - -impl AtomicLockAndVersion { - pub(crate) fn new() -> AtomicLockAndVersion { - AtomicLockAndVersion { - inner: AtomicU64::new(0), - } - } -} - -impl AtomicLockAndVersion { - pub(crate) fn read_lock_or_restart(&self) -> Result { - let version = self.await_node_unlocked(); - if is_obsolete(version) { - return Err(ConcurrentUpdateError()); - } - Ok(version) - } - - pub(crate) fn check_or_restart(&self, version: u64) -> Result<(), ConcurrentUpdateError> { - self.read_unlock_or_restart(version) - } - - pub(crate) fn read_unlock_or_restart(&self, version: u64) -> Result<(), ConcurrentUpdateError> { - if self.inner.load(Ordering::Acquire) != version { - return Err(ConcurrentUpdateError()); - } - Ok(()) - } - - pub(crate) fn upgrade_to_write_lock_or_restart( - &self, - version: u64, - ) -> Result<(), ConcurrentUpdateError> { - if self - .inner - .compare_exchange( - version, - set_locked_bit(version), - Ordering::Acquire, - Ordering::Relaxed, - ) - .is_err() - { - return Err(ConcurrentUpdateError()); - } - Ok(()) - } - - pub(crate) fn write_lock_or_restart(&self) -> Result<(), ConcurrentUpdateError> { - let old = self.inner.load(Ordering::Relaxed); - if is_obsolete(old) || is_locked(old) { - return Err(ConcurrentUpdateError()); - } - if self - .inner - .compare_exchange( - old, - set_locked_bit(old), - Ordering::Acquire, - Ordering::Relaxed, - ) - .is_err() - { - return Err(ConcurrentUpdateError()); - } - Ok(()) - } - - pub(crate) fn write_unlock(&self) { - // reset locked bit and overflow into version - self.inner.fetch_add(2, Ordering::Release); - } - - pub(crate) fn write_unlock_obsolete(&self) { - // set obsolete, reset locked, overflow into version - self.inner.fetch_add(3, Ordering::Release); - } - - // Helper functions - fn await_node_unlocked(&self) -> u64 { - let mut version = self.inner.load(Ordering::Acquire); - while is_locked(version) { - // spinlock - std::thread::yield_now(); - version = self.inner.load(Ordering::Acquire) - } - version - } -} - -fn set_locked_bit(version: u64) -> u64 { - version + 2 -} - -fn is_obsolete(version: u64) -> bool { - (version & 1) == 1 -} - -fn is_locked(version: u64) -> bool { - (version & 2) == 2 -} diff --git a/libs/neonart/src/algorithm/node_ptr.rs b/libs/neonart/src/algorithm/node_ptr.rs deleted file mode 100644 index 45822ba888..0000000000 --- a/libs/neonart/src/algorithm/node_ptr.rs +++ /dev/null @@ -1,1099 +0,0 @@ -//! This file contains the implementations of all the different node variants. -//! These implementations use pointers, see node_ref.rs for slightly safer -//! wrappers that deal with references instead. 
-use std::marker::PhantomData; -use std::ptr::NonNull; - -use super::lock_and_version::AtomicLockAndVersion; - -use crate::Value; -use crate::allocator::ArtAllocator; -use crate::allocator::OutOfMemoryError; - -pub(crate) const MAX_PREFIX_LEN: usize = 8; - -enum NodeTag { - Internal4, - Internal16, - Internal48, - Internal256, - Leaf, -} - -#[repr(C)] -struct NodeBase { - tag: NodeTag, -} - -pub(crate) struct NodePtr { - ptr: *mut NodeBase, - - phantom_value: PhantomData, -} - -impl PartialEq for NodePtr { - fn eq(&self, other: &NodePtr) -> bool { - self.ptr == other.ptr - } -} - -impl std::fmt::Debug for NodePtr { - fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { - write!(fmt, "0x{}", self.ptr.addr()) - } -} - -impl Copy for NodePtr {} -impl Clone for NodePtr { - #[allow(clippy::non_canonical_clone_impl)] - fn clone(&self) -> NodePtr { - NodePtr { - ptr: self.ptr, - phantom_value: PhantomData, - } - } -} - -enum NodeVariant<'a, V> { - Internal4(&'a NodeInternal4), - Internal16(&'a NodeInternal16), - Internal48(&'a NodeInternal48), - Internal256(&'a NodeInternal256), - Leaf(&'a NodeLeaf), -} - -enum NodeVariantMut<'a, V> { - Internal4(&'a mut NodeInternal4), - Internal16(&'a mut NodeInternal16), - Internal48(&'a mut NodeInternal48), - Internal256(&'a mut NodeInternal256), - Leaf(&'a mut NodeLeaf), -} - -#[repr(C)] -pub struct NodeInternal4 { - tag: NodeTag, - prefix_len: u8, - num_children: u8, - - child_keys: [u8; 4], - - lock_and_version: AtomicLockAndVersion, - prefix: [u8; MAX_PREFIX_LEN], - - child_ptrs: [NodePtr; 4], -} - -#[repr(C)] -pub struct NodeInternal16 { - tag: NodeTag, - prefix_len: u8, - num_children: u8, - - lock_and_version: AtomicLockAndVersion, - prefix: [u8; MAX_PREFIX_LEN], - child_keys: [u8; 16], - child_ptrs: [NodePtr; 16], -} - -#[repr(C)] -pub struct NodeInternal48 { - tag: NodeTag, - prefix_len: u8, - num_children: u8, - - lock_and_version: AtomicLockAndVersion, - prefix: [u8; MAX_PREFIX_LEN], - child_indexes: [u8; 256], - child_ptrs: [NodePtr; 48], -} -const INVALID_CHILD_INDEX: u8 = u8::MAX; - -#[repr(C)] -pub struct NodeInternal256 { - tag: NodeTag, - prefix_len: u8, - num_children: u16, - - lock_and_version: AtomicLockAndVersion, - prefix: [u8; MAX_PREFIX_LEN], - - child_ptrs: [NodePtr; 256], -} - -#[repr(C)] -pub struct NodeLeaf { - tag: NodeTag, - prefix_len: u8, - - // TODO: It's not clear if we need a full version on leaf nodes. I think a single bit - // to indicate if the node is obsolete would be sufficient. 
- lock_and_version: AtomicLockAndVersion, - prefix: [u8; MAX_PREFIX_LEN], - - value: V, -} - -impl NodePtr { - pub(crate) fn is_leaf(&self) -> bool { - match self.variant() { - NodeVariant::Internal4(_) => false, - NodeVariant::Internal16(_) => false, - NodeVariant::Internal48(_) => false, - NodeVariant::Internal256(_) => false, - NodeVariant::Leaf(_) => true, - } - } - - pub(crate) fn lockword(&self) -> &AtomicLockAndVersion { - match self.variant() { - NodeVariant::Internal4(n) => &n.lock_and_version, - NodeVariant::Internal16(n) => &n.lock_and_version, - NodeVariant::Internal48(n) => &n.lock_and_version, - NodeVariant::Internal256(n) => &n.lock_and_version, - NodeVariant::Leaf(n) => &n.lock_and_version, - } - } - - pub(crate) fn is_null(&self) -> bool { - self.ptr.is_null() - } - - pub(crate) const fn null() -> NodePtr { - NodePtr { - ptr: std::ptr::null_mut(), - phantom_value: PhantomData, - } - } - - fn variant(&self) -> NodeVariant { - unsafe { - match (*self.ptr).tag { - NodeTag::Internal4 => NodeVariant::Internal4( - NonNull::new_unchecked(self.ptr.cast::>()).as_ref(), - ), - NodeTag::Internal16 => NodeVariant::Internal16( - NonNull::new_unchecked(self.ptr.cast::>()).as_ref(), - ), - NodeTag::Internal48 => NodeVariant::Internal48( - NonNull::new_unchecked(self.ptr.cast::>()).as_ref(), - ), - NodeTag::Internal256 => NodeVariant::Internal256( - NonNull::new_unchecked(self.ptr.cast::>()).as_ref(), - ), - NodeTag::Leaf => NodeVariant::Leaf( - NonNull::new_unchecked(self.ptr.cast::>()).as_ref(), - ), - } - } - } - - fn variant_mut(&mut self) -> NodeVariantMut { - unsafe { - match (*self.ptr).tag { - NodeTag::Internal4 => NodeVariantMut::Internal4( - NonNull::new_unchecked(self.ptr.cast::>()).as_mut(), - ), - NodeTag::Internal16 => NodeVariantMut::Internal16( - NonNull::new_unchecked(self.ptr.cast::>()).as_mut(), - ), - NodeTag::Internal48 => NodeVariantMut::Internal48( - NonNull::new_unchecked(self.ptr.cast::>()).as_mut(), - ), - NodeTag::Internal256 => NodeVariantMut::Internal256( - NonNull::new_unchecked(self.ptr.cast::>()).as_mut(), - ), - NodeTag::Leaf => NodeVariantMut::Leaf( - NonNull::new_unchecked(self.ptr.cast::>()).as_mut(), - ), - } - } - } -} - -impl NodePtr { - pub(crate) fn prefix_matches(&self, key: &[u8]) -> Option { - let node_prefix = self.get_prefix(); - assert!(node_prefix.len() <= key.len()); // because we only use fixed-size keys - if &key[0..node_prefix.len()] != node_prefix { - None - } else { - Some(node_prefix.len()) - } - } - - pub(crate) fn get_prefix(&self) -> &[u8] { - match self.variant() { - NodeVariant::Internal4(n) => n.get_prefix(), - NodeVariant::Internal16(n) => n.get_prefix(), - NodeVariant::Internal48(n) => n.get_prefix(), - NodeVariant::Internal256(n) => n.get_prefix(), - NodeVariant::Leaf(n) => n.get_prefix(), - } - } - - pub(crate) fn is_full(&self) -> bool { - match self.variant() { - NodeVariant::Internal4(n) => n.is_full(), - NodeVariant::Internal16(n) => n.is_full(), - NodeVariant::Internal48(n) => n.is_full(), - NodeVariant::Internal256(n) => n.is_full(), - NodeVariant::Leaf(_) => panic!("is_full() called on leaf node"), - } - } - - pub(crate) fn num_children(&self) -> usize { - match self.variant() { - NodeVariant::Internal4(n) => n.num_children as usize, - NodeVariant::Internal16(n) => n.num_children as usize, - NodeVariant::Internal48(n) => n.num_children as usize, - NodeVariant::Internal256(n) => n.num_children as usize, - NodeVariant::Leaf(_) => panic!("is_full() called on leaf node"), - } - } - - pub(crate) fn can_shrink(&self) -> 
bool { - match self.variant() { - NodeVariant::Internal4(n) => n.can_shrink(), - NodeVariant::Internal16(n) => n.can_shrink(), - NodeVariant::Internal48(n) => n.can_shrink(), - NodeVariant::Internal256(n) => n.can_shrink(), - NodeVariant::Leaf(_) => panic!("can_shrink() called on leaf node"), - } - } - - pub(crate) fn find_child(&self, key_byte: u8) -> Option> { - match self.variant() { - NodeVariant::Internal4(n) => n.find_child(key_byte), - NodeVariant::Internal16(n) => n.find_child(key_byte), - NodeVariant::Internal48(n) => n.find_child(key_byte), - NodeVariant::Internal256(n) => n.find_child(key_byte), - NodeVariant::Leaf(_) => panic!("find_child called on leaf node"), - } - } - - pub(crate) fn find_next_child(&self, key_byte: u8) -> Option<(u8, NodePtr)> { - match self.variant() { - NodeVariant::Internal4(n) => n.find_next_child(key_byte), - NodeVariant::Internal16(n) => n.find_next_child(key_byte), - NodeVariant::Internal48(n) => n.find_next_child(key_byte), - NodeVariant::Internal256(n) => n.find_next_child(key_byte), - NodeVariant::Leaf(_) => panic!("find_next_child called on leaf node"), - } - } - - pub(crate) fn truncate_prefix(&mut self, new_prefix_len: usize) { - match self.variant_mut() { - NodeVariantMut::Internal4(n) => n.truncate_prefix(new_prefix_len), - NodeVariantMut::Internal16(n) => n.truncate_prefix(new_prefix_len), - NodeVariantMut::Internal48(n) => n.truncate_prefix(new_prefix_len), - NodeVariantMut::Internal256(n) => n.truncate_prefix(new_prefix_len), - NodeVariantMut::Leaf(n) => n.truncate_prefix(new_prefix_len), - } - } - - pub(crate) fn prepend_prefix(&mut self, prefix: &[u8], prefix_byte: u8) { - match self.variant_mut() { - NodeVariantMut::Internal4(n) => n.prepend_prefix(prefix, prefix_byte), - NodeVariantMut::Internal16(n) => n.prepend_prefix(prefix, prefix_byte), - NodeVariantMut::Internal48(n) => n.prepend_prefix(prefix, prefix_byte), - NodeVariantMut::Internal256(n) => n.prepend_prefix(prefix, prefix_byte), - NodeVariantMut::Leaf(n) => n.prepend_prefix(prefix, prefix_byte), - } - } - - pub(crate) fn grow( - &self, - allocator: &impl ArtAllocator, - ) -> Result, OutOfMemoryError> { - match self.variant() { - NodeVariant::Internal4(n) => n.grow(allocator), - NodeVariant::Internal16(n) => n.grow(allocator), - NodeVariant::Internal48(n) => n.grow(allocator), - NodeVariant::Internal256(_) => panic!("cannot grow Internal256 node"), - NodeVariant::Leaf(_) => panic!("cannot grow Leaf node"), - } - } - - pub(crate) fn insert_child(&mut self, key_byte: u8, child: NodePtr) { - match self.variant_mut() { - NodeVariantMut::Internal4(n) => n.insert_child(key_byte, child), - NodeVariantMut::Internal16(n) => n.insert_child(key_byte, child), - NodeVariantMut::Internal48(n) => n.insert_child(key_byte, child), - NodeVariantMut::Internal256(n) => n.insert_child(key_byte, child), - NodeVariantMut::Leaf(_) => panic!("insert_child called on leaf node"), - } - } - - pub(crate) fn replace_child(&mut self, key_byte: u8, replacement: NodePtr) { - match self.variant_mut() { - NodeVariantMut::Internal4(n) => n.replace_child(key_byte, replacement), - NodeVariantMut::Internal16(n) => n.replace_child(key_byte, replacement), - NodeVariantMut::Internal48(n) => n.replace_child(key_byte, replacement), - NodeVariantMut::Internal256(n) => n.replace_child(key_byte, replacement), - NodeVariantMut::Leaf(_) => panic!("replace_child called on leaf node"), - } - } - - pub(crate) fn delete_child(&mut self, key_byte: u8) { - match self.variant_mut() { - NodeVariantMut::Internal4(n) => 
n.delete_child(key_byte), - NodeVariantMut::Internal16(n) => n.delete_child(key_byte), - NodeVariantMut::Internal48(n) => n.delete_child(key_byte), - NodeVariantMut::Internal256(n) => n.delete_child(key_byte), - NodeVariantMut::Leaf(_) => panic!("delete_child called on leaf node"), - } - } - - pub(crate) fn shrink( - &self, - allocator: &impl ArtAllocator, - ) -> Result, OutOfMemoryError> { - match self.variant() { - NodeVariant::Internal4(_) => panic!("shrink called on internal4 node"), - NodeVariant::Internal16(n) => n.shrink(allocator), - NodeVariant::Internal48(n) => n.shrink(allocator), - NodeVariant::Internal256(n) => n.shrink(allocator), - NodeVariant::Leaf(_) => panic!("shrink called on leaf node"), - } - } - - pub(crate) fn get_leaf_value(&self) -> &V { - match self.variant() { - NodeVariant::Internal4(_) - | NodeVariant::Internal16(_) - | NodeVariant::Internal48(_) - | NodeVariant::Internal256(_) => panic!("get_leaf_value called on internal node"), - NodeVariant::Leaf(n) => n.get_leaf_value(), - } - } - - pub(crate) fn get_leaf_value_mut(&mut self) -> &mut V { - match self.variant_mut() { - NodeVariantMut::Internal4(_) - | NodeVariantMut::Internal16(_) - | NodeVariantMut::Internal48(_) - | NodeVariantMut::Internal256(_) => panic!("get_leaf_value called on internal node"), - NodeVariantMut::Leaf(n) => n.get_leaf_value_mut(), - } - } - - pub(crate) fn deallocate(self, allocator: &impl ArtAllocator) { - match self.variant() { - NodeVariant::Internal4(_) => allocator.dealloc_node_internal4(self.ptr.cast()), - NodeVariant::Internal16(_) => allocator.dealloc_node_internal16(self.ptr.cast()), - NodeVariant::Internal48(_) => allocator.dealloc_node_internal48(self.ptr.cast()), - NodeVariant::Internal256(_) => allocator.dealloc_node_internal256(self.ptr.cast()), - NodeVariant::Leaf(_) => allocator.dealloc_node_leaf(self.ptr.cast()), - } - } -} - -pub fn new_root( - allocator: &impl ArtAllocator, -) -> Result, OutOfMemoryError> { - let ptr: *mut NodeInternal256 = allocator.alloc_node_internal256().cast(); - if ptr.is_null() { - return Err(OutOfMemoryError()); - } - - unsafe { - *ptr = NodeInternal256::::new(); - } - - Ok(ptr.into()) -} - -pub fn new_internal( - prefix: &[u8], - allocator: &impl ArtAllocator, -) -> Result, OutOfMemoryError> { - let ptr: *mut NodeInternal4 = allocator.alloc_node_internal4().cast(); - if ptr.is_null() { - return Err(OutOfMemoryError()); - } - let mut init = NodeInternal4 { - tag: NodeTag::Internal4, - lock_and_version: AtomicLockAndVersion::new(), - - prefix: [8; MAX_PREFIX_LEN], - prefix_len: prefix.len() as u8, - num_children: 0, - - child_keys: [0; 4], - child_ptrs: [const { NodePtr::null() }; 4], - }; - init.prefix[0..prefix.len()].copy_from_slice(prefix); - unsafe { ptr.write(init) }; - - Ok(ptr.into()) -} - -pub fn new_leaf( - prefix: &[u8], - value: V, - allocator: &impl ArtAllocator, -) -> Result, OutOfMemoryError> { - let ptr: *mut NodeLeaf = allocator.alloc_node_leaf().cast(); - if ptr.is_null() { - return Err(OutOfMemoryError()); - } - let mut init = NodeLeaf { - tag: NodeTag::Leaf, - lock_and_version: AtomicLockAndVersion::new(), - - prefix: [8; MAX_PREFIX_LEN], - prefix_len: prefix.len() as u8, - - value, - }; - init.prefix[0..prefix.len()].copy_from_slice(prefix); - unsafe { ptr.write(init) }; - - Ok(ptr.into()) -} - -impl NodeInternal4 { - fn get_prefix(&self) -> &[u8] { - &self.prefix[0..self.prefix_len as usize] - } - - fn prepend_prefix(&mut self, prefix: &[u8], prefix_byte: u8) { - assert!(1 + prefix.len() + self.prefix_len as usize <= 
MAX_PREFIX_LEN); - let mut new = Vec::with_capacity(MAX_PREFIX_LEN); - new.extend_from_slice(prefix); - new.push(prefix_byte); - new.extend_from_slice(&self.prefix[0..self.prefix_len as usize]); - self.prefix[0..new.len()].copy_from_slice(&new); - self.prefix_len = new.len() as u8; - } - - fn truncate_prefix(&mut self, new_prefix_len: usize) { - assert!(new_prefix_len < self.prefix_len as usize); - let prefix = &mut self.prefix; - let offset = self.prefix_len as usize - new_prefix_len; - for i in 0..new_prefix_len { - prefix[i] = prefix[i + offset]; - } - self.prefix_len = new_prefix_len as u8; - } - - fn find_child(&self, key: u8) -> Option> { - for i in 0..self.num_children as usize { - if self.child_keys[i] == key { - return Some(self.child_ptrs[i]); - } - } - None - } - - fn find_next_child(&self, min_key: u8) -> Option<(u8, NodePtr)> { - let mut found: Option<(usize, u8)> = None; - for i in 0..self.num_children as usize { - let this_key = self.child_keys[i]; - if this_key >= min_key { - if let Some((_, found_key)) = found { - if this_key < found_key { - found = Some((i, this_key)); - } - } else { - found = Some((i, this_key)); - } - } - } - if let Some((found_idx, found_key)) = found { - Some((found_key, self.child_ptrs[found_idx])) - } else { - None - } - } - - fn replace_child(&mut self, key_byte: u8, replacement: NodePtr) { - for i in 0..self.num_children as usize { - if self.child_keys[i] == key_byte { - self.child_ptrs[i] = replacement; - return; - } - } - panic!("could not re-find parent with key {key_byte}"); - } - - fn delete_child(&mut self, key_byte: u8) { - for i in 0..self.num_children as usize { - if self.child_keys[i] == key_byte { - self.num_children -= 1; - for j in i..self.num_children as usize { - self.child_keys[j] = self.child_keys[j + 1]; - self.child_ptrs[j] = self.child_ptrs[j + 1]; - } - return; - } - } - panic!("could not re-find parent with key {key_byte}"); - } - - fn is_full(&self) -> bool { - self.num_children == 4 - } - - fn can_shrink(&self) -> bool { - false - } - - fn insert_child(&mut self, key_byte: u8, child: NodePtr) { - assert!(self.num_children < 4); - - let idx = self.num_children as usize; - self.child_keys[idx] = key_byte; - self.child_ptrs[idx] = child; - self.num_children += 1; - } - - fn grow(&self, allocator: &impl ArtAllocator) -> Result, OutOfMemoryError> { - let ptr: *mut NodeInternal16 = allocator.alloc_node_internal16().cast(); - if ptr.is_null() { - return Err(OutOfMemoryError()); - } - let mut init = NodeInternal16 { - tag: NodeTag::Internal16, - lock_and_version: AtomicLockAndVersion::new(), - - prefix: self.prefix, - prefix_len: self.prefix_len, - num_children: self.num_children, - - child_keys: [0; 16], - child_ptrs: [const { NodePtr::null() }; 16], - }; - for i in 0..self.num_children as usize { - init.child_keys[i] = self.child_keys[i]; - init.child_ptrs[i] = self.child_ptrs[i]; - } - unsafe { ptr.write(init) }; - Ok(ptr.into()) - } -} - -impl NodeInternal16 { - fn get_prefix(&self) -> &[u8] { - &self.prefix[0..self.prefix_len as usize] - } - - fn prepend_prefix(&mut self, prefix: &[u8], prefix_byte: u8) { - assert!(1 + prefix.len() + self.prefix_len as usize <= MAX_PREFIX_LEN); - let mut new = Vec::with_capacity(MAX_PREFIX_LEN); - new.extend_from_slice(prefix); - new.push(prefix_byte); - new.extend_from_slice(&self.prefix[0..self.prefix_len as usize]); - self.prefix[0..new.len()].copy_from_slice(&new); - self.prefix_len = new.len() as u8; - } - - fn truncate_prefix(&mut self, new_prefix_len: usize) { - assert!(new_prefix_len < 
self.prefix_len as usize); - let prefix = &mut self.prefix; - let offset = self.prefix_len as usize - new_prefix_len; - for i in 0..new_prefix_len { - prefix[i] = prefix[i + offset]; - } - self.prefix_len = new_prefix_len as u8; - } - - fn find_child(&self, key_byte: u8) -> Option> { - for i in 0..self.num_children as usize { - if self.child_keys[i] == key_byte { - return Some(self.child_ptrs[i]); - } - } - None - } - - fn find_next_child(&self, min_key: u8) -> Option<(u8, NodePtr)> { - let mut found: Option<(usize, u8)> = None; - for i in 0..self.num_children as usize { - let this_key = self.child_keys[i]; - if this_key >= min_key { - if let Some((_, found_key)) = found { - if this_key < found_key { - found = Some((i, this_key)); - } - } else { - found = Some((i, this_key)); - } - } - } - if let Some((found_idx, found_key)) = found { - Some((found_key, self.child_ptrs[found_idx])) - } else { - None - } - } - - fn replace_child(&mut self, key_byte: u8, replacement: NodePtr) { - for i in 0..self.num_children as usize { - if self.child_keys[i] == key_byte { - self.child_ptrs[i] = replacement; - return; - } - } - panic!("could not re-find parent with key {key_byte}"); - } - - fn delete_child(&mut self, key_byte: u8) { - for i in 0..self.num_children as usize { - if self.child_keys[i] == key_byte { - self.num_children -= 1; - for j in i..self.num_children as usize { - self.child_keys[j] = self.child_keys[j + 1]; - self.child_ptrs[j] = self.child_ptrs[j + 1]; - } - return; - } - } - panic!("could not re-find parent with key {key_byte}"); - } - - fn is_full(&self) -> bool { - self.num_children == 16 - } - - fn can_shrink(&self) -> bool { - self.num_children <= 4 - } - - fn insert_child(&mut self, key_byte: u8, child: NodePtr) { - assert!(self.num_children < 16); - - let idx = self.num_children as usize; - self.child_keys[idx] = key_byte; - self.child_ptrs[idx] = child; - self.num_children += 1; - } - - fn grow(&self, allocator: &impl ArtAllocator) -> Result, OutOfMemoryError> { - let ptr: *mut NodeInternal48 = allocator.alloc_node_internal48().cast(); - if ptr.is_null() { - return Err(OutOfMemoryError()); - } - let mut init = NodeInternal48 { - tag: NodeTag::Internal48, - lock_and_version: AtomicLockAndVersion::new(), - - prefix: self.prefix, - prefix_len: self.prefix_len, - num_children: self.num_children, - - child_indexes: [INVALID_CHILD_INDEX; 256], - child_ptrs: [const { NodePtr::null() }; 48], - }; - for i in 0..self.num_children as usize { - let idx = self.child_keys[i] as usize; - init.child_indexes[idx] = i as u8; - init.child_ptrs[i] = self.child_ptrs[i]; - } - init.validate(); - unsafe { ptr.write(init) }; - Ok(ptr.into()) - } - - fn shrink(&self, allocator: &impl ArtAllocator) -> Result, OutOfMemoryError> { - assert!(self.num_children <= 4); - let ptr: *mut NodeInternal4 = allocator.alloc_node_internal4().cast(); - if ptr.is_null() { - return Err(OutOfMemoryError()); - } - let mut init = NodeInternal4 { - tag: NodeTag::Internal4, - lock_and_version: AtomicLockAndVersion::new(), - - prefix: self.prefix, - prefix_len: self.prefix_len, - num_children: self.num_children, - - child_keys: [0; 4], - child_ptrs: [const { NodePtr::null() }; 4], - }; - for i in 0..self.num_children as usize { - init.child_keys[i] = self.child_keys[i]; - init.child_ptrs[i] = self.child_ptrs[i]; - } - unsafe { ptr.write(init) }; - Ok(ptr.into()) - } -} - -impl NodeInternal48 { - fn validate(&self) { - let mut shadow_indexes = std::collections::HashSet::new(); - let mut count = 0; - for i in 0..256 { - let idx = 
self.child_indexes[i]; - if idx != INVALID_CHILD_INDEX { - assert!( - idx < self.num_children, - "i {} idx {}, num_children {}", - i, - idx, - self.num_children - ); - assert!(!shadow_indexes.contains(&idx)); - shadow_indexes.insert(idx); - count += 1; - } - } - assert_eq!(count, self.num_children); - } - - fn prepend_prefix(&mut self, prefix: &[u8], prefix_byte: u8) { - assert!(1 + prefix.len() + self.prefix_len as usize <= MAX_PREFIX_LEN); - let mut new = Vec::with_capacity(MAX_PREFIX_LEN); - new.extend_from_slice(prefix); - new.push(prefix_byte); - new.extend_from_slice(&self.prefix[0..self.prefix_len as usize]); - self.prefix[0..new.len()].copy_from_slice(&new); - self.prefix_len = new.len() as u8; - } - - fn get_prefix(&self) -> &[u8] { - &self.prefix[0..self.prefix_len as usize] - } - - fn truncate_prefix(&mut self, new_prefix_len: usize) { - assert!(new_prefix_len < self.prefix_len as usize); - let prefix = &mut self.prefix; - let offset = self.prefix_len as usize - new_prefix_len; - for i in 0..new_prefix_len { - prefix[i] = prefix[i + offset]; - } - self.prefix_len = new_prefix_len as u8; - } - - fn find_child(&self, key_byte: u8) -> Option> { - let idx = self.child_indexes[key_byte as usize]; - if idx != INVALID_CHILD_INDEX { - Some(self.child_ptrs[idx as usize]) - } else { - None - } - } - - fn find_next_child(&self, min_key: u8) -> Option<(u8, NodePtr)> { - for key in min_key..=u8::MAX { - let idx = self.child_indexes[key as usize]; - if idx != INVALID_CHILD_INDEX { - return Some((key, self.child_ptrs[idx as usize])); - } - } - None - } - - fn replace_child(&mut self, key_byte: u8, replacement: NodePtr) { - let idx = self.child_indexes[key_byte as usize]; - if idx == INVALID_CHILD_INDEX { - panic!("could not re-find parent with key {key_byte}"); - } - self.child_ptrs[idx as usize] = replacement; - self.validate(); - } - - fn delete_child(&mut self, key_byte: u8) { - let idx = self.child_indexes[key_byte as usize] as usize; - if idx == INVALID_CHILD_INDEX as usize { - panic!("could not re-find parent with key {key_byte}"); - } - - // Compact the child_ptrs array - let removed_idx = (self.num_children - 1) as usize; - if idx != removed_idx { - for i in 0..=u8::MAX as usize { - if self.child_indexes[i] as usize == removed_idx { - self.child_indexes[i] = idx as u8; - self.child_ptrs[idx] = self.child_ptrs[removed_idx]; - - self.child_indexes[key_byte as usize] = INVALID_CHILD_INDEX; - self.num_children -= 1; - self.validate(); - return; - } - } - panic!("could not re-find last index {removed_idx} on Internal48 node"); - } else { - self.child_indexes[key_byte as usize] = INVALID_CHILD_INDEX; - self.num_children -= 1; - } - } - - fn is_full(&self) -> bool { - self.num_children == 48 - } - - fn can_shrink(&self) -> bool { - self.num_children <= 16 - } - - fn insert_child(&mut self, key_byte: u8, child: NodePtr) { - assert!(self.num_children < 48); - assert!(self.child_indexes[key_byte as usize] == INVALID_CHILD_INDEX); - let idx = self.num_children; - self.child_indexes[key_byte as usize] = idx; - self.child_ptrs[idx as usize] = child; - self.num_children += 1; - self.validate(); - } - - fn grow(&self, allocator: &impl ArtAllocator) -> Result, OutOfMemoryError> { - let ptr: *mut NodeInternal256 = allocator.alloc_node_internal256().cast(); - if ptr.is_null() { - return Err(OutOfMemoryError()); - } - let mut init = NodeInternal256 { - tag: NodeTag::Internal256, - lock_and_version: AtomicLockAndVersion::new(), - - prefix: self.prefix, - prefix_len: self.prefix_len, - num_children: 
self.num_children as u16, - - child_ptrs: [const { NodePtr::null() }; 256], - }; - for i in 0..256 { - let idx = self.child_indexes[i]; - if idx != INVALID_CHILD_INDEX { - init.child_ptrs[i] = self.child_ptrs[idx as usize]; - } - } - unsafe { ptr.write(init) }; - Ok(ptr.into()) - } - - fn shrink(&self, allocator: &impl ArtAllocator) -> Result, OutOfMemoryError> { - assert!(self.num_children <= 16); - let ptr: *mut NodeInternal16 = allocator.alloc_node_internal16().cast(); - if ptr.is_null() { - return Err(OutOfMemoryError()); - } - let mut init = NodeInternal16 { - tag: NodeTag::Internal16, - lock_and_version: AtomicLockAndVersion::new(), - - prefix: self.prefix, - prefix_len: self.prefix_len, - num_children: self.num_children, - - child_keys: [0; 16], - child_ptrs: [const { NodePtr::null() }; 16], - }; - let mut j = 0; - for i in 0..256 { - let idx = self.child_indexes[i]; - if idx != INVALID_CHILD_INDEX { - init.child_keys[j] = i as u8; - init.child_ptrs[j] = self.child_ptrs[idx as usize]; - j += 1; - } - } - assert_eq!(j, self.num_children as usize); - unsafe { ptr.write(init) }; - Ok(ptr.into()) - } -} - -impl NodeInternal256 { - fn get_prefix(&self) -> &[u8] { - &self.prefix[0..self.prefix_len as usize] - } - - fn prepend_prefix(&mut self, prefix: &[u8], prefix_byte: u8) { - assert!(1 + prefix.len() + self.prefix_len as usize <= MAX_PREFIX_LEN); - let mut new = Vec::with_capacity(MAX_PREFIX_LEN); - new.extend_from_slice(prefix); - new.push(prefix_byte); - new.extend_from_slice(&self.prefix[0..self.prefix_len as usize]); - self.prefix[0..new.len()].copy_from_slice(&new); - self.prefix_len = new.len() as u8; - } - - fn truncate_prefix(&mut self, new_prefix_len: usize) { - assert!(new_prefix_len < self.prefix_len as usize); - let prefix = &mut self.prefix; - let offset = self.prefix_len as usize - new_prefix_len; - for i in 0..new_prefix_len { - prefix[i] = prefix[i + offset]; - } - self.prefix_len = new_prefix_len as u8; - } - - fn find_child(&self, key_byte: u8) -> Option> { - let idx = key_byte as usize; - if !self.child_ptrs[idx].is_null() { - Some(self.child_ptrs[idx]) - } else { - None - } - } - - fn find_next_child(&self, min_key: u8) -> Option<(u8, NodePtr)> { - for key in min_key..=u8::MAX { - if !self.child_ptrs[key as usize].is_null() { - return Some((key, self.child_ptrs[key as usize])); - } - } - None - } - - fn replace_child(&mut self, key_byte: u8, replacement: NodePtr) { - let idx = key_byte as usize; - if !self.child_ptrs[idx].is_null() { - self.child_ptrs[idx] = replacement - } else { - panic!("could not re-find parent with key {key_byte}"); - } - } - - fn delete_child(&mut self, key_byte: u8) { - let idx = key_byte as usize; - if self.child_ptrs[idx].is_null() { - panic!("could not re-find parent with key {key_byte}"); - } - self.num_children -= 1; - self.child_ptrs[idx] = NodePtr::null(); - } - - fn is_full(&self) -> bool { - self.num_children == 256 - } - - fn can_shrink(&self) -> bool { - self.num_children <= 48 - } - - fn insert_child(&mut self, key_byte: u8, child: NodePtr) { - assert!(self.num_children < 256); - assert!(self.child_ptrs[key_byte as usize].is_null()); - self.child_ptrs[key_byte as usize] = child; - self.num_children += 1; - } - - fn shrink(&self, allocator: &impl ArtAllocator) -> Result, OutOfMemoryError> { - assert!(self.num_children <= 48); - let ptr: *mut NodeInternal48 = allocator.alloc_node_internal48().cast(); - if ptr.is_null() { - return Err(OutOfMemoryError()); - } - let mut init = NodeInternal48 { - tag: NodeTag::Internal48, - 
lock_and_version: AtomicLockAndVersion::new(), - - prefix: self.prefix, - prefix_len: self.prefix_len, - num_children: self.num_children as u8, - - child_indexes: [INVALID_CHILD_INDEX; 256], - child_ptrs: [const { NodePtr::null() }; 48], - }; - let mut j = 0; - for i in 0..256 { - if !self.child_ptrs[i].is_null() { - init.child_indexes[i] = j; - init.child_ptrs[j as usize] = self.child_ptrs[i]; - j += 1; - } - } - assert_eq!(j as u16, self.num_children); - unsafe { ptr.write(init) }; - Ok(ptr.into()) - } -} - -impl NodeLeaf { - fn get_prefix(&self) -> &[u8] { - &self.prefix[0..self.prefix_len as usize] - } - - fn prepend_prefix(&mut self, prefix: &[u8], prefix_byte: u8) { - assert!(1 + prefix.len() + self.prefix_len as usize <= MAX_PREFIX_LEN); - let mut new = Vec::with_capacity(MAX_PREFIX_LEN); - new.extend_from_slice(prefix); - new.push(prefix_byte); - new.extend_from_slice(&self.prefix[0..self.prefix_len as usize]); - self.prefix[0..new.len()].copy_from_slice(&new); - self.prefix_len = new.len() as u8; - } - - fn truncate_prefix(&mut self, new_prefix_len: usize) { - assert!(new_prefix_len < self.prefix_len as usize); - let prefix = &mut self.prefix; - let offset = self.prefix_len as usize - new_prefix_len; - for i in 0..new_prefix_len { - prefix[i] = prefix[i + offset]; - } - self.prefix_len = new_prefix_len as u8; - } - - fn get_leaf_value<'a: 'b, 'b>(&'a self) -> &'b V { - &self.value - } - - fn get_leaf_value_mut<'a: 'b, 'b>(&'a mut self) -> &'b mut V { - &mut self.value - } -} - -impl NodeInternal256 { - pub(crate) fn new() -> NodeInternal256 { - NodeInternal256 { - tag: NodeTag::Internal256, - lock_and_version: AtomicLockAndVersion::new(), - - prefix: [0; MAX_PREFIX_LEN], - prefix_len: 0, - num_children: 0, - - child_ptrs: [const { NodePtr::null() }; 256], - } - } -} - -impl From<*mut NodeInternal4> for NodePtr { - fn from(val: *mut NodeInternal4) -> NodePtr { - NodePtr { - ptr: val.cast(), - phantom_value: PhantomData, - } - } -} -impl From<*mut NodeInternal16> for NodePtr { - fn from(val: *mut NodeInternal16) -> NodePtr { - NodePtr { - ptr: val.cast(), - phantom_value: PhantomData, - } - } -} - -impl From<*mut NodeInternal48> for NodePtr { - fn from(val: *mut NodeInternal48) -> NodePtr { - NodePtr { - ptr: val.cast(), - phantom_value: PhantomData, - } - } -} - -impl From<*mut NodeInternal256> for NodePtr { - fn from(val: *mut NodeInternal256) -> NodePtr { - NodePtr { - ptr: val.cast(), - phantom_value: PhantomData, - } - } -} - -impl From<*mut NodeLeaf> for NodePtr { - fn from(val: *mut NodeLeaf) -> NodePtr { - NodePtr { - ptr: val.cast(), - phantom_value: PhantomData, - } - } -} diff --git a/libs/neonart/src/algorithm/node_ref.rs b/libs/neonart/src/algorithm/node_ref.rs deleted file mode 100644 index 5403aaabdf..0000000000 --- a/libs/neonart/src/algorithm/node_ref.rs +++ /dev/null @@ -1,349 +0,0 @@ -use std::fmt::Debug; -use std::marker::PhantomData; - -use super::node_ptr; -use super::node_ptr::NodePtr; -use crate::EpochPin; -use crate::Value; -use crate::algorithm::lock_and_version::AtomicLockAndVersion; -use crate::algorithm::lock_and_version::ConcurrentUpdateError; -use crate::allocator::ArtAllocator; -use crate::allocator::OutOfMemoryError; - -pub struct NodeRef<'e, V> { - ptr: NodePtr, - - phantom: PhantomData<&'e EpochPin<'e>>, -} - -impl<'e, V> Debug for NodeRef<'e, V> { - fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { - write!(fmt, "{:?}", self.ptr) - } -} - -impl<'e, V: Value> NodeRef<'e, V> { - pub(crate) fn from_root_ptr(root_ptr: 
NodePtr) -> NodeRef<'e, V> { - NodeRef { - ptr: root_ptr, - phantom: PhantomData, - } - } - - pub(crate) fn read_lock_or_restart( - &self, - ) -> Result, ConcurrentUpdateError> { - let version = self.lockword().read_lock_or_restart()?; - Ok(ReadLockedNodeRef { - ptr: self.ptr, - version, - phantom: self.phantom, - }) - } - - pub(crate) fn write_lock_or_restart( - &self, - ) -> Result, ConcurrentUpdateError> { - self.lockword().write_lock_or_restart()?; - Ok(WriteLockedNodeRef { - ptr: self.ptr, - phantom: self.phantom, - }) - } - - fn lockword(&self) -> &AtomicLockAndVersion { - self.ptr.lockword() - } -} - -/// A reference to a node that has been optimistically read-locked. The functions re-check -/// the version after each read. -pub struct ReadLockedNodeRef<'e, V> { - ptr: NodePtr, - version: u64, - - phantom: PhantomData<&'e EpochPin<'e>>, -} - -impl<'e, V: Value> ReadLockedNodeRef<'e, V> { - pub(crate) fn is_leaf(&self) -> bool { - self.ptr.is_leaf() - } - - pub(crate) fn is_full(&self) -> bool { - self.ptr.is_full() - } - - pub(crate) fn get_prefix(&self) -> &[u8] { - self.ptr.get_prefix() - } - - /// Note: because we're only holding a read lock, the prefix can change concurrently. - /// You must be prepared to restart, if read_unlock() returns error later. - /// - /// Returns the length of the prefix, or None if it's not a match - pub(crate) fn prefix_matches(&self, key: &[u8]) -> Option { - self.ptr.prefix_matches(key) - } - - pub(crate) fn find_child_or_restart( - &self, - key_byte: u8, - ) -> Result>, ConcurrentUpdateError> { - let child_or_value = self.ptr.find_child(key_byte); - self.ptr.lockword().check_or_restart(self.version)?; - - match child_or_value { - None => Ok(None), - Some(child_ptr) => Ok(Some(NodeRef { - ptr: child_ptr, - phantom: self.phantom, - })), - } - } - - pub(crate) fn find_next_child_or_restart( - &self, - min_key_byte: u8, - ) -> Result)>, ConcurrentUpdateError> { - let child_or_value = self.ptr.find_next_child(min_key_byte); - self.ptr.lockword().check_or_restart(self.version)?; - - match child_or_value { - None => Ok(None), - Some((k, child_ptr)) => Ok(Some(( - k, - NodeRef { - ptr: child_ptr, - phantom: self.phantom, - }, - ))), - } - } - - pub(crate) fn get_leaf_value_ptr(&self) -> Result<*const V, ConcurrentUpdateError> { - let result = self.ptr.get_leaf_value(); - self.ptr.lockword().check_or_restart(self.version)?; - - // Extend the lifetime. - let result = std::ptr::from_ref(result); - - Ok(result) - } - - pub(crate) fn upgrade_to_write_lock_or_restart( - self, - ) -> Result, ConcurrentUpdateError> { - self.ptr - .lockword() - .upgrade_to_write_lock_or_restart(self.version)?; - - Ok(WriteLockedNodeRef { - ptr: self.ptr, - phantom: self.phantom, - }) - } - - pub(crate) fn read_unlock_or_restart(self) -> Result<(), ConcurrentUpdateError> { - self.ptr.lockword().check_or_restart(self.version)?; - Ok(()) - } - - pub(crate) fn check_or_restart(&self) -> Result<(), ConcurrentUpdateError> { - self.ptr.lockword().check_or_restart(self.version)?; - Ok(()) - } -} - -/// A reference to a node that has been optimistically read-locked. The functions re-check -/// the version after each read. 
-pub struct WriteLockedNodeRef<'e, V> { - ptr: NodePtr, - phantom: PhantomData<&'e EpochPin<'e>>, -} - -impl<'e, V: Value> WriteLockedNodeRef<'e, V> { - pub(crate) fn can_shrink(&self) -> bool { - self.ptr.can_shrink() - } - - pub(crate) fn num_children(&self) -> usize { - self.ptr.num_children() - } - - pub(crate) fn write_unlock(mut self) { - self.ptr.lockword().write_unlock(); - self.ptr = NodePtr::null(); - } - - pub(crate) fn write_unlock_obsolete(mut self) { - self.ptr.lockword().write_unlock_obsolete(); - self.ptr = NodePtr::null(); - } - - pub(crate) fn get_prefix(&self) -> &[u8] { - self.ptr.get_prefix() - } - - pub(crate) fn truncate_prefix(&mut self, new_prefix_len: usize) { - self.ptr.truncate_prefix(new_prefix_len) - } - - pub(crate) fn prepend_prefix(&mut self, prefix: &[u8], prefix_byte: u8) { - self.ptr.prepend_prefix(prefix, prefix_byte) - } - - pub(crate) fn insert_child(&mut self, key_byte: u8, child: NodePtr) { - self.ptr.insert_child(key_byte, child) - } - - pub(crate) fn get_leaf_value_mut(&mut self) -> &mut V { - self.ptr.get_leaf_value_mut() - } - - pub(crate) fn grow<'a, A>( - &self, - allocator: &'a A, - ) -> Result, OutOfMemoryError> - where - A: ArtAllocator, - { - let new_node = self.ptr.grow(allocator)?; - Ok(NewNodeRef { - ptr: new_node, - allocator, - extra_nodes: Vec::new(), - }) - } - - pub(crate) fn shrink<'a, A>( - &self, - allocator: &'a A, - ) -> Result, OutOfMemoryError> - where - A: ArtAllocator, - { - let new_node = self.ptr.shrink(allocator)?; - Ok(NewNodeRef { - ptr: new_node, - allocator, - extra_nodes: Vec::new(), - }) - } - - pub(crate) fn as_ptr(&self) -> NodePtr { - self.ptr - } - - pub(crate) fn replace_child(&mut self, key_byte: u8, replacement: NodePtr) { - self.ptr.replace_child(key_byte, replacement); - } - - pub(crate) fn delete_child(&mut self, key_byte: u8) { - self.ptr.delete_child(key_byte); - } - - pub(crate) fn find_remaining_child(&self) -> (u8, NodeRef<'e, V>) { - assert_eq!(self.num_children(), 1); - let child_or_value = self.ptr.find_next_child(0); - - match child_or_value { - None => panic!("could not find only child in node"), - Some((k, child_ptr)) => ( - k, - NodeRef { - ptr: child_ptr, - phantom: self.phantom, - }, - ), - } - } -} - -impl<'e, V> Drop for WriteLockedNodeRef<'e, V> { - fn drop(&mut self) { - if !self.ptr.is_null() { - self.ptr.lockword().write_unlock(); - } - } -} - -pub(crate) struct NewNodeRef<'a, V, A> -where - V: Value, - A: ArtAllocator, -{ - ptr: NodePtr, - allocator: &'a A, - - extra_nodes: Vec>, -} - -impl<'a, V, A> NewNodeRef<'a, V, A> -where - V: Value, - A: ArtAllocator, -{ - pub(crate) fn insert_old_child(&mut self, key_byte: u8, child: &WriteLockedNodeRef) { - self.ptr.insert_child(key_byte, child.as_ptr()) - } - - pub(crate) fn into_ptr(mut self) -> NodePtr { - let ptr = self.ptr; - self.ptr = NodePtr::null(); - ptr - } - - pub(crate) fn insert_new_child(&mut self, key_byte: u8, child: NewNodeRef<'a, V, A>) { - let child_ptr = child.into_ptr(); - self.ptr.insert_child(key_byte, child_ptr); - self.extra_nodes.push(child_ptr); - } -} - -impl<'a, V, A> Drop for NewNodeRef<'a, V, A> -where - V: Value, - A: ArtAllocator, -{ - /// This drop implementation deallocates the newly allocated node, if into_ptr() was not called. 
- fn drop(&mut self) { - if !self.ptr.is_null() { - self.ptr.deallocate(self.allocator); - for p in self.extra_nodes.iter() { - p.deallocate(self.allocator); - } - } - } -} - -pub(crate) fn new_internal<'a, V, A>( - prefix: &[u8], - allocator: &'a A, -) -> Result, OutOfMemoryError> -where - V: Value, - A: ArtAllocator, -{ - Ok(NewNodeRef { - ptr: node_ptr::new_internal(prefix, allocator)?, - allocator, - extra_nodes: Vec::new(), - }) -} - -pub(crate) fn new_leaf<'a, V, A>( - prefix: &[u8], - value: V, - allocator: &'a A, -) -> Result, OutOfMemoryError> -where - V: Value, - A: ArtAllocator, -{ - Ok(NewNodeRef { - ptr: node_ptr::new_leaf(prefix, value, allocator)?, - allocator, - extra_nodes: Vec::new(), - }) -} diff --git a/libs/neonart/src/allocator.rs b/libs/neonart/src/allocator.rs deleted file mode 100644 index f95e251458..0000000000 --- a/libs/neonart/src/allocator.rs +++ /dev/null @@ -1,156 +0,0 @@ -pub mod block; -mod multislab; -mod slab; -pub mod r#static; - -use std::alloc::Layout; -use std::marker::PhantomData; -use std::mem::MaybeUninit; -use std::sync::atomic::Ordering; - -use crate::allocator::multislab::MultiSlabAllocator; -use crate::allocator::r#static::alloc_from_slice; - -use spin; - -use crate::Tree; -pub use crate::algorithm::node_ptr::{ - NodeInternal4, NodeInternal16, NodeInternal48, NodeInternal256, NodeLeaf, -}; - -#[derive(Debug)] -pub struct OutOfMemoryError(); - -pub trait ArtAllocator { - fn alloc_tree(&self) -> *mut Tree; - - fn alloc_node_internal4(&self) -> *mut NodeInternal4; - fn alloc_node_internal16(&self) -> *mut NodeInternal16; - fn alloc_node_internal48(&self) -> *mut NodeInternal48; - fn alloc_node_internal256(&self) -> *mut NodeInternal256; - fn alloc_node_leaf(&self) -> *mut NodeLeaf; - - fn dealloc_node_internal4(&self, ptr: *mut NodeInternal4); - fn dealloc_node_internal16(&self, ptr: *mut NodeInternal16); - fn dealloc_node_internal48(&self, ptr: *mut NodeInternal48); - fn dealloc_node_internal256(&self, ptr: *mut NodeInternal256); - fn dealloc_node_leaf(&self, ptr: *mut NodeLeaf); -} - -pub struct ArtMultiSlabAllocator<'t, V> -where - V: crate::Value, -{ - tree_area: spin::Mutex>>>, - - pub(crate) inner: MultiSlabAllocator<'t, 5>, - - phantom_val: PhantomData, -} - -impl<'t, V: crate::Value> ArtMultiSlabAllocator<'t, V> { - const LAYOUTS: [Layout; 5] = [ - Layout::new::>(), - Layout::new::>(), - Layout::new::>(), - Layout::new::>(), - Layout::new::>(), - ]; - - pub fn new(area: &'t mut [MaybeUninit]) -> &'t mut ArtMultiSlabAllocator<'t, V> { - let (allocator_area, remain) = alloc_from_slice::>(area); - let (tree_area, remain) = alloc_from_slice::>(remain); - - allocator_area.write(ArtMultiSlabAllocator { - tree_area: spin::Mutex::new(Some(tree_area)), - inner: MultiSlabAllocator::new(remain, &Self::LAYOUTS), - phantom_val: PhantomData, - }) - } -} - -impl<'t, V: crate::Value> ArtAllocator for ArtMultiSlabAllocator<'t, V> { - fn alloc_tree(&self) -> *mut Tree { - let mut t = self.tree_area.lock(); - if let Some(tree_area) = t.take() { - return tree_area.as_mut_ptr().cast(); - } - panic!("cannot allocate more than one tree"); - } - - fn alloc_node_internal4(&self) -> *mut NodeInternal4 { - self.inner.alloc_slab(0).cast() - } - fn alloc_node_internal16(&self) -> *mut NodeInternal16 { - self.inner.alloc_slab(1).cast() - } - fn alloc_node_internal48(&self) -> *mut NodeInternal48 { - self.inner.alloc_slab(2).cast() - } - fn alloc_node_internal256(&self) -> *mut NodeInternal256 { - self.inner.alloc_slab(3).cast() - } - fn alloc_node_leaf(&self) -> *mut 
NodeLeaf { - self.inner.alloc_slab(4).cast() - } - - fn dealloc_node_internal4(&self, ptr: *mut NodeInternal4) { - self.inner.dealloc_slab(0, ptr.cast()) - } - - fn dealloc_node_internal16(&self, ptr: *mut NodeInternal16) { - self.inner.dealloc_slab(1, ptr.cast()) - } - fn dealloc_node_internal48(&self, ptr: *mut NodeInternal48) { - self.inner.dealloc_slab(2, ptr.cast()) - } - fn dealloc_node_internal256(&self, ptr: *mut NodeInternal256) { - self.inner.dealloc_slab(3, ptr.cast()) - } - fn dealloc_node_leaf(&self, ptr: *mut NodeLeaf) { - self.inner.dealloc_slab(4, ptr.cast()) - } -} - -impl<'t, V: crate::Value> ArtMultiSlabAllocator<'t, V> { - pub(crate) fn get_statistics(&self) -> ArtMultiSlabStats { - ArtMultiSlabStats { - num_internal4: self.inner.slab_descs[0] - .num_allocated - .load(Ordering::Relaxed), - num_internal16: self.inner.slab_descs[1] - .num_allocated - .load(Ordering::Relaxed), - num_internal48: self.inner.slab_descs[2] - .num_allocated - .load(Ordering::Relaxed), - num_internal256: self.inner.slab_descs[3] - .num_allocated - .load(Ordering::Relaxed), - num_leaf: self.inner.slab_descs[4] - .num_allocated - .load(Ordering::Relaxed), - - num_blocks_internal4: self.inner.slab_descs[0].num_blocks.load(Ordering::Relaxed), - num_blocks_internal16: self.inner.slab_descs[1].num_blocks.load(Ordering::Relaxed), - num_blocks_internal48: self.inner.slab_descs[2].num_blocks.load(Ordering::Relaxed), - num_blocks_internal256: self.inner.slab_descs[3].num_blocks.load(Ordering::Relaxed), - num_blocks_leaf: self.inner.slab_descs[4].num_blocks.load(Ordering::Relaxed), - } - } -} - -#[derive(Clone, Debug)] -pub struct ArtMultiSlabStats { - pub num_internal4: u64, - pub num_internal16: u64, - pub num_internal48: u64, - pub num_internal256: u64, - pub num_leaf: u64, - - pub num_blocks_internal4: u64, - pub num_blocks_internal16: u64, - pub num_blocks_internal48: u64, - pub num_blocks_internal256: u64, - pub num_blocks_leaf: u64, -} diff --git a/libs/neonart/src/allocator/block.rs b/libs/neonart/src/allocator/block.rs deleted file mode 100644 index 9c1bb6e176..0000000000 --- a/libs/neonart/src/allocator/block.rs +++ /dev/null @@ -1,191 +0,0 @@ -//! 
Simple allocator of fixed-size blocks - -use std::mem::MaybeUninit; -use std::sync::atomic::{AtomicU64, Ordering}; - -use spin; - -pub const BLOCK_SIZE: usize = 16 * 1024; - -const INVALID_BLOCK: u64 = u64::MAX; - -pub(crate) struct BlockAllocator<'t> { - blocks_ptr: &'t [MaybeUninit], - num_blocks: u64, - num_initialized: AtomicU64, - - freelist_head: spin::Mutex, -} - -struct FreeListBlock { - inner: spin::Mutex, -} - -struct FreeListBlockInner { - next: u64, - - num_free_blocks: u64, - free_blocks: [u64; 100], // FIXME: fill the rest of the block -} - -impl<'t> BlockAllocator<'t> { - pub(crate) fn new(area: &'t mut [MaybeUninit]) -> Self { - // Use all the space for the blocks - let padding = area.as_ptr().align_offset(BLOCK_SIZE); - let remain = &mut area[padding..]; - - let num_blocks = (remain.len() / BLOCK_SIZE) as u64; - - BlockAllocator { - blocks_ptr: remain, - num_blocks, - num_initialized: AtomicU64::new(0), - freelist_head: spin::Mutex::new(INVALID_BLOCK), - } - } - - /// safety: you must hold a lock on the pointer to this block, otherwise it might get - /// reused for another kind of block - fn read_freelist_block(&self, blkno: u64) -> &FreeListBlock { - let ptr: *const FreeListBlock = self.get_block_ptr(blkno).cast(); - unsafe { ptr.as_ref().unwrap() } - } - - fn get_block_ptr(&self, blkno: u64) -> *mut u8 { - assert!(blkno < self.num_blocks); - unsafe { - self.blocks_ptr - .as_ptr() - .byte_offset(blkno as isize * BLOCK_SIZE as isize) - } - .cast_mut() - .cast() - } - - #[allow(clippy::mut_from_ref)] - pub(crate) fn alloc_block(&self) -> &mut [MaybeUninit] { - // FIXME: handle OOM - let blkno = self.alloc_block_internal(); - if blkno == INVALID_BLOCK { - panic!("out of memory"); - } - - let ptr: *mut MaybeUninit = self.get_block_ptr(blkno).cast(); - unsafe { std::slice::from_raw_parts_mut(ptr, BLOCK_SIZE) } - } - - fn alloc_block_internal(&self) -> u64 { - // check the free list. - { - let mut freelist_head = self.freelist_head.lock(); - if *freelist_head != INVALID_BLOCK { - let freelist_block = self.read_freelist_block(*freelist_head); - - // acquire lock on the freelist block before releasing the lock on the parent (i.e. lock coupling) - let mut g = freelist_block.inner.lock(); - - if g.num_free_blocks > 0 { - g.num_free_blocks -= 1; - let result = g.free_blocks[g.num_free_blocks as usize]; - return result; - } else { - // consume the freelist block itself - let result = *freelist_head; - *freelist_head = g.next; - // This freelist block is now unlinked and can be repurposed - drop(g); - return result; - } - } - } - - // If there are some blocks left that we've never used, pick next such block - let mut next_uninitialized = self.num_initialized.load(Ordering::Relaxed); - while next_uninitialized < self.num_blocks { - match self.num_initialized.compare_exchange( - next_uninitialized, - next_uninitialized + 1, - Ordering::Relaxed, - Ordering::Relaxed, - ) { - Ok(_) => { - return next_uninitialized; - } - Err(old) => { - next_uninitialized = old; - continue; - } - } - } - - // out of blocks - INVALID_BLOCK - } - - // TODO: this is currently unused. 
The slab allocator never releases blocks - #[allow(dead_code)] - pub(crate) fn release_block(&self, block_ptr: *mut u8) { - let blockno = unsafe { block_ptr.byte_offset_from(self.blocks_ptr) / BLOCK_SIZE as isize }; - self.release_block_internal(blockno as u64); - } - - fn release_block_internal(&self, blockno: u64) { - let mut freelist_head = self.freelist_head.lock(); - if *freelist_head != INVALID_BLOCK { - let freelist_block = self.read_freelist_block(*freelist_head); - - // acquire lock on the freelist block before releasing the lock on the parent (i.e. lock coupling) - let mut g = freelist_block.inner.lock(); - - let num_free_blocks = g.num_free_blocks; - if num_free_blocks < g.free_blocks.len() as u64 { - g.free_blocks[num_free_blocks as usize] = blockno; - g.num_free_blocks += 1; - return; - } - } - - // Convert the block into a new freelist block - let block_ptr: *mut FreeListBlock = self.get_block_ptr(blockno).cast(); - let init = FreeListBlock { - inner: spin::Mutex::new(FreeListBlockInner { - next: *freelist_head, - num_free_blocks: 0, - free_blocks: [INVALID_BLOCK; 100], - }), - }; - unsafe { (*block_ptr) = init }; - *freelist_head = blockno; - } - - // for debugging - pub(crate) fn get_statistics(&self) -> BlockAllocatorStats { - let mut num_free_blocks = 0; - - let mut _prev_lock = None; - let head_lock = self.freelist_head.lock(); - let mut next_blk = *head_lock; - let mut _head_lock = Some(head_lock); - while next_blk != INVALID_BLOCK { - let freelist_block = self.read_freelist_block(next_blk); - let lock = freelist_block.inner.lock(); - num_free_blocks += lock.num_free_blocks; - next_blk = lock.next; - _prev_lock = Some(lock); // hold the lock until we've read the next block - _head_lock = None; - } - - BlockAllocatorStats { - num_blocks: self.num_blocks, - num_initialized: self.num_initialized.load(Ordering::Relaxed), - num_free_blocks, - } - } -} - -#[derive(Clone, Debug)] -pub struct BlockAllocatorStats { - pub num_blocks: u64, - pub num_initialized: u64, - pub num_free_blocks: u64, -} diff --git a/libs/neonart/src/allocator/multislab.rs b/libs/neonart/src/allocator/multislab.rs deleted file mode 100644 index a75b411ec2..0000000000 --- a/libs/neonart/src/allocator/multislab.rs +++ /dev/null @@ -1,33 +0,0 @@ -use std::alloc::Layout; -use std::mem::MaybeUninit; - -use crate::allocator::block::BlockAllocator; -use crate::allocator::slab::SlabDesc; - -pub struct MultiSlabAllocator<'t, const N: usize> { - pub(crate) block_allocator: BlockAllocator<'t>, - - pub(crate) slab_descs: [SlabDesc; N], -} - -impl<'t, const N: usize> MultiSlabAllocator<'t, N> { - pub(crate) fn new( - area: &'t mut [MaybeUninit], - layouts: &[Layout; N], - ) -> MultiSlabAllocator<'t, N> { - let block_allocator = BlockAllocator::new(area); - MultiSlabAllocator { - block_allocator, - - slab_descs: std::array::from_fn(|i| SlabDesc::new(&layouts[i])), - } - } - - pub(crate) fn alloc_slab(&self, slab_idx: usize) -> *mut u8 { - self.slab_descs[slab_idx].alloc_chunk(&self.block_allocator) - } - - pub(crate) fn dealloc_slab(&self, slab_idx: usize, ptr: *mut u8) { - self.slab_descs[slab_idx].dealloc_chunk(ptr, &self.block_allocator) - } -} diff --git a/libs/neonart/src/allocator/slab.rs b/libs/neonart/src/allocator/slab.rs deleted file mode 100644 index 5471d24118..0000000000 --- a/libs/neonart/src/allocator/slab.rs +++ /dev/null @@ -1,433 +0,0 @@ -//! A slab allocator that carves out fixed-size chunks from larger blocks. -//! -//! 
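// A standalone, simplified sketch of the intrusive free-list scheme that the deleted
// slab.rs below implements: a block is carved into fixed-size chunks, and each free
// chunk stores the pointer to the next free chunk inside itself. The names here
// (MiniSlab, Chunk) are invented for the sketch; the real SlabDesc is additionally
// lock-protected, keeps per-block headers and full/non-full block lists, allocates
// further blocks from the BlockAllocator on demand, and tracks statistics.

use std::marker::PhantomData;
use std::mem::{MaybeUninit, align_of, size_of};

/// A free chunk reuses its own storage to hold the link to the next free chunk.
struct Chunk {
    next: *mut Chunk,
}

/// Single-threaded, single-block slab of fixed-size chunks.
struct MiniSlab<'a> {
    free_head: *mut Chunk,
    _marker: PhantomData<&'a mut [MaybeUninit<u8>]>,
}

impl<'a> MiniSlab<'a> {
    /// Carve `buf` into `chunk_size`-byte chunks and thread them onto a free list.
    fn new(buf: &'a mut [MaybeUninit<u8>], chunk_size: usize) -> MiniSlab<'a> {
        assert!(chunk_size >= size_of::<Chunk>());
        assert!(chunk_size % align_of::<Chunk>() == 0);
        // Skip leading bytes so that every chunk is aligned for `Chunk`.
        let pad = buf.as_ptr().align_offset(align_of::<Chunk>());
        let usable = &mut buf[pad..];
        let num_chunks = usable.len() / chunk_size;

        // Link the chunks back to front, so the list ends up in address order.
        let mut free_head: *mut Chunk = std::ptr::null_mut();
        for i in (0..num_chunks).rev() {
            let chunk = unsafe { usable.as_mut_ptr().add(i * chunk_size) }.cast::<Chunk>();
            unsafe { (*chunk).next = free_head };
            free_head = chunk;
        }
        MiniSlab {
            free_head,
            _marker: PhantomData,
        }
    }

    /// Pop a chunk off the free list, or `None` if the block is exhausted.
    fn alloc(&mut self) -> Option<*mut u8> {
        if self.free_head.is_null() {
            return None;
        }
        let chunk = self.free_head;
        self.free_head = unsafe { (*chunk).next };
        Some(chunk.cast())
    }

    /// Push a chunk obtained from `alloc` back onto the free list.
    ///
    /// Safety: `ptr` must have come from `alloc` on this slab and must not be in use.
    unsafe fn dealloc(&mut self, ptr: *mut u8) {
        let chunk: *mut Chunk = ptr.cast();
        unsafe { (*chunk).next = self.free_head };
        self.free_head = chunk;
    }
}

fn main() {
    let mut buf = vec![MaybeUninit::<u8>::uninit(); 1024];
    let mut slab = MiniSlab::new(&mut buf, 64);

    let a = slab.alloc().unwrap();
    let b = slab.alloc().unwrap();
    assert_ne!(a, b);

    // A freed chunk goes to the head of the list and is handed out again first.
    unsafe { slab.dealloc(a) };
    assert_eq!(slab.alloc(), Some(a));
}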
- -use std::alloc::Layout; -use std::mem::MaybeUninit; -use std::ops::Deref; -use std::sync::atomic::{AtomicU32, AtomicU64, Ordering}; - -use spin; - -use super::alloc_from_slice; -use super::block::BlockAllocator; - -use crate::allocator::block::BLOCK_SIZE; - -pub(crate) struct SlabDesc { - pub(crate) layout: Layout, - - block_lists: spin::RwLock, - - pub(crate) num_blocks: AtomicU64, - pub(crate) num_allocated: AtomicU64, -} - -// FIXME: Not sure if SlabDesc is really Sync or Send. It probably is when it's empty, but -// 'block_lists' contains pointers when it's not empty. In the current use as part of the -// the art tree, SlabDescs are only moved during initialization. -unsafe impl Sync for SlabDesc {} -unsafe impl Send for SlabDesc {} - -#[derive(Default, Debug)] -struct BlockLists { - full_blocks: BlockList, - nonfull_blocks: BlockList, -} - -impl BlockLists { - // Unlink a node. It must be in either one of the two lists. - unsafe fn unlink(&mut self, elem: *mut SlabBlockHeader) { - let list = unsafe { - if (*elem).next.is_null() { - if self.full_blocks.tail == elem { - Some(&mut self.full_blocks) - } else { - Some(&mut self.nonfull_blocks) - } - } else if (*elem).prev.is_null() { - if self.full_blocks.head == elem { - Some(&mut self.full_blocks) - } else { - Some(&mut self.nonfull_blocks) - } - } else { - None - } - }; - unsafe { unlink_slab_block(list, elem) }; - } -} - -unsafe fn unlink_slab_block(mut list: Option<&mut BlockList>, elem: *mut SlabBlockHeader) { - unsafe { - if (*elem).next.is_null() { - assert_eq!(list.as_ref().unwrap().tail, elem); - list.as_mut().unwrap().tail = (*elem).prev; - } else { - assert_eq!((*(*elem).next).prev, elem); - (*(*elem).next).prev = (*elem).prev; - } - if (*elem).prev.is_null() { - assert_eq!(list.as_ref().unwrap().head, elem); - list.as_mut().unwrap().head = (*elem).next; - } else { - assert_eq!((*(*elem).prev).next, elem); - (*(*elem).prev).next = (*elem).next; - } - } -} - -#[derive(Debug)] -struct BlockList { - head: *mut SlabBlockHeader, - tail: *mut SlabBlockHeader, -} - -impl Default for BlockList { - fn default() -> Self { - BlockList { - head: std::ptr::null_mut(), - tail: std::ptr::null_mut(), - } - } -} - -impl BlockList { - unsafe fn push_head(&mut self, elem: *mut SlabBlockHeader) { - unsafe { - if self.is_empty() { - self.tail = elem; - (*elem).next = std::ptr::null_mut(); - } else { - (*elem).next = self.head; - (*self.head).prev = elem; - } - (*elem).prev = std::ptr::null_mut(); - self.head = elem; - } - } - - fn is_empty(&self) -> bool { - self.head.is_null() - } - - unsafe fn unlink(&mut self, elem: *mut SlabBlockHeader) { - unsafe { unlink_slab_block(Some(self), elem) } - } - - #[cfg(test)] - fn dump(&self) { - let mut next = self.head; - - while !next.is_null() { - let n = unsafe { next.as_ref() }.unwrap(); - eprintln!( - " blk {:?} (free {}/{})", - next, - n.num_free_chunks.load(Ordering::Relaxed), - n.num_chunks - ); - next = n.next; - } - } -} - -impl SlabDesc { - pub(crate) fn new(layout: &Layout) -> SlabDesc { - SlabDesc { - layout: *layout, - block_lists: spin::RwLock::new(BlockLists::default()), - num_allocated: AtomicU64::new(0), - num_blocks: AtomicU64::new(0), - } - } -} - -#[derive(Debug)] -struct SlabBlockHeader { - free_chunks_head: spin::Mutex<*mut FreeChunk>, - num_free_chunks: AtomicU32, - num_chunks: u32, // this is really a constant for a given Layout - - // these fields are protected by the lock on the BlockLists - prev: *mut SlabBlockHeader, - next: *mut SlabBlockHeader, -} - -struct FreeChunk { - next: 
*mut FreeChunk, -} - -enum ReadOrWriteGuard<'a, T> { - Read(spin::RwLockReadGuard<'a, T>), - Write(spin::RwLockWriteGuard<'a, T>), -} - -impl<'a, T> Deref for ReadOrWriteGuard<'a, T> { - type Target = T; - - fn deref(&self) -> &::Target { - match self { - ReadOrWriteGuard::Read(g) => g.deref(), - ReadOrWriteGuard::Write(g) => g.deref(), - } - } -} - -impl SlabDesc { - pub fn alloc_chunk(&self, block_allocator: &BlockAllocator) -> *mut u8 { - // Are there any free chunks? - let mut acquire_write = false; - 'outer: loop { - let mut block_lists_guard = if acquire_write { - ReadOrWriteGuard::Write(self.block_lists.write()) - } else { - ReadOrWriteGuard::Read(self.block_lists.read()) - }; - 'inner: loop { - let block_ptr = block_lists_guard.nonfull_blocks.head; - if block_ptr.is_null() { - break 'outer; - } - unsafe { - let mut free_chunks_head = (*block_ptr).free_chunks_head.lock(); - if !(*free_chunks_head).is_null() { - let result = *free_chunks_head; - (*free_chunks_head) = (*result).next; - let _old = (*block_ptr).num_free_chunks.fetch_sub(1, Ordering::Relaxed); - - self.num_allocated.fetch_add(1, Ordering::Relaxed); - return result.cast(); - } - } - - // The block at the head of the list was full. Grab write lock and retry - match block_lists_guard { - ReadOrWriteGuard::Read(_) => { - acquire_write = true; - continue 'outer; - } - ReadOrWriteGuard::Write(ref mut g) => { - // move the node to the list of full blocks - unsafe { - g.nonfull_blocks.unlink(block_ptr); - g.full_blocks.push_head(block_ptr); - }; - continue 'inner; - } - } - } - } - - // no free chunks. Allocate a new block (and the chunk from that) - let (new_block, new_chunk) = self.alloc_block_and_chunk(block_allocator); - self.num_blocks.fetch_add(1, Ordering::Relaxed); - - // Add the block to the list in the SlabDesc - unsafe { - let mut block_lists_guard = self.block_lists.write(); - block_lists_guard.nonfull_blocks.push_head(new_block); - } - self.num_allocated.fetch_add(1, Ordering::Relaxed); - new_chunk - } - - pub fn dealloc_chunk(&self, chunk_ptr: *mut u8, _block_allocator: &BlockAllocator) { - // Find the block it belongs to. You can find the block from the address. (And knowing the - // layout, you could calculate the chunk number too.) - let block_ptr: *mut SlabBlockHeader = { - let block_addr = (chunk_ptr.addr() / BLOCK_SIZE) * BLOCK_SIZE; - chunk_ptr.with_addr(block_addr).cast() - }; - let chunk_ptr: *mut FreeChunk = chunk_ptr.cast(); - - // Mark the chunk as free in 'freechunks' list - let num_chunks; - let num_free_chunks; - unsafe { - let mut free_chunks_head = (*block_ptr).free_chunks_head.lock(); - (*chunk_ptr).next = *free_chunks_head; - *free_chunks_head = chunk_ptr; - - num_free_chunks = (*block_ptr).num_free_chunks.fetch_add(1, Ordering::Relaxed) + 1; - num_chunks = (*block_ptr).num_chunks; - } - - if num_free_chunks == 1 { - // If the block was full previously, add it to the nonfull blocks list. Note that - // we're not holding the lock anymore, so it can immediately become full again. - // That's harmless, it will be moved back to the full list again when a call - // to alloc_chunk() sees it. - let mut block_lists = self.block_lists.write(); - unsafe { - block_lists.unlink(block_ptr); - block_lists.nonfull_blocks.push_head(block_ptr); - }; - } else if num_free_chunks == num_chunks { - // If the block became completely empty, move it to the free list - // TODO - // FIXME: we're still holding the spinlock. It's not exactly safe to return it to - // the free blocks list, is it? 
Defer it as garbage to wait out concurrent updates? - //block_allocator.release_block() - } - - // update stats - self.num_allocated.fetch_sub(1, Ordering::Relaxed); - } - - fn alloc_block_and_chunk( - &self, - block_allocator: &BlockAllocator, - ) -> (*mut SlabBlockHeader, *mut u8) { - // fixme: handle OOM - let block_slice: &mut [MaybeUninit] = block_allocator.alloc_block(); - let (block_header, remain) = alloc_from_slice::(block_slice); - - let padding = remain.as_ptr().align_offset(self.layout.align()); - - let num_chunks = (remain.len() - padding) / self.layout.size(); - - let first_chunk_ptr: *mut FreeChunk = remain[padding..].as_mut_ptr().cast(); - - unsafe { - let mut chunk_ptr = first_chunk_ptr; - for _ in 0..num_chunks - 1 { - let next_chunk_ptr = chunk_ptr.byte_add(self.layout.size()); - (*chunk_ptr).next = next_chunk_ptr; - chunk_ptr = next_chunk_ptr; - } - (*chunk_ptr).next = std::ptr::null_mut(); - - let result_chunk = first_chunk_ptr; - - let block_header = block_header.write(SlabBlockHeader { - free_chunks_head: spin::Mutex::new((*first_chunk_ptr).next), - prev: std::ptr::null_mut(), - next: std::ptr::null_mut(), - num_chunks: num_chunks as u32, - num_free_chunks: AtomicU32::new(num_chunks as u32 - 1), - }); - - (block_header, result_chunk.cast()) - } - } - - #[cfg(test)] - fn dump(&self) { - eprintln!( - "slab dump ({} blocks, {} allocated chunks)", - self.num_blocks.load(Ordering::Relaxed), - self.num_allocated.load(Ordering::Relaxed) - ); - let lists = self.block_lists.read(); - - eprintln!("nonfull blocks:"); - lists.nonfull_blocks.dump(); - eprintln!("full blocks:"); - lists.full_blocks.dump(); - } -} - -#[cfg(test)] -mod tests { - use super::*; - - use rand::Rng; - use rand_distr::Zipf; - - struct TestObject { - val: usize, - _dummy: [u8; BLOCK_SIZE / 4], - } - - struct TestObjectSlab<'a>(SlabDesc, BlockAllocator<'a>); - impl<'a> TestObjectSlab<'a> { - fn new(block_allocator: BlockAllocator) -> TestObjectSlab { - TestObjectSlab(SlabDesc::new(&Layout::new::()), block_allocator) - } - - fn alloc(&self, val: usize) -> *mut TestObject { - let obj: *mut TestObject = self.0.alloc_chunk(&self.1).cast(); - unsafe { (*obj).val = val }; - obj - } - - fn dealloc(&self, obj: *mut TestObject) { - self.0.dealloc_chunk(obj.cast(), &self.1) - } - } - - #[test] - fn test_slab_alloc() { - const MEM_SIZE: usize = 100000000; - let mut area = Box::new_uninit_slice(MEM_SIZE); - let block_allocator = BlockAllocator::new(&mut area); - - let slab = TestObjectSlab::new(block_allocator); - - let mut all: Vec<*mut TestObject> = Vec::new(); - for i in 0..11 { - all.push(slab.alloc(i)); - } - #[allow(clippy::needless_range_loop)] - for i in 0..11 { - assert!(unsafe { (*all[i]).val == i }); - } - - let distribution = Zipf::new(10.0, 1.1).unwrap(); - let mut rng = rand::rng(); - for _ in 0..100000 { - slab.0.dump(); - let idx = rng.sample(distribution) as usize; - let ptr: *mut TestObject = all[idx]; - if !ptr.is_null() { - assert_eq!(unsafe { (*ptr).val }, idx); - slab.dealloc(ptr); - all[idx] = std::ptr::null_mut(); - } else { - all[idx] = slab.alloc(idx); - } - } - } - - fn new_test_blk(i: u32) -> *mut SlabBlockHeader { - Box::into_raw(Box::new(SlabBlockHeader { - free_chunks_head: spin::Mutex::new(std::ptr::null_mut()), - num_free_chunks: AtomicU32::new(0), - num_chunks: i, - prev: std::ptr::null_mut(), - next: std::ptr::null_mut(), - })) - } - - #[test] - fn test_block_linked_list() { - // note: these are leaked, but that's OK for tests - let a = new_test_blk(0); - let b = new_test_blk(1); - - 
let mut list = BlockList::default(); - assert!(list.is_empty()); - - unsafe { - list.push_head(a); - assert!(!list.is_empty()); - list.unlink(a); - } - assert!(list.is_empty()); - - unsafe { - list.push_head(b); - list.push_head(a); - assert_eq!(list.head, a); - assert_eq!((*a).next, b); - assert_eq!((*b).prev, a); - assert_eq!(list.tail, b); - - list.unlink(a); - list.unlink(b); - assert!(list.is_empty()); - } - } -} diff --git a/libs/neonart/src/allocator/static.rs b/libs/neonart/src/allocator/static.rs deleted file mode 100644 index ab1683c411..0000000000 --- a/libs/neonart/src/allocator/static.rs +++ /dev/null @@ -1,44 +0,0 @@ -use std::mem::MaybeUninit; - -pub fn alloc_from_slice( - area: &mut [MaybeUninit], -) -> (&mut MaybeUninit, &mut [MaybeUninit]) { - let layout = std::alloc::Layout::new::(); - - let area_start = area.as_mut_ptr(); - - // pad to satisfy alignment requirements - let padding = area_start.align_offset(layout.align()); - if padding + layout.size() > area.len() { - panic!("out of memory"); - } - let area = &mut area[padding..]; - let (result_area, remain) = area.split_at_mut(layout.size()); - - let result_ptr: *mut MaybeUninit = result_area.as_mut_ptr().cast(); - let result = unsafe { result_ptr.as_mut().unwrap() }; - - (result, remain) -} - -pub fn alloc_array_from_slice( - area: &mut [MaybeUninit], - len: usize, -) -> (&mut [MaybeUninit], &mut [MaybeUninit]) { - let layout = std::alloc::Layout::new::(); - - let area_start = area.as_mut_ptr(); - - // pad to satisfy alignment requirements - let padding = area_start.align_offset(layout.align()); - if padding + layout.size() * len > area.len() { - panic!("out of memory"); - } - let area = &mut area[padding..]; - let (result_area, remain) = area.split_at_mut(layout.size() * len); - - let result_ptr: *mut MaybeUninit = result_area.as_mut_ptr().cast(); - let result = unsafe { std::slice::from_raw_parts_mut(result_ptr.as_mut().unwrap(), len) }; - - (result, remain) -} diff --git a/libs/neonart/src/epoch.rs b/libs/neonart/src/epoch.rs deleted file mode 100644 index a1a112bd17..0000000000 --- a/libs/neonart/src/epoch.rs +++ /dev/null @@ -1,142 +0,0 @@ -//! This is similar to crossbeam_epoch crate, but works in shared memory - -use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; - -use crossbeam_utils::CachePadded; - -const NUM_SLOTS: usize = 1000; - -/// This is the struct that is stored in shmem -/// -/// bit 0: is it pinned or not? -/// rest of the bits are the epoch counter. -pub struct EpochShared { - global_epoch: AtomicU64, - participants: [CachePadded; NUM_SLOTS], - - broadcast_lock: spin::Mutex<()>, -} - -impl EpochShared { - pub fn new() -> EpochShared { - EpochShared { - global_epoch: AtomicU64::new(2), - participants: [const { CachePadded::new(AtomicU64::new(2)) }; NUM_SLOTS], - broadcast_lock: spin::Mutex::new(()), - } - } - - pub fn register(&self) -> LocalHandle { - LocalHandle { - global: self, - last_slot: AtomicUsize::new(0), // todo: choose more intelligently - } - } - - fn release_pin(&self, slot: usize, _epoch: u64) { - let global_epoch = self.global_epoch.load(Ordering::Relaxed); - self.participants[slot].store(global_epoch, Ordering::Relaxed); - } - - fn pin_internal(&self, slot_hint: usize) -> (usize, u64) { - // pick a slot - let mut slot = slot_hint; - let epoch = loop { - let old = self.participants[slot].fetch_or(1, Ordering::Relaxed); - if old & 1 == 0 { - // Got this slot - break old; - } - - // the slot was busy by another thread / process. 
try a different slot - slot += 1; - if slot == NUM_SLOTS { - slot = 0; - } - continue; - }; - (slot, epoch) - } - - pub(crate) fn advance(&self) -> u64 { - // Advance the global epoch - let old_epoch = self.global_epoch.fetch_add(2, Ordering::Relaxed); - // Anyone that release their pin after this will update their slot. - old_epoch + 2 - } - - pub(crate) fn broadcast(&self) { - let Some(_guard) = self.broadcast_lock.try_lock() else { - return; - }; - - let epoch = self.global_epoch.load(Ordering::Relaxed); - let old_epoch = epoch.wrapping_sub(2); - - // Update all free slots. - for i in 0..NUM_SLOTS { - // TODO: check result, as a sanity check. It should either be the old epoch, or pinned - let _ = self.participants[i].compare_exchange( - old_epoch, - epoch, - Ordering::Relaxed, - Ordering::Relaxed, - ); - } - - // FIXME: memory fence here, since we used Relaxed? - } - - pub(crate) fn get_oldest(&self) -> u64 { - // Read all slots. - let now = self.global_epoch.load(Ordering::Relaxed); - let mut oldest = now; - for i in 0..NUM_SLOTS { - let this_epoch = self.participants[i].load(Ordering::Relaxed); - let delta = now.wrapping_sub(this_epoch); - if delta > u64::MAX / 2 { - // this is very recent - } else if delta > now.wrapping_sub(oldest) { - oldest = this_epoch; - } - } - oldest - } - - pub(crate) fn get_current(&self) -> u64 { - self.global_epoch.load(Ordering::Relaxed) - } -} - -pub(crate) struct EpochPin<'e> { - slot: usize, - pub(crate) epoch: u64, - - handle: &'e LocalHandle<'e>, -} - -impl<'e> Drop for EpochPin<'e> { - fn drop(&mut self) { - self.handle.global.release_pin(self.slot, self.epoch); - } -} - -pub struct LocalHandle<'g> { - global: &'g EpochShared, - - last_slot: AtomicUsize, -} - -impl<'g> LocalHandle<'g> { - pub fn pin(&self) -> EpochPin { - let (slot, epoch) = self - .global - .pin_internal(self.last_slot.load(Ordering::Relaxed)); - self.last_slot.store(slot, Ordering::Relaxed); - EpochPin { - handle: self, - epoch, - slot, - } - } -} diff --git a/libs/neonart/src/lib.rs b/libs/neonart/src/lib.rs deleted file mode 100644 index 5c1c36a91e..0000000000 --- a/libs/neonart/src/lib.rs +++ /dev/null @@ -1,583 +0,0 @@ -//! Adaptive Radix Tree (ART) implementation, with Optimistic Lock Coupling. -//! -//! The data structure is described in these two papers: -//! -//! [1] Leis, V. & Kemper, Alfons & Neumann, Thomas. (2013). -//! The adaptive radix tree: ARTful indexing for main-memory databases. -//! Proceedings - International Conference on Data Engineering. 38-49. 10.1109/ICDE.2013.6544812. -//! https://db.in.tum.de/~leis/papers/ART.pdf -//! -//! [2] Leis, Viktor & Scheibner, Florian & Kemper, Alfons & Neumann, Thomas. (2016). -//! The ART of practical synchronization. -//! 1-8. 10.1145/2933349.2933352. -//! https://db.in.tum.de/~leis/papers/artsync.pdf -//! -//! [1] describes the base data structure, and [2] describes the Optimistic Lock Coupling that we -//! use. -//! -//! The papers mention a few different variants. We have made the following choices in this -//! implementation: -//! -//! - All keys have the same length -//! -//! - Single-value leaves. -//! -//! - For collapsing inner nodes, we use the Pessimistic approach, where each inner node stores a -//! variable length "prefix", which stores the keys of all the one-way nodes which have been -//! removed. However, similar to the "hybrid" approach described in the paper, each node only has -//! space for a constant-size prefix of 8 bytes. If a node would have a longer prefix, then we -//! 
create create one-way nodes to store them. (There was no particular reason for this choice, -//! the "hybrid" approach described in the paper might be better.) -//! -//! - For concurrency, we use Optimistic Lock Coupling. The paper [2] also describes another method, -//! ROWEX, which generally performs better when there is contention, but that is not important -//! for use and Optimisic Lock Coupling is simpler to implement. -//! -//! ## Requirements -//! -//! This data structure is currently used for the integrated LFC, relsize and last-written LSN cache -//! in the compute communicator, part of the 'neon' Postgres extension. We have some unique -//! requirements, which is why we had to write our own. Namely: -//! -//! - The data structure has to live in fixed-sized shared memory segment. That rules out any -//! built-in Rust collections and most crates. (Except possibly with the 'allocator_api' rust -//! feature, which still nightly-only experimental as of this writing). -//! -//! - The data structure is accessed from multiple processes. Only one process updates the data -//! structure, but other processes perform reads. That rules out using built-in Rust locking -//! primitives like Mutex and RwLock, and most crates too. -//! -//! - Within the one process with write-access, multiple threads can perform updates concurrently. -//! That rules out using PostgreSQL LWLocks for the locking. -//! -//! The implementation is generic, and doesn't depend on any PostgreSQL specifics, but it has been -//! written with that usage and the above constraints in mind. Some noteworthy assumptions: -//! -//! - Contention is assumed to be rare. In the integrated cache in PostgreSQL, there's higher level -//! locking in the PostgreSQL buffer manager, which ensures that two backends should not try to -//! read / write the same page at the same time. (Prefetching can conflict with actual reads, -//! however.) -//! -//! - The keys in the integrated cache are 17 bytes long. -//! -//! ## Usage -//! -//! Because this is designed to be used as a Postgres shared memory data structure, initialization -//! happens in three stages: -//! -//! 0. A fixed area of shared memory is allocated at postmaster startup. -//! -//! 1. TreeInitStruct::new() is called to initialize it, still in Postmaster process, before any -//! other process or thread is running. It returns a TreeInitStruct, which is inherited by all -//! the processes through fork(). -//! -//! 2. One process may have write-access to the struct, by calling -//! [TreeInitStruct::attach_writer]. (That process is the communicator process.) -//! -//! 3. Other processes get read-access to the struct, by calling [TreeInitStruct::attach_reader] -//! -//! "Write access" means that you can insert / update / delete values in the tree. -//! -//! NOTE: The Values stored in the tree are sometimes moved, when a leaf node fills up and a new -//! larger node needs to be allocated. The versioning and epoch-based allocator ensure that the data -//! structure stays consistent, but if the Value has interior mutability, like atomic fields, -//! updates to such fields might be lost if the leaf node is concurrently moved! If that becomes a -//! problem, the version check could be passed up to the caller, so that the caller could detect the -//! lost updates and retry the operation. -//! -//! ## Implementation -//! -//! node_ptr: Provides low-level implementations of the four different node types (eight actually, -//! since there is an Internal and Leaf variant of each) -//! -//! 
lock_and_version.rs: Provides an abstraction for the combined lock and version counter on each -//! node. -//! -//! node_ref.rs: The code in node_ptr.rs deals with raw pointers. node_ref.rs provides more type-safe -//! abstractions on top. -//! -//! algorithm.rs: Contains the functions to implement lookups and updates in the tree -//! -//! allocator.rs: Provides a facility to allocate memory for the tree nodes. (We must provide our -//! own abstraction for that because we need the data structure to live in a pre-allocated shared -//! memory segment). -//! -//! epoch.rs: The data structure requires that when a node is removed from the tree, it is not -//! immediately deallocated, but stays around for as long as concurrent readers might still have -//! pointers to them. This is enforced by an epoch system. This is similar to -//! e.g. crossbeam_epoch, but we couldn't use that either because it has to work across processes -//! communicating over the shared memory segment. -//! -//! ## See also -//! -//! There are some existing Rust ART implementations out there, but none of them filled all -//! the requirements: -//! -//! - https://github.com/XiangpengHao/congee -//! - https://github.com/declanvk/blart -//! -//! ## TODO -//! -//! - Removing values has not been implemented - -mod algorithm; -pub mod allocator; -mod epoch; - -use algorithm::RootPtr; -use algorithm::node_ptr::NodePtr; - -use std::collections::VecDeque; -use std::fmt::Debug; -use std::marker::PhantomData; -use std::ptr::NonNull; -use std::sync::atomic::{AtomicBool, Ordering}; - -use crate::epoch::EpochPin; - -#[cfg(test)] -mod tests; - -use allocator::ArtAllocator; -pub use allocator::ArtMultiSlabAllocator; -pub use allocator::OutOfMemoryError; - -/// Fixed-length key type. -/// -pub trait Key: Debug { - const KEY_LEN: usize; - - fn as_bytes(&self) -> &[u8]; -} - -/// Values stored in the tree -/// -/// Values need to be Cloneable, because when a node "grows", the value is copied to a new node and -/// the old sticks around until all readers that might see the old value are gone. -// fixme obsolete, no longer needs Clone -pub trait Value {} - -const MAX_GARBAGE: usize = 1024; - -/// The root of the tree, plus other tree-wide data. This is stored in the shared memory. -pub struct Tree { - /// For simplicity, so that we never need to grow or shrink the root, the root node is always an - /// Internal256 node. Also, it never has a prefix (that's actually a bit wasteful, incurring one - /// indirection to every lookup) - root: RootPtr, - - writer_attached: AtomicBool, - - epoch: epoch::EpochShared, -} - -unsafe impl Sync for Tree {} -unsafe impl Send for Tree {} - -struct GarbageQueue(VecDeque<(NodePtr, u64)>); - -unsafe impl Sync for GarbageQueue {} -unsafe impl Send for GarbageQueue {} - -impl GarbageQueue { - fn new() -> GarbageQueue { - GarbageQueue(VecDeque::with_capacity(MAX_GARBAGE)) - } - - fn remember_obsolete_node(&mut self, ptr: NodePtr, epoch: u64) { - self.0.push_front((ptr, epoch)); - } - - fn next_obsolete(&mut self, cutoff_epoch: u64) -> Option> { - if let Some(back) = self.0.back() { - if back.1 < cutoff_epoch { - return Some(self.0.pop_back().unwrap().0); - } - } - None - } -} - -/// Struct created at postmaster startup -pub struct TreeInitStruct<'t, K: Key, V: Value, A: ArtAllocator> { - tree: &'t Tree, - - allocator: &'t A, - - phantom_key: PhantomData, -} - -/// The worker process has a reference to this. 
The write operations are only safe -/// from the worker process -pub struct TreeWriteAccess<'t, K: Key, V: Value, A: ArtAllocator> -where - K: Key, - V: Value, -{ - tree: &'t Tree, - - pub allocator: &'t A, - - epoch_handle: epoch::LocalHandle<'t>, - - phantom_key: PhantomData, - - /// Obsolete nodes that cannot be recycled until their epoch expires. - garbage: spin::Mutex>, -} - -/// The backends have a reference to this. It cannot be used to modify the tree -pub struct TreeReadAccess<'t, K: Key, V: Value> -where - K: Key, - V: Value, -{ - tree: &'t Tree, - - epoch_handle: epoch::LocalHandle<'t>, - - phantom_key: PhantomData, -} - -impl<'t, K: Key, V: Value, A: ArtAllocator> TreeInitStruct<'t, K, V, A> { - pub fn new(allocator: &'t A) -> TreeInitStruct<'t, K, V, A> { - let tree_ptr = allocator.alloc_tree(); - let tree_ptr = NonNull::new(tree_ptr).expect("out of memory"); - let init = Tree { - root: algorithm::new_root(allocator).expect("out of memory"), - writer_attached: AtomicBool::new(false), - epoch: epoch::EpochShared::new(), - }; - unsafe { tree_ptr.write(init) }; - - TreeInitStruct { - tree: unsafe { tree_ptr.as_ref() }, - allocator, - phantom_key: PhantomData, - } - } - - pub fn attach_writer(self) -> TreeWriteAccess<'t, K, V, A> { - let previously_attached = self.tree.writer_attached.swap(true, Ordering::Relaxed); - if previously_attached { - panic!("writer already attached"); - } - TreeWriteAccess { - tree: self.tree, - allocator: self.allocator, - phantom_key: PhantomData, - epoch_handle: self.tree.epoch.register(), - garbage: spin::Mutex::new(GarbageQueue::new()), - } - } - - pub fn attach_reader(self) -> TreeReadAccess<'t, K, V> { - TreeReadAccess { - tree: self.tree, - phantom_key: PhantomData, - epoch_handle: self.tree.epoch.register(), - } - } -} - -impl<'t, K: Key, V: Value, A: ArtAllocator> TreeWriteAccess<'t, K, V, A> { - pub fn start_write<'g>(&'t self) -> TreeWriteGuard<'g, K, V, A> - where - 't: 'g, - { - TreeWriteGuard { - tree_writer: self, - epoch_pin: self.epoch_handle.pin(), - phantom_key: PhantomData, - created_garbage: false, - } - } - - pub fn start_read(&'t self) -> TreeReadGuard<'t, K, V> { - TreeReadGuard { - tree: self.tree, - epoch_pin: self.epoch_handle.pin(), - phantom_key: PhantomData, - } - } -} - -impl<'t, K: Key, V: Value> TreeReadAccess<'t, K, V> { - pub fn start_read(&'t self) -> TreeReadGuard<'t, K, V> { - TreeReadGuard { - tree: self.tree, - epoch_pin: self.epoch_handle.pin(), - phantom_key: PhantomData, - } - } -} - -pub struct TreeReadGuard<'e, K, V> -where - K: Key, - V: Value, -{ - tree: &'e Tree, - - epoch_pin: EpochPin<'e>, - phantom_key: PhantomData, -} - -impl<'e, K: Key, V: Value> TreeReadGuard<'e, K, V> { - pub fn get(&'e self, key: &K) -> Option<&'e V> { - algorithm::search(key, self.tree.root, &self.epoch_pin) - } -} - -pub struct TreeWriteGuard<'e, K, V, A> -where - K: Key, - V: Value, - A: ArtAllocator, -{ - tree_writer: &'e TreeWriteAccess<'e, K, V, A>, - - epoch_pin: EpochPin<'e>, - phantom_key: PhantomData, - - created_garbage: bool, -} - -pub enum UpdateAction { - Nothing, - Insert(V), - Remove, -} - -impl<'e, K: Key, V: Value, A: ArtAllocator> TreeWriteGuard<'e, K, V, A> { - /// Get a value - pub fn get(&'e mut self, key: &K) -> Option<&'e V> { - algorithm::search(key, self.tree_writer.tree.root, &self.epoch_pin) - } - - /// Insert a value - pub fn insert(self, key: &K, value: V) -> Result { - let mut success = None; - - self.update_with_fn(key, |existing| { - if existing.is_some() { - success = Some(false); - 
UpdateAction::Nothing - } else { - success = Some(true); - UpdateAction::Insert(value) - } - })?; - Ok(success.expect("value_fn not called")) - } - - /// Remove value. Returns true if it existed - pub fn remove(self, key: &K) -> bool { - let mut result = false; - // FIXME: It's not clear if OOM is expected while removing. It seems - // not nice, but shrinking a node can OOM. Then again, we could opt - // to not shrink a node if we cannot allocate, to live a little longer. - self.update_with_fn(key, |existing| match existing { - Some(_) => { - result = true; - UpdateAction::Remove - } - None => UpdateAction::Nothing, - }) - .expect("out of memory while removing"); - result - } - - /// Try to remove value and return the old value. - pub fn remove_and_return(self, key: &K) -> Option - where - V: Clone, - { - let mut old = None; - self.update_with_fn(key, |existing| { - old = existing.cloned(); - UpdateAction::Remove - }) - .expect("out of memory while removing"); - old - } - - /// Update key using the given function. All the other modifying operations are based on this. - /// - /// The function is passed a reference to the existing value, if any. If the function - /// returns None, the value is removed from the tree (or if there was no existing value, - /// does nothing). If the function returns Some, the existing value is replaced, of if there - /// was no existing value, it is inserted. FIXME: update comment - pub fn update_with_fn(mut self, key: &K, value_fn: F) -> Result<(), OutOfMemoryError> - where - F: FnOnce(Option<&V>) -> UpdateAction, - { - algorithm::update_fn(key, value_fn, self.tree_writer.tree.root, &mut self)?; - - if self.created_garbage { - let _ = self.collect_garbage(); - } - Ok(()) - } - - fn remember_obsolete_node(&mut self, ptr: NodePtr) { - self.tree_writer - .garbage - .lock() - .remember_obsolete_node(ptr, self.epoch_pin.epoch); - self.created_garbage = true; - } - - // returns number of nodes recycled - fn collect_garbage(&self) -> usize { - self.tree_writer.tree.epoch.advance(); - self.tree_writer.tree.epoch.broadcast(); - - let cutoff_epoch = self.tree_writer.tree.epoch.get_oldest(); - - let mut result = 0; - let mut garbage_queue = self.tree_writer.garbage.lock(); - while let Some(ptr) = garbage_queue.next_obsolete(cutoff_epoch) { - ptr.deallocate(self.tree_writer.allocator); - result += 1; - } - result - } -} - -pub struct TreeIterator -where - K: Key + for<'a> From<&'a [u8]>, -{ - done: bool, - pub next_key: Vec, - max_key: Option>, - - phantom_key: PhantomData, -} - -impl TreeIterator -where - K: Key + for<'a> From<&'a [u8]>, -{ - pub fn new_wrapping() -> TreeIterator { - TreeIterator { - done: false, - next_key: vec![0; K::KEY_LEN], - max_key: None, - phantom_key: PhantomData, - } - } - - pub fn new(range: &std::ops::Range) -> TreeIterator { - let result = TreeIterator { - done: false, - next_key: Vec::from(range.start.as_bytes()), - max_key: Some(Vec::from(range.end.as_bytes())), - phantom_key: PhantomData, - }; - assert_eq!(result.next_key.len(), K::KEY_LEN); - assert_eq!(result.max_key.as_ref().unwrap().len(), K::KEY_LEN); - - result - } - - pub fn next<'g, V>(&mut self, read_guard: &'g TreeReadGuard<'g, K, V>) -> Option<(K, &'g V)> - where - V: Value, - { - if self.done { - return None; - } - - let mut wrapped_around = false; - loop { - assert_eq!(self.next_key.len(), K::KEY_LEN); - if let Some((k, v)) = - algorithm::iter_next(&self.next_key, read_guard.tree.root, &read_guard.epoch_pin) - { - assert_eq!(k.len(), K::KEY_LEN); - 
assert_eq!(self.next_key.len(), K::KEY_LEN); - - // Check if we reached the end of the range - if let Some(max_key) = &self.max_key { - if k.as_slice() >= max_key.as_slice() { - self.done = true; - break None; - } - } - - // increment the key - self.next_key = k.clone(); - increment_key(self.next_key.as_mut_slice()); - let k = k.as_slice().into(); - - break Some((k, v)); - } else { - if self.max_key.is_some() { - self.done = true; - } else { - // Start from beginning - if !wrapped_around { - for i in 0..K::KEY_LEN { - self.next_key[i] = 0; - } - wrapped_around = true; - continue; - } else { - // The tree is completely empty - // FIXME: perhaps we should remember the starting point instead. - // Currently this will scan some ranges twice. - break None; - } - } - break None; - } - } - } -} - -fn increment_key(key: &mut [u8]) -> bool { - for i in (0..key.len()).rev() { - let (byte, overflow) = key[i].overflowing_add(1); - key[i] = byte; - if !overflow { - return false; - } - } - true -} - -// Debugging functions -impl<'e, K: Key, V: Value + Debug, A: ArtAllocator> TreeWriteGuard<'e, K, V, A> { - pub fn dump(&mut self, dst: &mut dyn std::io::Write) { - algorithm::dump_tree(self.tree_writer.tree.root, &self.epoch_pin, dst) - } -} -impl<'e, K: Key, V: Value + Debug> TreeReadGuard<'e, K, V> { - pub fn dump(&mut self, dst: &mut dyn std::io::Write) { - algorithm::dump_tree(self.tree.root, &self.epoch_pin, dst) - } -} -impl<'e, K: Key, V: Value> TreeWriteAccess<'e, K, V, ArtMultiSlabAllocator<'e, V>> { - pub fn get_statistics(&self) -> ArtTreeStatistics { - self.allocator.get_statistics(); - ArtTreeStatistics { - blocks: self.allocator.inner.block_allocator.get_statistics(), - slabs: self.allocator.get_statistics(), - epoch: self.tree.epoch.get_current(), - oldest_epoch: self.tree.epoch.get_oldest(), - num_garbage: self.garbage.lock().0.len() as u64, - } - } -} - -#[derive(Clone, Debug)] -pub struct ArtTreeStatistics { - pub blocks: allocator::block::BlockAllocatorStats, - pub slabs: allocator::ArtMultiSlabStats, - - pub epoch: u64, - pub oldest_epoch: u64, - pub num_garbage: u64, -} diff --git a/libs/neonart/src/tests.rs b/libs/neonart/src/tests.rs deleted file mode 100644 index 2afe41517c..0000000000 --- a/libs/neonart/src/tests.rs +++ /dev/null @@ -1,236 +0,0 @@ -use std::collections::BTreeMap; -use std::collections::HashSet; -use std::fmt::{Debug, Formatter}; -use std::sync::atomic::{AtomicUsize, Ordering}; - -use crate::ArtAllocator; -use crate::ArtMultiSlabAllocator; -use crate::TreeInitStruct; -use crate::TreeIterator; -use crate::TreeWriteAccess; -use crate::UpdateAction; - -use crate::{Key, Value}; - -use rand::Rng; -use rand::seq::SliceRandom; -use rand_distr::Zipf; - -const TEST_KEY_LEN: usize = 16; - -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -struct TestKey([u8; TEST_KEY_LEN]); - -impl TestKey { - const MIN: TestKey = TestKey([0; TEST_KEY_LEN]); - const MAX: TestKey = TestKey([u8::MAX; TEST_KEY_LEN]); -} - -impl Key for TestKey { - const KEY_LEN: usize = TEST_KEY_LEN; - fn as_bytes(&self) -> &[u8] { - &self.0 - } -} - -impl From<&TestKey> for u128 { - fn from(val: &TestKey) -> u128 { - u128::from_be_bytes(val.0) - } -} - -impl From for TestKey { - fn from(val: u128) -> TestKey { - TestKey(val.to_be_bytes()) - } -} - -impl<'a> From<&'a [u8]> for TestKey { - fn from(bytes: &'a [u8]) -> TestKey { - TestKey(bytes.try_into().unwrap()) - } -} - -impl Value for usize {} - -fn test_inserts + Copy>(keys: &[K]) { - const MEM_SIZE: usize = 10000000; - let mut area = 
Box::new_uninit_slice(MEM_SIZE); - - let allocator = ArtMultiSlabAllocator::new(&mut area); - - let init_struct = TreeInitStruct::::new(allocator); - let tree_writer = init_struct.attach_writer(); - - for (idx, k) in keys.iter().enumerate() { - let w = tree_writer.start_write(); - let res = w.insert(&(*k).into(), idx); - assert!(res.is_ok()); - } - - for (idx, k) in keys.iter().enumerate() { - let r = tree_writer.start_read(); - let value = r.get(&(*k).into()); - assert_eq!(value, Some(idx).as_ref()); - } - - eprintln!("stats: {:?}", tree_writer.get_statistics()); -} - -#[test] -fn dense() { - // This exercises splitting a node with prefix - let keys: &[u128] = &[0, 1, 2, 3, 256]; - test_inserts(keys); - - // Dense keys - let mut keys: Vec = (0..10000).collect(); - test_inserts(&keys); - - // Do the same in random orders - for _ in 1..10 { - keys.shuffle(&mut rand::rng()); - test_inserts(&keys); - } -} - -#[test] -fn sparse() { - // sparse keys - let mut keys: Vec = Vec::new(); - let mut used_keys = HashSet::new(); - for _ in 0..10000 { - loop { - let key = rand::random::(); - if used_keys.contains(&key) { - continue; - } - used_keys.insert(key); - keys.push(key.into()); - break; - } - } - test_inserts(&keys); -} - -struct TestValue(AtomicUsize); - -impl TestValue { - fn new(val: usize) -> TestValue { - TestValue(AtomicUsize::new(val)) - } - - fn load(&self) -> usize { - self.0.load(Ordering::Relaxed) - } -} - -impl Value for TestValue {} - -impl Clone for TestValue { - fn clone(&self) -> TestValue { - TestValue::new(self.load()) - } -} - -impl Debug for TestValue { - fn fmt(&self, fmt: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { - write!(fmt, "{:?}", self.load()) - } -} - -#[derive(Clone, Debug)] -struct TestOp(TestKey, Option); - -fn apply_op>( - op: &TestOp, - tree: &TreeWriteAccess, - shadow: &mut BTreeMap, -) { - eprintln!("applying op: {op:?}"); - - // apply the change to the shadow tree first - let shadow_existing = if let Some(v) = op.1 { - shadow.insert(op.0, v) - } else { - shadow.remove(&op.0) - }; - - // apply to Art tree - let w = tree.start_write(); - w.update_with_fn(&op.0, |existing| { - assert_eq!(existing.map(TestValue::load), shadow_existing); - - match (existing, op.1) { - (None, None) => UpdateAction::Nothing, - (None, Some(new_val)) => UpdateAction::Insert(TestValue::new(new_val)), - (Some(_old_val), None) => UpdateAction::Remove, - (Some(old_val), Some(new_val)) => { - old_val.0.store(new_val, Ordering::Relaxed); - UpdateAction::Nothing - } - } - }) - .expect("out of memory"); -} - -fn test_iter>( - tree: &TreeWriteAccess, - shadow: &BTreeMap, -) { - let mut shadow_iter = shadow.iter(); - let mut iter = TreeIterator::new(&(TestKey::MIN..TestKey::MAX)); - - loop { - let shadow_item = shadow_iter.next().map(|(k, v)| (*k, *v)); - let r = tree.start_read(); - let item = iter.next(&r); - - if shadow_item != item.map(|(k, v)| (k, v.load())) { - eprintln!("FAIL: iterator returned {item:?}, expected {shadow_item:?}"); - tree.start_read().dump(&mut std::io::stderr()); - - eprintln!("SHADOW:"); - for si in shadow { - eprintln!("key: {:?}, val: {}", si.0, si.1); - } - panic!("FAIL: iterator returned {item:?}, expected {shadow_item:?}"); - } - if item.is_none() { - break; - } - } -} - -#[test] -fn random_ops() { - const MEM_SIZE: usize = 10000000; - let mut area = Box::new_uninit_slice(MEM_SIZE); - - let allocator = ArtMultiSlabAllocator::new(&mut area); - - let init_struct = TreeInitStruct::::new(allocator); - let tree_writer = init_struct.attach_writer(); - - let mut 
shadow: std::collections::BTreeMap = BTreeMap::new(); - - let distribution = Zipf::new(u128::MAX as f64, 1.1).unwrap(); - let mut rng = rand::rng(); - for i in 0..100000 { - let mut key: TestKey = (rng.sample(distribution) as u128).into(); - - if rng.random_bool(0.10) { - key = TestKey::from(u128::from(&key) | 0xffffffff); - } - - let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None }); - - apply_op(&op, &tree_writer, &mut shadow); - - if i % 1000 == 0 { - eprintln!("{i} ops processed"); - eprintln!("stats: {:?}", tree_writer.get_statistics()); - test_iter(&tree_writer, &shadow); - } - } -}
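// A standalone sketch (not part of the deleted crate) of the reclamation rule behind
// the GarbageQueue in the removed lib.rs: a node unlinked from the tree while the
// writer was pinned at epoch E may only be recycled once the oldest epoch any
// participant could still hold has advanced past E. The node contents and epoch
// numbers below are made up for the demonstration; the real queue stores NodePtrs
// and obtains the cutoff from EpochShared::get_oldest().

use std::collections::VecDeque;

/// Stand-in for an obsolete tree node awaiting reclamation.
type Node = &'static str;

struct GarbageQueue(VecDeque<(Node, u64)>);

impl GarbageQueue {
    fn new() -> Self {
        GarbageQueue(VecDeque::new())
    }

    /// Retired nodes are pushed to the front, tagged with the epoch at which they
    /// were unlinked from the tree.
    fn remember_obsolete_node(&mut self, node: Node, epoch: u64) {
        self.0.push_front((node, epoch));
    }

    /// The oldest entry may be recycled only once its epoch is strictly older than
    /// the cutoff, i.e. older than anything a still-pinned reader could observe.
    fn next_obsolete(&mut self, cutoff_epoch: u64) -> Option<Node> {
        if let Some(back) = self.0.back() {
            if back.1 < cutoff_epoch {
                return Some(self.0.pop_back().unwrap().0);
            }
        }
        None
    }
}

fn main() {
    let mut garbage = GarbageQueue::new();

    // A leaf that was replaced while the global epoch was 4 is retired, not freed.
    garbage.remember_obsolete_node("old leaf", 4);

    // Some reader is still pinned at epoch 4, so the cutoff is 4 and nothing can
    // be recycled yet.
    assert_eq!(garbage.next_obsolete(4), None);

    // Once every reader has released its pin and the oldest live epoch has advanced
    // to 6, the retired node is finally safe to deallocate.
    assert_eq!(garbage.next_obsolete(6), Some("old leaf"));
}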