more memory allocation stuff

Heikki Linnakangas
2025-05-02 02:13:45 +03:00
parent e40193e3c8
commit 54cd2272f1
12 changed files with 485 additions and 174 deletions

Cargo.lock (generated)
View File

@@ -3906,6 +3906,7 @@ name = "neonart"
version = "0.1.0"
dependencies = [
"rand 0.8.5",
"spin",
"tracing",
"zerocopy 0.8.24",
]
@@ -6737,6 +6738,9 @@ name = "spin"
version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
dependencies = [
"lock_api",
]
[[package]]
name = "spinning_top"

View File

@@ -180,6 +180,7 @@ smallvec = "1.11"
smol_str = { version = "0.2.0", features = ["serde"] }
socket2 = "0.5"
spki = "0.7.3"
spin = "0.9.8"
strum = "0.26"
strum_macros = "0.26"
"subtle" = "2.5.0"

View File

@@ -5,6 +5,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
spin.workspace = true
tracing.workspace = true
rand.workspace = true # for tests

View File

@@ -1,5 +1,5 @@
mod lock_and_version;
mod node_ptr;
pub(crate) mod node_ptr;
mod node_ref;
use std::vec::Vec;
@@ -9,12 +9,13 @@ use crate::algorithm::node_ptr::{MAX_PREFIX_LEN, NodePtr};
use crate::algorithm::node_ref::ChildOrValue;
use crate::algorithm::node_ref::{NodeRef, ReadLockedNodeRef, WriteLockedNodeRef};
use crate::allocator::ArtAllocator;
use crate::epoch::EpochPin;
use crate::{Allocator, Key, Value};
use crate::{Key, Value};
pub(crate) type RootPtr<V> = node_ptr::NodePtr<V>;
pub fn new_root<V: Value>(allocator: &Allocator) -> RootPtr<V> {
pub fn new_root<V: Value>(allocator: &impl ArtAllocator<V>) -> RootPtr<V> {
node_ptr::new_root(allocator)
}
@@ -36,7 +37,7 @@ pub(crate) fn update_fn<'e, K: Key, V: Value, F>(
key: &K,
value_fn: F,
root: RootPtr<V>,
allocator: &Allocator,
allocator: &impl ArtAllocator<V>,
epoch_pin: &'e EpochPin,
) where
F: FnOnce(Option<&V>) -> Option<V>,
@@ -111,7 +112,7 @@ pub(crate) fn update_recurse<'e, V: Value, F>(
value_fn: F,
node: NodeRef<'e, V>,
rparent: Option<(ReadLockedNodeRef<V>, u8)>,
allocator: &Allocator,
allocator: &impl ArtAllocator<V>,
epoch_pin: &'e EpochPin,
level: usize,
orig_key: &[u8],
@@ -283,7 +284,7 @@ fn insert_split_prefix<'a, V: Value>(
node: &mut WriteLockedNodeRef<V>,
parent: &mut WriteLockedNodeRef<V>,
parent_key: u8,
allocator: &Allocator,
allocator: &impl ArtAllocator<V>,
) {
let old_node = node;
let old_prefix = old_node.get_prefix();
@@ -310,7 +311,7 @@ fn insert_to_node<V: Value>(
wnode: &mut WriteLockedNodeRef<V>,
key: &[u8],
value: V,
allocator: &Allocator,
allocator: &impl ArtAllocator<V>,
) {
if wnode.is_leaf() {
wnode.insert_value(key[0], value);
@@ -327,7 +328,7 @@ fn insert_and_grow<V: Value>(
wnode: &WriteLockedNodeRef<V>,
parent: &mut WriteLockedNodeRef<V>,
parent_key_byte: u8,
allocator: &Allocator,
allocator: &impl ArtAllocator<V>,
) {
let mut bigger_node = wnode.grow(allocator);
@@ -344,7 +345,11 @@ fn insert_and_grow<V: Value>(
// Allocate a new leaf node to hold 'value'. If key is long, we may need to allocate
// new internal nodes to hold it too
fn allocate_node_for_value<V: Value>(key: &[u8], value: V, allocator: &Allocator) -> NodePtr<V> {
fn allocate_node_for_value<V: Value>(
key: &[u8],
value: V,
allocator: &impl ArtAllocator<V>,
) -> NodePtr<V> {
let mut prefix_off = key.len().saturating_sub(MAX_PREFIX_LEN + 1);
let mut leaf_node = node_ref::new_leaf(&key[prefix_off..key.len() - 1], allocator);
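The recurring change in this file is that the tree algorithm now accepts any allocator implementing ArtAllocator<V> instead of the one concrete Allocator type. A toy illustration of that generic-argument shape (placeholder names, a sketch only, not the crate's code):

trait Alloc<T> {
    fn alloc(&self) -> *mut T;
}

// Call sites look the same as with a concrete allocator type; monomorphization
// picks the right implementation per instantiation.
fn make_node<T>(allocator: &impl Alloc<T>) -> *mut T {
    allocator.alloc()
}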

View File

@@ -3,8 +3,8 @@ use std::ptr::NonNull;
use super::lock_and_version::AtomicLockAndVersion;
use crate::Allocator;
use crate::Value;
use crate::allocator::ArtAllocator;
pub(crate) const MAX_PREFIX_LEN: usize = 8;
@@ -75,7 +75,7 @@ pub(crate) enum ChildOrValuePtr<V> {
}
#[repr(C)]
struct NodeInternal4<V> {
pub struct NodeInternal4<V> {
tag: NodeTag,
lock_and_version: AtomicLockAndVersion,
@@ -88,7 +88,7 @@ struct NodeInternal4<V> {
}
#[repr(C)]
struct NodeInternal16<V> {
pub struct NodeInternal16<V> {
tag: NodeTag,
lock_and_version: AtomicLockAndVersion,
@@ -103,7 +103,7 @@ struct NodeInternal16<V> {
const INVALID_CHILD_INDEX: u8 = u8::MAX;
#[repr(C)]
struct NodeInternal48<V> {
pub struct NodeInternal48<V> {
tag: NodeTag,
lock_and_version: AtomicLockAndVersion,
@@ -116,7 +116,7 @@ struct NodeInternal48<V> {
}
#[repr(C)]
pub(crate) struct NodeInternal256<V> {
pub struct NodeInternal256<V> {
tag: NodeTag,
lock_and_version: AtomicLockAndVersion,
@@ -128,7 +128,7 @@ pub(crate) struct NodeInternal256<V> {
}
#[repr(C)]
struct NodeLeaf4<V> {
pub struct NodeLeaf4<V> {
tag: NodeTag,
lock_and_version: AtomicLockAndVersion,
@@ -141,7 +141,7 @@ struct NodeLeaf4<V> {
}
#[repr(C)]
struct NodeLeaf16<V> {
pub struct NodeLeaf16<V> {
tag: NodeTag,
lock_and_version: AtomicLockAndVersion,
@@ -154,7 +154,7 @@ struct NodeLeaf16<V> {
}
#[repr(C)]
struct NodeLeaf48<V> {
pub struct NodeLeaf48<V> {
tag: NodeTag,
lock_and_version: AtomicLockAndVersion,
@@ -167,7 +167,7 @@ struct NodeLeaf48<V> {
}
#[repr(C)]
struct NodeLeaf256<V> {
pub struct NodeLeaf256<V> {
tag: NodeTag,
lock_and_version: AtomicLockAndVersion,
@@ -352,7 +352,7 @@ impl<V: Value> NodePtr<V> {
}
}
pub(crate) fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
pub(crate) fn grow(&self, allocator: &impl ArtAllocator<V>) -> NodePtr<V> {
match self.variant() {
NodeVariant::Internal4(n) => n.grow(allocator),
NodeVariant::Internal16(n) => n.grow(allocator),
@@ -403,17 +403,43 @@ impl<V: Value> NodePtr<V> {
NodeVariantMut::Leaf256(n) => n.insert_value(key_byte, value),
}
}
}
pub fn new_root<V: Value>(allocator: &Allocator) -> NodePtr<V> {
NodePtr {
ptr: allocator.alloc(NodeInternal256::<V>::new()).as_ptr().cast(),
phantom_value: PhantomData,
// FIXME
/*
pub(crate) fn deallocate(self, allocator: &impl ArtAllocator<V>) {
match self.variant() {
NodeVariant::Internal4(_) => allocator.dealloc_node_internal4(self.ptr.cast()),
NodeVariant::Internal16(_) => allocator.dealloc_node_internal16(self.ptr.cast()),
NodeVariant::Internal48(_) => allocator.dealloc_node_internal48(self.ptr.cast()),
NodeVariant::Internal256(_) => allocator.dealloc_node_internal256(self.ptr.cast()),
NodeVariant::Leaf4(_) => allocator.dealloc_node_leaf4(self.ptr.cast()),
NodeVariant::Leaf16(_) => allocator.dealloc_node_leaf16(self.ptr.cast()),
NodeVariant::Leaf48(_) => allocator.dealloc_node_leaf48(self.ptr.cast()),
NodeVariant::Leaf256(_) => allocator.dealloc_node_leaf256(self.ptr.cast()),
}
}
*/
}
pub fn new_internal<V: Value>(prefix: &[u8], allocator: &Allocator) -> NodePtr<V> {
let mut node = allocator.alloc(NodeInternal4 {
pub fn new_root<V: Value>(allocator: &impl ArtAllocator<V>) -> NodePtr<V> {
let ptr: *mut NodeInternal256<V> = allocator.alloc_node_internal256().cast();
if ptr.is_null() {
panic!("out of memory");
}
unsafe {
*ptr = NodeInternal256::<V>::new();
}
ptr.into()
}
pub fn new_internal<V: Value>(prefix: &[u8], allocator: &impl ArtAllocator<V>) -> NodePtr<V> {
let ptr: *mut NodeInternal4<V> = allocator.alloc_node_internal4().cast();
if ptr.is_null() {
panic!("out of memory");
}
let mut init = NodeInternal4 {
tag: NodeTag::Internal4,
lock_and_version: AtomicLockAndVersion::new(),
@@ -423,14 +449,19 @@ pub fn new_internal<V: Value>(prefix: &[u8], allocator: &Allocator) -> NodePtr<V
child_keys: [0; 4],
child_ptrs: [const { NodePtr::null() }; 4],
});
node.prefix[0..prefix.len()].copy_from_slice(prefix);
};
init.prefix[0..prefix.len()].copy_from_slice(prefix);
unsafe { ptr.write(init) };
node.as_ptr().into()
ptr.into()
}
pub fn new_leaf<V: Value>(prefix: &[u8], allocator: &Allocator) -> NodePtr<V> {
let mut node = allocator.alloc(NodeLeaf4 {
pub fn new_leaf<V: Value>(prefix: &[u8], allocator: &impl ArtAllocator<V>) -> NodePtr<V> {
let ptr: *mut NodeLeaf4<V> = allocator.alloc_node_leaf4().cast();
if ptr.is_null() {
panic!("out of memory");
}
let mut init = NodeLeaf4 {
tag: NodeTag::Leaf4,
lock_and_version: AtomicLockAndVersion::new(),
@@ -440,10 +471,11 @@ pub fn new_leaf<V: Value>(prefix: &[u8], allocator: &Allocator) -> NodePtr<V> {
child_keys: [0; 4],
child_values: [const { None }; 4],
});
node.prefix[0..prefix.len()].copy_from_slice(prefix);
};
init.prefix[0..prefix.len()].copy_from_slice(prefix);
unsafe { ptr.write(init) };
node.as_ptr().into()
ptr.into()
}
impl<V: Value> NodeInternal4<V> {
@@ -493,8 +525,12 @@ impl<V: Value> NodeInternal4<V> {
self.num_children += 1;
}
fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
let mut node16 = allocator.alloc(NodeInternal16 {
fn grow(&self, allocator: &impl ArtAllocator<V>) -> NodePtr<V> {
let ptr: *mut NodeInternal16<V> = allocator.alloc_node_internal16().cast();
if ptr.is_null() {
panic!("out of memory");
}
let mut init = NodeInternal16 {
tag: NodeTag::Internal16,
lock_and_version: AtomicLockAndVersion::new(),
@@ -504,13 +540,13 @@ impl<V: Value> NodeInternal4<V> {
child_keys: [0; 16],
child_ptrs: [const { NodePtr::null() }; 16],
});
};
for i in 0..self.num_children as usize {
node16.child_keys[i] = self.child_keys[i];
node16.child_ptrs[i] = self.child_ptrs[i];
init.child_keys[i] = self.child_keys[i];
init.child_ptrs[i] = self.child_ptrs[i];
}
node16.as_ptr().into()
unsafe { ptr.write(init) };
ptr.into()
}
}
@@ -561,8 +597,12 @@ impl<V: Value> NodeInternal16<V> {
self.num_children += 1;
}
fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
let mut node48 = allocator.alloc(NodeInternal48 {
fn grow(&self, allocator: &impl ArtAllocator<V>) -> NodePtr<V> {
let ptr: *mut NodeInternal48<V> = allocator.alloc_node_internal48().cast();
if ptr.is_null() {
panic!("out of memory");
}
let mut init = NodeInternal48 {
tag: NodeTag::Internal48,
lock_and_version: AtomicLockAndVersion::new(),
@@ -572,14 +612,14 @@ impl<V: Value> NodeInternal16<V> {
child_indexes: [INVALID_CHILD_INDEX; 256],
child_ptrs: [const { NodePtr::null() }; 48],
});
};
for i in 0..self.num_children as usize {
let idx = self.child_keys[i] as usize;
node48.child_indexes[idx] = i as u8;
node48.child_ptrs[i] = self.child_ptrs[i];
init.child_indexes[idx] = i as u8;
init.child_ptrs[i] = self.child_ptrs[i];
}
node48.as_ptr().into()
unsafe { ptr.write(init) };
ptr.into()
}
}
@@ -629,8 +669,12 @@ impl<V: Value> NodeInternal48<V> {
self.num_children += 1;
}
fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
let mut node256 = allocator.alloc(NodeInternal256 {
fn grow(&self, allocator: &impl ArtAllocator<V>) -> NodePtr<V> {
let ptr: *mut NodeInternal256<V> = allocator.alloc_node_internal256().cast();
if ptr.is_null() {
panic!("out of memory");
}
let mut init = NodeInternal256 {
tag: NodeTag::Internal256,
lock_and_version: AtomicLockAndVersion::new(),
@@ -639,14 +683,15 @@ impl<V: Value> NodeInternal48<V> {
num_children: self.num_children as u16,
child_ptrs: [const { NodePtr::null() }; 256],
});
};
for i in 0..256 {
let idx = self.child_indexes[i];
if idx != INVALID_CHILD_INDEX {
node256.child_ptrs[i] = self.child_ptrs[idx as usize];
init.child_ptrs[i] = self.child_ptrs[idx as usize];
}
}
node256.as_ptr().into()
unsafe { ptr.write(init) };
ptr.into()
}
}
@@ -732,8 +777,12 @@ impl<V: Value> NodeLeaf4<V> {
self.num_values += 1;
}
fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
let mut node16 = allocator.alloc(NodeLeaf16 {
fn grow(&self, allocator: &impl ArtAllocator<V>) -> NodePtr<V> {
let ptr: *mut NodeLeaf16<V> = allocator.alloc_node_leaf16();
if ptr.is_null() {
panic!("out of memory");
}
let mut init = NodeLeaf16 {
tag: NodeTag::Leaf16,
lock_and_version: AtomicLockAndVersion::new(),
@@ -743,12 +792,13 @@ impl<V: Value> NodeLeaf4<V> {
child_keys: [0; 16],
child_values: [const { None }; 16],
});
};
for i in 0..self.num_values as usize {
node16.child_keys[i] = self.child_keys[i];
node16.child_values[i] = self.child_values[i].clone();
init.child_keys[i] = self.child_keys[i];
init.child_values[i] = self.child_values[i].clone();
}
node16.as_ptr().into()
unsafe { ptr.write(init) };
ptr.into()
}
}
@@ -788,8 +838,12 @@ impl<V: Value> NodeLeaf16<V> {
self.child_values[idx] = Some(value);
self.num_values += 1;
}
fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
let mut node48 = allocator.alloc(NodeLeaf48 {
fn grow(&self, allocator: &impl ArtAllocator<V>) -> NodePtr<V> {
let ptr: *mut NodeLeaf48<V> = allocator.alloc_node_leaf48().cast();
if ptr.is_null() {
panic!("out of memory");
}
let mut init = NodeLeaf48 {
tag: NodeTag::Leaf48,
lock_and_version: AtomicLockAndVersion::new(),
@@ -799,13 +853,14 @@ impl<V: Value> NodeLeaf16<V> {
child_indexes: [INVALID_CHILD_INDEX; 256],
child_values: [const { None }; 48],
});
};
for i in 0..self.num_values {
let idx = self.child_keys[i as usize];
node48.child_indexes[idx as usize] = i;
node48.child_values[i as usize] = self.child_values[i as usize].clone();
init.child_indexes[idx as usize] = i;
init.child_values[i as usize] = self.child_values[i as usize].clone();
}
node48.as_ptr().into()
unsafe { ptr.write(init) };
ptr.into()
}
}
@@ -845,8 +900,12 @@ impl<V: Value> NodeLeaf48<V> {
self.child_values[idx as usize] = Some(value);
self.num_values += 1;
}
fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
let mut node256 = allocator.alloc(NodeLeaf256 {
fn grow(&self, allocator: &impl ArtAllocator<V>) -> NodePtr<V> {
let ptr: *mut NodeLeaf256<V> = allocator.alloc_node_leaf256();
if ptr.is_null() {
panic!("out of memory");
}
let mut init = NodeLeaf256 {
tag: NodeTag::Leaf256,
lock_and_version: AtomicLockAndVersion::new(),
@@ -855,14 +914,15 @@ impl<V: Value> NodeLeaf48<V> {
num_values: self.num_values as u16,
child_values: [const { None }; 256],
});
};
for i in 0..256 {
let idx = self.child_indexes[i];
if idx != INVALID_CHILD_INDEX {
node256.child_values[i] = self.child_values[idx as usize].clone();
init.child_values[i] = self.child_values[idx as usize].clone();
}
}
node256.as_ptr().into()
unsafe { ptr.write(init) };
ptr.into()
}
}
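Every constructor and grow() above now follows the same allocate-then-write shape: ask the per-type slab for a slot, panic on null, build the node value, then write it into the uninitialized slot. A condensed sketch of that pattern (the names are placeholders, not items from this commit):

fn alloc_and_init<T>(alloc_slot: impl FnOnce() -> *mut T, init: T) -> *mut T {
    // Ask the per-type slab for an uninitialized slot.
    let ptr = alloc_slot();
    if ptr.is_null() {
        panic!("out of memory");
    }
    // Write the fully built value into place without reading or dropping the
    // garbage bytes currently in the slot.
    unsafe { ptr.write(init) };
    ptr
}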

View File

@@ -6,8 +6,9 @@ use super::node_ptr;
use super::node_ptr::ChildOrValuePtr;
use super::node_ptr::NodePtr;
use crate::EpochPin;
use crate::Value;
use crate::algorithm::lock_and_version::AtomicLockAndVersion;
use crate::{Allocator, Value};
use crate::allocator::ArtAllocator;
pub struct NodeRef<'e, V> {
ptr: NodePtr<V>,
@@ -148,7 +149,7 @@ impl<'e, V: Value> WriteLockedNodeRef<'e, V> {
self.ptr.insert_value(key_byte, value)
}
pub(crate) fn grow(&self, allocator: &Allocator) -> NewNodeRef<V> {
pub(crate) fn grow(&self, allocator: &impl ArtAllocator<V>) -> NewNodeRef<V> {
let new_node = self.ptr.grow(allocator);
NewNodeRef { ptr: new_node }
}
@@ -189,13 +190,16 @@ impl<V: Value> NewNodeRef<V> {
}
}
pub(crate) fn new_internal<V: Value>(prefix: &[u8], allocator: &Allocator) -> NewNodeRef<V> {
pub(crate) fn new_internal<V: Value>(
prefix: &[u8],
allocator: &impl ArtAllocator<V>,
) -> NewNodeRef<V> {
NewNodeRef {
ptr: node_ptr::new_internal(prefix, allocator),
}
}
pub(crate) fn new_leaf<V: Value>(prefix: &[u8], allocator: &Allocator) -> NewNodeRef<V> {
pub(crate) fn new_leaf<V: Value>(prefix: &[u8], allocator: &impl ArtAllocator<V>) -> NewNodeRef<V> {
NewNodeRef {
ptr: node_ptr::new_leaf(prefix, allocator),
}

View File

@@ -1,56 +1,136 @@
mod block;
mod multislab;
mod slab;
mod r#static;
use std::alloc::Layout;
use std::marker::PhantomData;
use std::mem::MaybeUninit;
use std::ops::{Deref, DerefMut};
use std::ptr::NonNull;
use std::sync::atomic::{AtomicUsize, Ordering};
use crate::allocator::multislab::MultiSlabAllocator;
use crate::Tree;
pub use crate::algorithm::node_ptr::{
NodeInternal4, NodeInternal16, NodeInternal48, NodeInternal256, NodeLeaf4, NodeLeaf16,
NodeLeaf48, NodeLeaf256,
};
pub trait ArtAllocator<V: crate::Value> {
fn alloc_tree(&self) -> *mut Tree<V>;
fn alloc_node_internal4(&self) -> *mut NodeInternal4<V>;
fn alloc_node_internal16(&self) -> *mut NodeInternal16<V>;
fn alloc_node_internal48(&self) -> *mut NodeInternal48<V>;
fn alloc_node_internal256(&self) -> *mut NodeInternal256<V>;
fn alloc_node_leaf4(&self) -> *mut NodeLeaf4<V>;
fn alloc_node_leaf16(&self) -> *mut NodeLeaf16<V>;
fn alloc_node_leaf48(&self) -> *mut NodeLeaf48<V>;
fn alloc_node_leaf256(&self) -> *mut NodeLeaf256<V>;
fn dealloc_node_internal4(&self, ptr: *mut NodeInternal4<V>);
fn dealloc_node_internal16(&self, ptr: *mut NodeInternal16<V>);
fn dealloc_node_internal48(&self, ptr: *mut NodeInternal48<V>);
fn dealloc_node_internal256(&self, ptr: *mut NodeInternal256<V>);
fn dealloc_node_leaf4(&self, ptr: *mut NodeLeaf4<V>);
fn dealloc_node_leaf16(&self, ptr: *mut NodeLeaf16<V>);
fn dealloc_node_leaf48(&self, ptr: *mut NodeLeaf48<V>);
fn dealloc_node_leaf256(&self, ptr: *mut NodeLeaf256<V>);
}
#[repr(transparent)]
pub struct ArtMultiSlabAllocator<'t, V> {
inner: MultiSlabAllocator<'t, 8>,
phantom_val: PhantomData<V>,
}
impl<'t, V: crate::Value> ArtMultiSlabAllocator<'t, V> {
const LAYOUTS: [Layout; 8] = [
Layout::new::<NodeInternal4<V>>(),
Layout::new::<NodeInternal16<V>>(),
Layout::new::<NodeInternal48<V>>(),
Layout::new::<NodeInternal256<V>>(),
Layout::new::<NodeLeaf4<V>>(),
Layout::new::<NodeLeaf16<V>>(),
Layout::new::<NodeLeaf48<V>>(),
Layout::new::<NodeLeaf256<V>>(),
];
pub fn new(area: &'t mut [MaybeUninit<u8>]) -> &'t mut ArtMultiSlabAllocator<'t, V> {
let allocator = MultiSlabAllocator::new(area, &Self::LAYOUTS);
let ptr: *mut MultiSlabAllocator<8> = allocator;
let ptr: *mut ArtMultiSlabAllocator<V> = ptr.cast();
unsafe { ptr.as_mut().unwrap() }
}
}
impl<'t, V: crate::Value> ArtAllocator<V> for ArtMultiSlabAllocator<'t, V> {
fn alloc_tree(&self) -> *mut Tree<V> {
self.inner.alloc_fit(Layout::new::<Tree<V>>()).cast()
}
fn alloc_node_internal4(&self) -> *mut NodeInternal4<V> {
self.inner.alloc_slab(0).cast()
}
fn alloc_node_internal16(&self) -> *mut NodeInternal16<V> {
self.inner.alloc_slab(1).cast()
}
fn alloc_node_internal48(&self) -> *mut NodeInternal48<V> {
self.inner.alloc_slab(2).cast()
}
fn alloc_node_internal256(&self) -> *mut NodeInternal256<V> {
self.inner.alloc_slab(3).cast()
}
fn alloc_node_leaf4(&self) -> *mut NodeLeaf4<V> {
self.inner.alloc_slab(4).cast()
}
fn alloc_node_leaf16(&self) -> *mut NodeLeaf16<V> {
self.inner.alloc_slab(5).cast()
}
fn alloc_node_leaf48(&self) -> *mut NodeLeaf48<V> {
self.inner.alloc_slab(6).cast()
}
fn alloc_node_leaf256(&self) -> *mut NodeLeaf256<V> {
self.inner.alloc_slab(7).cast()
}
fn dealloc_node_internal4(&self, ptr: *mut NodeInternal4<V>) {
self.inner.dealloc_slab(0, ptr.cast())
}
fn dealloc_node_internal16(&self, ptr: *mut NodeInternal16<V>) {
self.inner.dealloc_slab(1, ptr.cast())
}
fn dealloc_node_internal48(&self, ptr: *mut NodeInternal48<V>) {
self.inner.dealloc_slab(2, ptr.cast())
}
fn dealloc_node_internal256(&self, ptr: *mut NodeInternal256<V>) {
self.inner.dealloc_slab(3, ptr.cast())
}
fn dealloc_node_leaf4(&self, ptr: *mut NodeLeaf4<V>) {
self.inner.dealloc_slab(4, ptr.cast())
}
fn dealloc_node_leaf16(&self, ptr: *mut NodeLeaf16<V>) {
self.inner.dealloc_slab(5, ptr.cast())
}
fn dealloc_node_leaf48(&self, ptr: *mut NodeLeaf48<V>) {
self.inner.dealloc_slab(6, ptr.cast())
}
fn dealloc_node_leaf256(&self, ptr: *mut NodeLeaf256<V>) {
self.inner.dealloc_slab(7, ptr.cast())
}
}
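From a consumer's point of view, the intended flow is to hand ArtMultiSlabAllocator one backing area and let each node flavor come out of its own slab. A minimal usage sketch (assuming some V that implements Value; note StaticAllocator is still a stub in this commit, so this shows the API shape rather than code that runs yet):

use std::mem::MaybeUninit;
use neonart::allocator::{ArtAllocator, ArtMultiSlabAllocator};

fn usage_sketch<V: neonart::Value>() {
    // Carve the allocator and its slabs out of one caller-provided area.
    let mut area = vec![MaybeUninit::<u8>::uninit(); 1 << 20];
    let allocator = ArtMultiSlabAllocator::<V>::new(&mut area);

    // Each node size maps to a dedicated slab; a null pointer means the
    // backing area is exhausted.
    let n4 = allocator.alloc_node_internal4();
    assert!(!n4.is_null());
    allocator.dealloc_node_internal4(n4);
}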
/*
pub struct Allocator {
area: *mut MaybeUninit<u8>,
allocated: AtomicUsize,
size: usize,
}
// FIXME: I don't know if these are really safe...
unsafe impl Send for Allocator {}
unsafe impl Sync for Allocator {}
#[repr(transparent)]
pub struct AllocatedBox<'a, T> {
inner: NonNull<T>,
_phantom: PhantomData<&'a Allocator>,
}
// FIXME: I don't know if these are really safe...
unsafe impl<'a, T> Send for AllocatedBox<'a, T> {}
unsafe impl<'a, T> Sync for AllocatedBox<'a, T> {}
impl<T> Deref for AllocatedBox<'_, T> {
type Target = T;
fn deref(&self) -> &T {
unsafe { self.inner.as_ref() }
}
}
impl<T> DerefMut for AllocatedBox<'_, T> {
fn deref_mut(&mut self) -> &mut T {
unsafe { self.inner.as_mut() }
}
}
impl<T> AsMut<T> for AllocatedBox<'_, T> {
fn as_mut(&mut self) -> &mut T {
unsafe { self.inner.as_mut() }
}
}
impl<T> AllocatedBox<'_, T> {
pub fn as_ptr(&self) -> *mut T {
self.inner.as_ptr()
}
}
const MAXALIGN: usize = std::mem::align_of::<usize>();
impl Allocator {
@@ -105,3 +185,4 @@ impl Allocator {
// doesn't free it immediately.
}
}
*/

View File

@@ -5,12 +5,14 @@ use std::sync::atomic::{AtomicU64, Ordering};
use spin;
const BLOCK_SIZE: usize = 16*1024;
use crate::allocator::r#static::StaticAllocator;
const BLOCK_SIZE: usize = 16 * 1024;
const INVALID_BLOCK: u64 = u64::MAX;
pub(crate) struct BlockAllocator {
blocks_ptr: *mut MaybeUninit<u8>,
pub(crate) struct BlockAllocator<'t> {
blocks_ptr: &'t [MaybeUninit<u8>],
num_blocks: u64,
num_initialized: AtomicU64,
@@ -28,23 +30,19 @@ struct FreeListBlockInner {
free_blocks: [u64; 100], // FIXME: fill the rest of the block
}
impl<'t> BlockAllocator<'t> {
pub(crate) fn new(area: &'t mut [MaybeUninit<u8>]) -> Self {
let mut alloc = StaticAllocator::new(area);
impl BlockAllocator {
pub(crate) fn new(ptr: *mut MaybeUninit<u8>, size: usize) -> Self {
let mut p = ptr;
// Use all the space for the blocks
let padding = p.align_offset(BLOCK_SIZE);
p = unsafe { p.byte_add(padding) };
let blocks_ptr = p;
alloc.align(BLOCK_SIZE);
let used = unsafe { p.byte_offset_from(ptr) as usize };
assert!(used <= size);
let blocks_size = size - used;
let remain = alloc.remaining();
let num_blocks = (blocks_size / BLOCK_SIZE) as u64;
let num_blocks = (remain.len() / BLOCK_SIZE) as u64;
BlockAllocator {
blocks_ptr,
blocks_ptr: remain,
num_blocks,
num_initialized: AtomicU64::new(0),
freelist_head: spin::Mutex::new(INVALID_BLOCK),
@@ -60,7 +58,13 @@ impl BlockAllocator {
fn get_block_ptr(&self, blkno: u64) -> *mut u8 {
assert!(blkno < self.num_blocks);
unsafe { self.blocks_ptr.byte_offset(blkno as isize * BLOCK_SIZE as isize) }.cast()
unsafe {
self.blocks_ptr
.as_ptr()
.byte_offset(blkno as isize * BLOCK_SIZE as isize)
}
.cast_mut()
.cast()
}
pub(crate) fn alloc_block(&self) -> *mut u8 {
@@ -95,14 +99,19 @@ impl BlockAllocator {
// If there are some blocks left that we've never used, pick next such block
let mut next_uninitialized = self.num_initialized.load(Ordering::Relaxed);
while next_uninitialized < self.num_blocks {
match self.num_initialized.compare_exchange(next_uninitialized, next_uninitialized + 1, Ordering::Relaxed, Ordering::Relaxed) {
match self.num_initialized.compare_exchange(
next_uninitialized,
next_uninitialized + 1,
Ordering::Relaxed,
Ordering::Relaxed,
) {
Ok(_) => {
return next_uninitialized;
},
}
Err(old) => {
next_uninitialized = old;
continue;
},
}
}
}
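The compare_exchange loop above is the standard claim-the-next-slot scheme: whoever advances the counter owns that block index. A standalone sketch of the same scheme (names illustrative, not the commit's):

use std::sync::atomic::{AtomicU64, Ordering};

fn claim_next(num_initialized: &AtomicU64, num_blocks: u64) -> Option<u64> {
    let mut next = num_initialized.load(Ordering::Relaxed);
    while next < num_blocks {
        match num_initialized.compare_exchange(
            next,
            next + 1,
            Ordering::Relaxed,
            Ordering::Relaxed,
        ) {
            // We advanced the counter, so block `next` is ours to initialize.
            Ok(_) => return Some(next),
            // Another thread won the race; retry from the value it observed.
            Err(seen) => next = seen,
        }
    }
    // Every block has been handed out at least once; the caller falls back to
    // the free list.
    None
}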

View File

@@ -0,0 +1,56 @@
use std::alloc::Layout;
use std::mem::MaybeUninit;
use crate::allocator::block::BlockAllocator;
use crate::allocator::slab::SlabDesc;
use crate::allocator::r#static::StaticAllocator;
pub struct MultiSlabAllocator<'t, const N: usize> {
pub(crate) block_allocator: BlockAllocator<'t>,
pub(crate) slab_descs: [SlabDesc; N],
}
unsafe impl<'t, const N: usize> Sync for MultiSlabAllocator<'t, N> {}
unsafe impl<'t, const N: usize> Send for MultiSlabAllocator<'t, N> {}
impl<'t, const N: usize> MultiSlabAllocator<'t, N> {
pub(crate) fn new(
area: &'t mut [MaybeUninit<u8>],
layouts: &[Layout; N],
) -> &'t mut MultiSlabAllocator<'t, N> {
// Set up the MultiSlabAllocator struct in the area first
let mut allocator = StaticAllocator::new(area);
let this = allocator.alloc_uninit();
let block_allocator = BlockAllocator::new(allocator.remaining());
let this = this.write(MultiSlabAllocator {
block_allocator,
slab_descs: std::array::from_fn(|i| SlabDesc::new(&layouts[i])),
});
this
}
pub(crate) fn alloc_fit(&self, layout: Layout) -> *mut u8 {
for i in 0..self.slab_descs.len() {
if self.slab_descs[i].layout.align() >= layout.align()
&& self.slab_descs[i].layout.size() >= layout.size()
{
return self.alloc_slab(i);
}
}
panic!("no suitable slab found for allocation");
}
pub(crate) fn alloc_slab(&self, slab_idx: usize) -> *mut u8 {
self.slab_descs[slab_idx].alloc_chunk(&self.block_allocator)
}
pub(crate) fn dealloc_slab(&self, slab_idx: usize, ptr: *mut u8) {
self.slab_descs[slab_idx].dealloc_chunk(ptr, &self.block_allocator)
}
}
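alloc_fit applies a simple first-fit rule over the configured layouts: the first slab whose chunk layout is at least as large and at least as aligned as the request wins. The same rule in isolation (a sketch, not the crate's code):

use std::alloc::Layout;

// Returns the index of the first layout that can hold `want`, if any.
fn pick_slab(layouts: &[Layout], want: Layout) -> Option<usize> {
    layouts
        .iter()
        .position(|l| l.align() >= want.align() && l.size() >= want.size())
}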

View File

@@ -0,0 +1,57 @@
use std::mem::MaybeUninit;
pub struct StaticAllocator<'t> {
area: &'t mut [MaybeUninit<u8>],
}
impl<'t> StaticAllocator<'t> {
pub fn new(_area: &'t mut [MaybeUninit<u8>]) -> StaticAllocator<'t> {
todo!()
}
/*
pub fn alloc<T>(&mut self, _init: T) -> &'t T {
todo!()
}
*/
pub fn alloc_uninit<T>(&mut self) -> &'t mut MaybeUninit<T> {
todo!()
}
pub fn remaining(self) -> &'t mut [MaybeUninit<u8>] {
self.area
}
pub fn align(&mut self, _alignment: usize) {
todo!()
}
/*
pub fn static_alloc<'a, T: Sized>(&'a self, value: T) -> AllocatedBox<'a, T> {
let sz = std::mem::size_of::<T>();
// pad all allocations to MAXALIGN boundaries
assert!(std::mem::align_of::<T>() <= MAXALIGN);
let sz = sz.next_multiple_of(MAXALIGN);
let offset = self.allocated.fetch_add(sz, Ordering::Relaxed);
if offset + sz > self.size {
panic!("out of memory");
}
let inner = unsafe {
let inner = self.area.offset(offset as isize).cast::<T>();
*inner = value;
NonNull::new_unchecked(inner)
};
AllocatedBox {
inner,
_phantom: PhantomData,
}
}
*/
}
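One possible shape for the todo!() bodies above is a plain bump allocator that hands out typed slots from the front of the borrowed area, much like the commented-out static_alloc above; this is a sketch under that assumption, not the crate's implementation:

use std::mem::MaybeUninit;

struct BumpSketch<'t> {
    area: &'t mut [MaybeUninit<u8>],
}

impl<'t> BumpSketch<'t> {
    fn new(area: &'t mut [MaybeUninit<u8>]) -> Self {
        BumpSketch { area }
    }

    // Skip bytes until the start of the remaining area hits `alignment`.
    fn align(&mut self, alignment: usize) {
        let area = std::mem::take(&mut self.area);
        let pad = area.as_ptr().align_offset(alignment);
        self.area = area.split_at_mut(pad).1;
    }

    // Carve one properly aligned, uninitialized T off the front; panics if
    // the area runs out.
    fn alloc_uninit<T>(&mut self) -> &'t mut MaybeUninit<T> {
        self.align(std::mem::align_of::<T>());
        let area = std::mem::take(&mut self.area);
        let (head, rest) = area.split_at_mut(std::mem::size_of::<T>());
        self.area = rest;
        unsafe { &mut *head.as_mut_ptr().cast::<MaybeUninit<T>>() }
    }

    // Whatever is left can back the next allocator, as remaining() does.
    fn remaining(self) -> &'t mut [MaybeUninit<u8>] {
        self.area
    }
}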

View File

@@ -122,15 +122,14 @@
//! - Removing values has not been implemented
mod algorithm;
mod allocator;
pub mod allocator;
mod epoch;
use algorithm::RootPtr;
use allocator::AllocatedBox;
use std::fmt::Debug;
use std::marker::PhantomData;
use std::ptr::NonNull;
use std::sync::atomic::{AtomicBool, Ordering};
use crate::epoch::EpochPin;
@@ -138,7 +137,8 @@ use crate::epoch::EpochPin;
#[cfg(test)]
mod tests;
pub use allocator::Allocator;
use allocator::ArtAllocator;
pub use allocator::ArtMultiSlabAllocator;
/// Fixed-length key type.
///
@@ -154,31 +154,36 @@ pub trait Key: Clone + Debug {
/// the old sticks around until all readers that might see the old value are gone.
pub trait Value: Clone {}
struct Tree<K: Key, V: Value> {
pub struct Tree<V: Value> {
root: RootPtr<V>,
writer_attached: AtomicBool,
}
unsafe impl<V: Value + Sync> Sync for Tree<V> {}
unsafe impl<V: Value + Send> Send for Tree<V> {}
/// Struct created at postmaster startup
pub struct TreeInitStruct<'t, K: Key, V: Value, A: ArtAllocator<V>> {
tree: &'t Tree<V>,
allocator: &'t A,
phantom_key: PhantomData<K>,
}
/// Struct created at postmaster startup
pub struct TreeInitStruct<'t, K: Key, V: Value> {
tree: AllocatedBox<'t, Tree<K, V>>,
allocator: &'t Allocator,
}
/// The worker process has a reference to this. The write operations are only safe
/// from the worker process
pub struct TreeWriteAccess<'t, K: Key, V: Value>
pub struct TreeWriteAccess<'t, K: Key, V: Value, A: ArtAllocator<V>>
where
K: Key,
V: Value,
{
tree: AllocatedBox<'t, Tree<K, V>>,
tree: &'t Tree<V>,
allocator: &'t Allocator,
allocator: &'t A,
phantom_key: PhantomData<K>,
}
/// The backends have a reference to this. It cannot be used to modify the tree
@@ -187,21 +192,29 @@ where
K: Key,
V: Value,
{
tree: AllocatedBox<'t, Tree<K, V>>,
tree: &'t Tree<V>,
phantom_key: PhantomData<K>,
}
impl<'a, 't: 'a, K: Key, V: Value> TreeInitStruct<'t, K, V> {
pub fn new(allocator: &'t Allocator) -> TreeInitStruct<'t, K, V> {
let tree = allocator.alloc(Tree {
impl<'a, 't: 'a, K: Key, V: Value, A: ArtAllocator<V>> TreeInitStruct<'t, K, V, A> {
pub fn new(allocator: &'t A) -> TreeInitStruct<'t, K, V, A> {
let tree_ptr = allocator.alloc_tree();
let tree_ptr = NonNull::new(tree_ptr).expect("out of memory");
let init = Tree {
root: algorithm::new_root(allocator),
writer_attached: AtomicBool::new(false),
phantom_key: PhantomData,
});
};
unsafe { tree_ptr.write(init) };
TreeInitStruct { tree, allocator }
TreeInitStruct {
tree: unsafe { tree_ptr.as_ref() },
allocator,
phantom_key: PhantomData,
}
}
pub fn attach_writer(self) -> TreeWriteAccess<'t, K, V> {
pub fn attach_writer(self) -> TreeWriteAccess<'t, K, V, A> {
let previously_attached = self.tree.writer_attached.swap(true, Ordering::Relaxed);
if previously_attached {
panic!("writer already attached");
@@ -209,21 +222,26 @@ impl<'a, 't: 'a, K: Key, V: Value> TreeInitStruct<'t, K, V> {
TreeWriteAccess {
tree: self.tree,
allocator: self.allocator,
phantom_key: PhantomData,
}
}
pub fn attach_reader(self) -> TreeReadAccess<'t, K, V> {
TreeReadAccess { tree: self.tree }
TreeReadAccess {
tree: self.tree,
phantom_key: PhantomData,
}
}
}
impl<'t, K: Key + Clone, V: Value> TreeWriteAccess<'t, K, V> {
pub fn start_write(&'t self) -> TreeWriteGuard<'t, K, V> {
impl<'t, K: Key + Clone, V: Value, A: ArtAllocator<V>> TreeWriteAccess<'t, K, V, A> {
pub fn start_write(&'t self) -> TreeWriteGuard<'t, K, V, A> {
// TODO: grab epoch guard
TreeWriteGuard {
allocator: self.allocator,
tree: &self.tree,
epoch_pin: epoch::pin_epoch(),
phantom_key: PhantomData,
}
}
@@ -231,6 +249,7 @@ impl<'t, K: Key + Clone, V: Value> TreeWriteAccess<'t, K, V> {
TreeReadGuard {
tree: &self.tree,
epoch_pin: epoch::pin_epoch(),
phantom_key: PhantomData,
}
}
}
@@ -240,6 +259,7 @@ impl<'t, K: Key + Clone, V: Value> TreeReadAccess<'t, K, V> {
TreeReadGuard {
tree: &self.tree,
epoch_pin: epoch::pin_epoch(),
phantom_key: PhantomData,
}
}
}
@@ -249,9 +269,10 @@ where
K: Key,
V: Value,
{
tree: &'t AllocatedBox<'t, Tree<K, V>>,
tree: &'t Tree<V>,
epoch_pin: EpochPin,
phantom_key: PhantomData<K>,
}
impl<'t, K: Key, V: Value> TreeReadGuard<'t, K, V> {
@@ -260,18 +281,19 @@ impl<'t, K: Key, V: Value> TreeReadGuard<'t, K, V> {
}
}
pub struct TreeWriteGuard<'t, K, V>
pub struct TreeWriteGuard<'t, K, V, A>
where
K: Key,
V: Value,
{
tree: &'t AllocatedBox<'t, Tree<K, V>>,
allocator: &'t Allocator,
tree: &'t Tree<V>,
allocator: &'t A,
epoch_pin: EpochPin,
phantom_key: PhantomData<K>,
}
impl<'t, K: Key, V: Value> TreeWriteGuard<'t, K, V> {
impl<'t, K: Key, V: Value, A: ArtAllocator<V>> TreeWriteGuard<'t, K, V, A> {
pub fn insert(&mut self, key: &K, value: V) {
self.update_with_fn(key, |_| Some(value))
}
@@ -294,7 +316,7 @@ impl<'t, K: Key, V: Value> TreeWriteGuard<'t, K, V> {
}
}
impl<'t, K: Key, V: Value + Debug> TreeWriteGuard<'t, K, V> {
impl<'t, K: Key, V: Value + Debug> TreeReadGuard<'t, K, V> {
pub fn dump(&mut self) {
algorithm::dump_tree(self.tree.root, &self.epoch_pin)
}

View File

@@ -11,6 +11,8 @@
//! Note: This deals with "relations", which is really just one "relation fork" in Postgres
//! terms. RelFileLocator + ForkNumber is the key.
use std::mem::MaybeUninit;
use utils::lsn::Lsn;
use crate::file_cache::{CacheBlock, FileCache};
@@ -21,20 +23,28 @@ use neonart::TreeInitStruct;
const CACHE_AREA_SIZE: usize = 10 * 1024 * 1024;
type IntegratedCacheTreeInitStruct<'t> =
TreeInitStruct<'t, TreeKey, TreeEntry, neonart::ArtMultiSlabAllocator<'t, TreeEntry>>;
/// This struct is stored in the shared memory segment.
struct IntegratedCacheShmemData {
allocator: neonart::Allocator,
allocator: &'static neonart::ArtMultiSlabAllocator<'static, TreeEntry>,
}
/// This struct is initialized at postmaster startup, and passed to all the processes via fork().
pub struct IntegratedCacheInitStruct<'t> {
shmem_data: &'t IntegratedCacheShmemData,
handle: TreeInitStruct<'t, TreeKey, TreeEntry>,
handle: IntegratedCacheTreeInitStruct<'t>,
}
/// Represents write-access to the integrated cache. This is used by the communicator process.
pub struct IntegratedCacheWriteAccess<'t> {
cache_tree: neonart::TreeWriteAccess<'t, TreeKey, TreeEntry>,
cache_tree: neonart::TreeWriteAccess<
't,
TreeKey,
TreeEntry,
neonart::ArtMultiSlabAllocator<'t, TreeEntry>,
>,
global_lw_lsn: Lsn,
@@ -72,8 +82,9 @@ impl<'t> IntegratedCacheInitStruct<'t> {
let area_ptr = ptr;
let area_size = shmem_area.len() - len_used;
let cache_area: &mut [u8] = unsafe { std::slice::from_raw_parts_mut(area_ptr, area_size) };
let allocator = neonart::Allocator::new(cache_area);
let cache_area: &mut [MaybeUninit<u8>] =
unsafe { std::slice::from_raw_parts_mut(area_ptr.cast(), area_size) };
let allocator = neonart::ArtMultiSlabAllocator::new(cache_area);
// Initialize the shared memory area
let shmem_data = unsafe {
@@ -81,7 +92,7 @@ impl<'t> IntegratedCacheInitStruct<'t> {
&*shmem_data_ptr
};
let tree_handle = TreeInitStruct::new(&shmem_data.allocator);
let tree_handle = IntegratedCacheTreeInitStruct::new(&shmem_data.allocator);
IntegratedCacheInitStruct {
shmem_data,