mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-05 20:42:54 +00:00
Add testing utilities for hash map, freelist bugfixes
This commit is contained in:
@@ -20,7 +20,7 @@ pub mod entry;
|
||||
mod tests;
|
||||
|
||||
use core::{CoreHashMap, INVALID_POS};
|
||||
use entry::{Entry, OccupiedEntry};
|
||||
use entry::{Entry, OccupiedEntry, PrevPos};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct OutOfMemoryError();
|
||||
@@ -289,15 +289,15 @@ where
|
||||
for i in old_num_buckets..num_buckets {
|
||||
let bucket_ptr = buckets_ptr.add(i as usize);
|
||||
bucket_ptr.write(core::Bucket {
|
||||
next: if i < num_buckets {
|
||||
next: if i < num_buckets-1 {
|
||||
i as u32 + 1
|
||||
} else {
|
||||
inner.free_head
|
||||
},
|
||||
prev: if i > 0 {
|
||||
i as u32 - 1
|
||||
PrevPos::Chained(i as u32 - 1)
|
||||
} else {
|
||||
INVALID_POS
|
||||
PrevPos::First(INVALID_POS)
|
||||
},
|
||||
inner: None,
|
||||
});
|
||||
@@ -315,6 +315,10 @@ where
|
||||
if num_buckets > map.inner.get_num_buckets() as u32 {
|
||||
panic!("shrink called with a larger number of buckets");
|
||||
}
|
||||
_ = self
|
||||
.shmem_handle
|
||||
.as_ref()
|
||||
.expect("shrink called on a fixed-size hash table");
|
||||
map.inner.alloc_limit = num_buckets;
|
||||
}
|
||||
|
||||
@@ -342,9 +346,21 @@ where
|
||||
// Would require making a wider error type enum with this and shmem errors.
|
||||
panic!("unevicted entries in shrinked space")
|
||||
}
|
||||
let prev_pos = inner.buckets[i].prev;
|
||||
if prev_pos != INVALID_POS {
|
||||
inner.buckets[prev_pos as usize].next = inner.buckets[i].next;
|
||||
match inner.buckets[i].prev {
|
||||
PrevPos::First(_) => {
|
||||
let next_pos = inner.buckets[i].next;
|
||||
inner.free_head = next_pos;
|
||||
if next_pos != INVALID_POS {
|
||||
inner.buckets[next_pos as usize].prev = PrevPos::First(INVALID_POS);
|
||||
}
|
||||
},
|
||||
PrevPos::Chained(j) => {
|
||||
let next_pos = inner.buckets[i].next;
|
||||
inner.buckets[j as usize].next = next_pos;
|
||||
if next_pos != INVALID_POS {
|
||||
inner.buckets[next_pos as usize].prev = PrevPos::Chained(j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -358,6 +374,7 @@ where
|
||||
let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) };
|
||||
let buckets_ptr = inner.buckets.as_mut_ptr();
|
||||
self.rehash_dict(inner, buckets_ptr, end_ptr, num_buckets, num_buckets);
|
||||
inner.alloc_limit = INVALID_POS;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ pub(crate) const INVALID_POS: u32 = u32::MAX;
|
||||
// Bucket
|
||||
pub(crate) struct Bucket<K, V> {
|
||||
pub(crate) next: u32,
|
||||
pub(crate) prev: u32,
|
||||
pub(crate) prev: PrevPos,
|
||||
pub(crate) inner: Option<(K, V)>,
|
||||
}
|
||||
|
||||
@@ -65,9 +65,9 @@ where
|
||||
INVALID_POS
|
||||
},
|
||||
prev: if i > 0 {
|
||||
i as u32 - 1
|
||||
PrevPos::Chained(i as u32 - 1)
|
||||
} else {
|
||||
INVALID_POS
|
||||
PrevPos::First(INVALID_POS)
|
||||
},
|
||||
inner: None,
|
||||
});
|
||||
@@ -176,24 +176,15 @@ where
|
||||
_key: key.clone(), // TODO(quantumish): clone unavoidable?
|
||||
bucket_pos: pos as u32,
|
||||
map: self,
|
||||
prev_pos: if prev == INVALID_POS {
|
||||
// TODO(quantumish): populating this correctly would require an O(n) scan over the dictionary
|
||||
// (perhaps not if we refactored the prev field to be itself something like PrevPos). The real
|
||||
// question though is whether this even needs to be populated correctly? All downstream uses of
|
||||
// this function so far are just for deletion, which isn't really concerned with the dictionary.
|
||||
// Then again, it's unintuitive to appear to return a normal OccupiedEntry which really is fake.
|
||||
PrevPos::First(todo!("unclear what to do here"))
|
||||
} else {
|
||||
PrevPos::Chained(prev)
|
||||
}
|
||||
prev_pos: prev,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
pub(crate) fn alloc_bucket(&mut self, key: K, value: V) -> Result<u32, FullError> {
|
||||
let mut pos = self.free_head;
|
||||
|
||||
let mut prev = PrevPos::First(self.free_head);
|
||||
while pos!= INVALID_POS && pos >= self.alloc_limit {
|
||||
while pos != INVALID_POS && pos >= self.alloc_limit {
|
||||
let bucket = &mut self.buckets[pos as usize];
|
||||
prev = PrevPos::Chained(pos);
|
||||
pos = bucket.next;
|
||||
@@ -204,16 +195,23 @@ where
|
||||
match prev {
|
||||
PrevPos::First(_) => {
|
||||
let next_pos = self.buckets[pos as usize].next;
|
||||
self.free_head = next_pos;
|
||||
self.buckets[next_pos as usize].prev = INVALID_POS;
|
||||
self.free_head = next_pos;
|
||||
// HACK(quantumish): Really, the INVALID_POS should be the position within the dictionary.
|
||||
// This isn't passed into this function, though, and so for now rather than changing that
|
||||
// we can just check it from `alloc_bucket`. Not a great solution.
|
||||
if next_pos != INVALID_POS {
|
||||
self.buckets[next_pos as usize].prev = PrevPos::First(INVALID_POS);
|
||||
}
|
||||
}
|
||||
PrevPos::Chained(p) => if p != INVALID_POS {
|
||||
let next_pos = self.buckets[pos as usize].next;
|
||||
self.buckets[p as usize].next = next_pos;
|
||||
self.buckets[next_pos as usize].prev = p;
|
||||
if next_pos != INVALID_POS {
|
||||
self.buckets[next_pos as usize].prev = PrevPos::Chained(p);
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
let bucket = &mut self.buckets[pos as usize];
|
||||
self.buckets_in_use += 1;
|
||||
bucket.next = INVALID_POS;
|
||||
|
||||
@@ -10,6 +10,7 @@ pub enum Entry<'a, 'b, K, V> {
|
||||
Vacant(VacantEntry<'a, 'b, K, V>),
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub(crate) enum PrevPos {
|
||||
First(u32),
|
||||
Chained(u32),
|
||||
@@ -58,10 +59,14 @@ impl<'a, 'b, K, V> OccupiedEntry<'a, 'b, K, V> {
|
||||
}
|
||||
}
|
||||
|
||||
// and add it to the freelist
|
||||
// and add it to the freelist
|
||||
if self.map.free_head != INVALID_POS {
|
||||
self.map.buckets[self.map.free_head as usize].prev = PrevPos::Chained(self.bucket_pos);
|
||||
}
|
||||
let bucket = &mut self.map.buckets[self.bucket_pos as usize];
|
||||
let old_value = bucket.inner.take();
|
||||
bucket.next = self.map.free_head;
|
||||
bucket.next = self.map.free_head;
|
||||
bucket.prev = PrevPos::First(INVALID_POS);
|
||||
self.map.free_head = self.bucket_pos;
|
||||
self.map.buckets_in_use -= 1;
|
||||
|
||||
@@ -82,6 +87,9 @@ impl<'a, 'b, K: Clone + Hash + Eq, V> VacantEntry<'a, 'b, K, V> {
|
||||
return Err(FullError());
|
||||
}
|
||||
let bucket = &mut self.map.buckets[pos as usize];
|
||||
if let PrevPos::First(INVALID_POS) = bucket.prev {
|
||||
bucket.prev = PrevPos::First(self.dict_pos);
|
||||
}
|
||||
bucket.next = self.map.dictionary[self.dict_pos as usize];
|
||||
self.map.dictionary[self.dict_pos as usize] = pos;
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::HashSet;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::mem::uninitialized;
|
||||
use std::mem::MaybeUninit;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
use crate::hash::HashMapAccess;
|
||||
@@ -132,8 +134,6 @@ fn apply_op(
|
||||
map: &mut HashMapAccess<TestKey, usize>,
|
||||
shadow: &mut BTreeMap<TestKey, usize>,
|
||||
) {
|
||||
eprintln!("applying op: {op:?}");
|
||||
|
||||
// apply the change to the shadow tree first
|
||||
let shadow_existing = if let Some(v) = op.1 {
|
||||
shadow.insert(op.0, v)
|
||||
@@ -161,16 +161,42 @@ fn apply_op(
|
||||
assert_eq!(shadow_existing, hash_existing);
|
||||
}
|
||||
|
||||
fn do_random_ops(
|
||||
num_ops: usize,
|
||||
size: u32,
|
||||
del_prob: f64,
|
||||
writer: &mut HashMapAccess<TestKey, usize>,
|
||||
shadow: &mut BTreeMap<TestKey, usize>,
|
||||
rng: &mut rand::rngs::ThreadRng,
|
||||
) {
|
||||
for i in 0..num_ops {
|
||||
let key: TestKey = ((rng.next_u32() % size) as u128).into();
|
||||
let op = TestOp(key, if rng.random_bool(del_prob) { Some(i) } else { None });
|
||||
apply_op(&op, writer, shadow);
|
||||
}
|
||||
}
|
||||
|
||||
fn do_deletes(
|
||||
num_ops: usize,
|
||||
writer: &mut HashMapAccess<TestKey, usize>,
|
||||
shadow: &mut BTreeMap<TestKey, usize>,
|
||||
) {
|
||||
for i in 0..num_ops {
|
||||
let (k, _) = shadow.pop_first().unwrap();
|
||||
let hash = writer.get_hash_value(&k);
|
||||
writer.remove_with_hash(&k, hash);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn random_ops() {
|
||||
const MAX_MEM_SIZE: usize = 10000000;
|
||||
let shmem = ShmemHandle::new("test_inserts", 0, MAX_MEM_SIZE).unwrap();
|
||||
let shmem = ShmemHandle::new("test_inserts", 0, 10000000).unwrap();
|
||||
|
||||
let init_struct = HashMapInit::<TestKey, usize>::init_in_shmem(100000, shmem);
|
||||
let mut writer = init_struct.attach_writer();
|
||||
|
||||
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
|
||||
|
||||
|
||||
let distribution = Zipf::new(u128::MAX as f64, 1.1).unwrap();
|
||||
let mut rng = rand::rng();
|
||||
for i in 0..100000 {
|
||||
@@ -190,88 +216,97 @@ fn random_ops() {
|
||||
|
||||
#[test]
|
||||
fn test_grow() {
|
||||
const MEM_SIZE: usize = 10000000;
|
||||
let shmem = ShmemHandle::new("test_grow", 0, MEM_SIZE).unwrap();
|
||||
|
||||
let shmem = ShmemHandle::new("test_grow", 0, 10000000).unwrap();
|
||||
let init_struct = HashMapInit::<TestKey, usize>::init_in_shmem(1000, shmem);
|
||||
let mut writer = init_struct.attach_writer();
|
||||
|
||||
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
|
||||
|
||||
let mut rng = rand::rng();
|
||||
for i in 0..10000 {
|
||||
let key: TestKey = ((rng.next_u32() % 1000) as u128).into();
|
||||
|
||||
let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
|
||||
|
||||
apply_op(&op, &mut writer, &mut shadow);
|
||||
|
||||
if i % 1000 == 0 {
|
||||
eprintln!("{i} ops processed");
|
||||
//eprintln!("stats: {:?}", tree_writer.get_statistics());
|
||||
//test_iter(&tree_writer, &shadow);
|
||||
}
|
||||
}
|
||||
|
||||
do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
|
||||
writer.grow(1500).unwrap();
|
||||
|
||||
for i in 0..10000 {
|
||||
let key: TestKey = ((rng.next_u32() % 1500) as u128).into();
|
||||
|
||||
let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
|
||||
|
||||
apply_op(&op, &mut writer, &mut shadow);
|
||||
|
||||
if i % 1000 == 0 {
|
||||
eprintln!("{i} ops processed");
|
||||
//eprintln!("stats: {:?}", tree_writer.get_statistics());
|
||||
//test_iter(&tree_writer, &shadow);
|
||||
}
|
||||
}
|
||||
do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_shrink() {
|
||||
const MEM_SIZE: usize = 10000000;
|
||||
let shmem = ShmemHandle::new("test_shrink", 0, MEM_SIZE).unwrap();
|
||||
|
||||
let init_struct = HashMapInit::<TestKey, usize>::init_in_shmem(1500, shmem);
|
||||
let mut writer = init_struct.attach_writer();
|
||||
|
||||
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
|
||||
|
||||
let mut rng = rand::rng();
|
||||
for i in 0..100 {
|
||||
let key: TestKey = ((rng.next_u32() % 1500) as u128).into();
|
||||
|
||||
let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
|
||||
|
||||
apply_op(&op, &mut writer, &mut shadow);
|
||||
|
||||
if i % 1000 == 0 {
|
||||
eprintln!("{i} ops processed");
|
||||
}
|
||||
}
|
||||
|
||||
writer.begin_shrink(1000);
|
||||
for i in 1000..1500 {
|
||||
if let Some(entry) = writer.entry_at_bucket(i) {
|
||||
fn do_shrink(
|
||||
writer: &mut HashMapAccess<TestKey, usize>,
|
||||
shadow: &mut BTreeMap<TestKey, usize>,
|
||||
from: u32,
|
||||
to: u32
|
||||
) {
|
||||
writer.begin_shrink(to);
|
||||
for i in to..from {
|
||||
if let Some(entry) = writer.entry_at_bucket(i as usize) {
|
||||
shadow.remove(&entry._key);
|
||||
entry.remove();
|
||||
}
|
||||
}
|
||||
writer.finish_shrink().unwrap();
|
||||
|
||||
for i in 0..10000 {
|
||||
let key: TestKey = ((rng.next_u32() % 1000) as u128).into();
|
||||
|
||||
let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
|
||||
|
||||
apply_op(&op, &mut writer, &mut shadow);
|
||||
|
||||
if i % 1000 == 0 {
|
||||
eprintln!("{i} ops processed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_shrink() {
|
||||
let shmem = ShmemHandle::new("test_shrink", 0, 10000000).unwrap();
|
||||
let init_struct = HashMapInit::<TestKey, usize>::init_in_shmem(1500, shmem);
|
||||
let mut writer = init_struct.attach_writer();
|
||||
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
|
||||
let mut rng = rand::rng();
|
||||
|
||||
do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
|
||||
do_shrink(&mut writer, &mut shadow, 1500, 1000);
|
||||
do_deletes(500, &mut writer, &mut shadow);
|
||||
do_random_ops(10000, 500, 0.75, &mut writer, &mut shadow, &mut rng);
|
||||
assert!(writer.get_num_buckets_in_use() <= 1000);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_shrink_grow_seq() {
|
||||
let shmem = ShmemHandle::new("test_shrink", 0, 10000000).unwrap();
|
||||
let init_struct = HashMapInit::<TestKey, usize>::init_in_shmem(1500, shmem);
|
||||
let mut writer = init_struct.attach_writer();
|
||||
let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
|
||||
let mut rng = rand::rng();
|
||||
|
||||
do_random_ops(500, 1000, 0.1, &mut writer, &mut shadow, &mut rng);
|
||||
eprintln!("Shrinking to 750");
|
||||
do_shrink(&mut writer, &mut shadow, 1000, 750);
|
||||
do_random_ops(200, 1000, 0.5, &mut writer, &mut shadow, &mut rng);
|
||||
eprintln!("Growing to 1500");
|
||||
writer.grow(1500).unwrap();
|
||||
do_random_ops(600, 1500, 0.1, &mut writer, &mut shadow, &mut rng);
|
||||
eprintln!("Shrinking to 200");
|
||||
do_shrink(&mut writer, &mut shadow, 1500, 200);
|
||||
do_deletes(100, &mut writer, &mut shadow);
|
||||
do_random_ops(50, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
|
||||
eprintln!("Growing to 10k");
|
||||
writer.grow(10000).unwrap();
|
||||
do_random_ops(10000, 5000, 0.25, &mut writer, &mut shadow, &mut rng);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_shrink_bigger() {
|
||||
let shmem = ShmemHandle::new("test_shrink", 0, 10000000).unwrap();
|
||||
let init_struct = HashMapInit::<TestKey, usize>::init_in_shmem(1500, shmem);
|
||||
let mut writer = init_struct.attach_writer();
|
||||
writer.begin_shrink(2000);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_shrink_early_finish() {
|
||||
let shmem = ShmemHandle::new("test_shrink", 0, 10000000).unwrap();
|
||||
let init_struct = HashMapInit::<TestKey, usize>::init_in_shmem(1500, shmem);
|
||||
let mut writer = init_struct.attach_writer();
|
||||
writer.finish_shrink().unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_shrink_fixed_size() {
|
||||
let mut area = [MaybeUninit::uninit(); 10000];
|
||||
let init_struct = HashMapInit::<TestKey, usize>::init_in_fixed_area(3, &mut area);
|
||||
let mut writer = init_struct.attach_writer();
|
||||
writer.begin_shrink(1);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user