add fuzz test for hashmap (#2310)

This commit is contained in:
PSeitz
2024-01-31 10:30:21 +01:00
committed by GitHub
parent 48630ceec9
commit 1223a87eb2
4 changed files with 70 additions and 0 deletions

View File

@@ -11,6 +11,7 @@ description = "term hashmap used for indexing"
murmurhash32 = "0.3" murmurhash32 = "0.3"
common = { version = "0.6", path = "../common/", package = "tantivy-common" } common = { version = "0.6", path = "../common/", package = "tantivy-common" }
ahash = { version = "0.8.3", default-features = false, optional = true } ahash = { version = "0.8.3", default-features = false, optional = true }
rand_distr = "0.4.3"
[[bench]] [[bench]]
harness = false harness = false

View File

@@ -0,0 +1,15 @@
# Manifest of the standalone `fuzz_test` binary, which exercises `tantivy-stacker`.
[package]
name = "fuzz_test"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
ahash = "0.8.7"
rand = "0.8.5"
rand_distr = "0.4.3"
# The crate under test, taken from the parent directory.
tantivy-stacker = { version = "0.2.0", path = ".." }
# NOTE(review): the empty `[workspace]` table presumably makes this package its own
# workspace root so it is not picked up by an enclosing workspace — confirm.
[workspace]

View File

@@ -0,0 +1,45 @@
use ahash::AHashMap;
use rand::{rngs::StdRng, Rng, SeedableRng};
use rand_distr::Exp;
use tantivy_stacker::ArenaHashMap;
/// Fuzz driver: runs one million rounds of [`test_with_seed`], each with a freshly
/// drawn random `u64` seed.
fn main() {
    (0..1_000_000).for_each(|_| test_with_seed(rand::random::<u64>()));
}
fn test_with_seed(seed: u64) {
let mut hash_map = AHashMap::new();
let mut arena_hashmap = ArenaHashMap::default();
let mut rng = StdRng::seed_from_u64(seed);
let key_count = rng.gen_range(1_000..=1_000_000);
let exp = Exp::new(0.05).unwrap();
for _ in 0..key_count {
let key_length = rng.sample::<f32, _>(exp).min(u16::MAX as f32).max(1.0) as usize;
let key: Vec<u8> = (0..key_length).map(|_| rng.gen()).collect();
arena_hashmap.mutate_or_create(&key, |current_count| {
let count: u64 = current_count.unwrap_or(0);
count + 1
});
hash_map.entry(key).and_modify(|e| *e += 1).or_insert(1);
}
println!(
"Seed: {} \t {:.2}MB",
seed,
arena_hashmap.memory_arena.len() as f32 / 1024.0 / 1024.0
);
// Check the contents of the ArenaHashMap
for (key, addr) in arena_hashmap.iter() {
let count: u64 = arena_hashmap.read(addr);
let count_expected = hash_map
.get(key)
.unwrap_or_else(|| panic!("NOT FOUND: Key: {:?}, Count: {}", key, count));
assert_eq!(count, *count_expected);
}
}

View File

@@ -113,6 +113,15 @@ impl MemoryArena {
self.pages.len() * PAGE_SIZE self.pages.len() * PAGE_SIZE
} }
/// Returns the number of bytes allocated in the arena.
///
/// Every page except the last one is counted as a full `PAGE_SIZE`; the last page
/// contributes its own `len`. An arena without any page reports `0` instead of
/// panicking.
pub fn len(&self) -> usize {
    let full_pages = self.pages.len().saturating_sub(1);
    let last_page_len = self.pages.last().map_or(0, |page| page.len);
    full_pages * PAGE_SIZE + last_page_len
}
/// Returns `true` if [`len`](Self::len) reports zero bytes.
pub fn is_empty(&self) -> bool {
    let allocated = self.len();
    allocated == 0
}
#[inline] #[inline]
pub fn write_at<Item: Copy + 'static>(&mut self, addr: Addr, val: Item) { pub fn write_at<Item: Copy + 'static>(&mut self, addr: Addr, val: Item) {
let dest = self.slice_mut(addr, std::mem::size_of::<Item>()); let dest = self.slice_mut(addr, std::mem::size_of::<Item>());