Files
tantivy/stacker/example/hashmap.rs
PSeitz e83abbfe4a perf: faster term hash map (#1940)
* add term hashmap benchmark

* refactor arena hashmap

add inlines
remove occupied array and use table_entry.is_empty instead (saves 4 bytes per entry)
raise saturation threshold from 1/3 to 1/2 to reduce memory
use u32 for UnorderedId (we have the 4 billion limit anyway on the Columnar stuff)
fix naming LinearProbing
remove byteorder dependency

memory consumption went down from 2 GB to 1.8 GB when indexing the Wikipedia dataset in tantivy

* Update stacker/src/arena_hashmap.rs

Co-authored-by: Paul Masurel <paul@quickwit.io>

---------

Co-authored-by: Paul Masurel <paul@quickwit.io>
2023-04-17 09:07:33 +02:00

28 lines
655 B
Rust

use tantivy_stacker::ArenaHashMap;
/// Contents of `alice.txt`, embedded into the binary at compile time by `include_str!`.
/// `main` splits this text on whitespace to obtain the terms it inserts.
const ALICE: &str = include_str!("../../benches/alice.txt");
/// Drives `create_hash_map` with two workloads and discards the resulting maps.
///
/// 1. The decimal strings of every integer in `0..100_000_000` (all keys distinct).
/// 2. The whitespace-separated words of `ALICE`, rebuilt from scratch 1000 times.
fn main() {
    // Workload 1: one pass over a lazily generated stream of unique numeric strings.
    let numeric_terms = (0..100_000_000).map(|number| number.to_string());
    create_hash_map(numeric_terms);

    // Workload 2: natural-language text, where many words repeat.
    for _round in 0..1000 {
        let words = ALICE.split_whitespace();
        create_hash_map(words);
    }
}
/// Builds an `ArenaHashMap` that counts how many times each term occurs in `terms`.
///
/// Each term is keyed by the UTF-8 bytes of its string form. The `u64` stored for a
/// key is `1` the first time the key is seen and is incremented by one on every
/// later occurrence. The populated map is returned to the caller.
fn create_hash_map<T: AsRef<str>>(terms: impl Iterator<Item = T>) -> ArenaHashMap {
    // NOTE(review): the starting capacity of 4 is tiny relative to the workloads —
    // presumably so that table growth is part of what gets exercised; confirm intent.
    let mut map = ArenaHashMap::with_capacity(4);
    for term in terms {
        let key = term.as_ref().as_bytes();
        // The closure receives the value currently stored for `key` (if any) and
        // returns the value to store: a fresh count of 1, or the old count plus one.
        map.mutate_or_create(key, |previous: Option<u64>| match previous {
            Some(count) => count + 1,
            None => 1,
        });
    }
    map
}