This commit is contained in:
Paul Masurel
2018-02-03 00:21:05 +09:00
parent dd028841e8
commit df53dc4ceb
44 changed files with 520 additions and 578 deletions

View File

@@ -1,7 +1,7 @@
use std::iter;
use std::mem;
use postings::UnorderedTermId;
use super::heap::{Heap, HeapAllocable, BytesRef};
use super::heap::{BytesRef, Heap, HeapAllocable};
mod murmurhash2 {
@@ -53,9 +53,6 @@ mod murmurhash2 {
}
}
/// Split the thread memory budget into
/// - the heap size
/// - the hash table "table" itself.
@@ -63,14 +60,10 @@ mod murmurhash2 {
/// Returns (the heap size in bytes, the hash table size in number of bits)
pub(crate) fn split_memory(per_thread_memory_budget: usize) -> (usize, usize) {
let table_size_limit: usize = per_thread_memory_budget / 3;
let compute_table_size = |num_bits: usize| {
(1 << num_bits) * mem::size_of::<KeyValue>()
};
let compute_table_size = |num_bits: usize| (1 << num_bits) * mem::size_of::<KeyValue>();
let table_num_bits: usize = (1..)
.into_iter()
.take_while(|num_bits: &usize| {
compute_table_size(*num_bits) < table_size_limit
})
.take_while(|num_bits: &usize| compute_table_size(*num_bits) < table_size_limit)
.last()
.expect(&format!(
"Per thread memory is too small: {}",
@@ -81,7 +74,6 @@ pub(crate) fn split_memory(per_thread_memory_budget: usize) -> (usize, usize) {
(heap_size, table_num_bits)
}
/// `KeyValue` is the item stored in the hash table.
/// The key is actually a `BytesRef` object stored in an external heap.
/// The `value_addr` also points to an address in the heap.
@@ -101,7 +93,6 @@ impl KeyValue {
}
}
/// Customized `HashMap` with string keys
///
/// This `HashMap` takes String as keys. Keys are
@@ -118,7 +109,6 @@ pub struct TermHashMap<'a> {
occupied: Vec<usize>,
}
struct QuadraticProbing {
hash: usize,
i: usize,
@@ -141,7 +131,6 @@ impl QuadraticProbing {
}
}
impl<'a> TermHashMap<'a> {
pub fn new(num_bucket_power_of_2: usize, heap: &'a Heap) -> TermHashMap<'a> {
let table_size = 1 << num_bucket_power_of_2;
@@ -178,18 +167,17 @@ impl<'a> TermHashMap<'a> {
}
pub fn iter<'b: 'a>(&'b self) -> impl Iterator<Item = (&'a [u8], u32, UnorderedTermId)> + 'b {
self.occupied
.iter()
.cloned()
.map(move |bucket: usize| {
let kv = self.table[bucket];
let (key, offset) = self.get_key_value(kv.key_value_addr);
(key, offset, bucket as UnorderedTermId)
})
self.occupied.iter().cloned().map(move |bucket: usize| {
let kv = self.table[bucket];
let (key, offset) = self.get_key_value(kv.key_value_addr);
(key, offset, bucket as UnorderedTermId)
})
}
pub fn get_or_create<S: AsRef<[u8]>, V: HeapAllocable>(&mut self, key: S) -> (UnorderedTermId, &mut V) {
pub fn get_or_create<S: AsRef<[u8]>, V: HeapAllocable>(
&mut self,
key: S,
) -> (UnorderedTermId, &mut V) {
let key_bytes: &[u8] = key.as_ref();
let hash = murmurhash2::murmurhash2(key.as_ref());
let mut probe = self.probe(hash);
@@ -212,7 +200,6 @@ impl<'a> TermHashMap<'a> {
}
}
#[cfg(test)]
mod tests {
@@ -223,7 +210,6 @@ mod tests {
use std::collections::HashSet;
use super::split_memory;
struct TestValue {
val: u32,
_addr: u32,
@@ -245,7 +231,6 @@ mod tests {
assert_eq!(split_memory(10_000_000), (7902848, 18));
}
#[test]
fn test_hash_map() {
let heap = Heap::with_capacity(2_000_000);
@@ -319,5 +304,4 @@ mod tests {
});
}
}

View File

@@ -39,6 +39,5 @@ fn test_unrolled_linked_list() {
assert!(!it.next().is_some());
}
}
}
}