From dcc92d287ebb488854a95aa1f00455e6de3428dd Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Mon, 17 Dec 2018 19:08:48 +0900 Subject: [PATCH] Facet remove unsafe (#456) * Removing some unsafe * Removing some unsafe (2) * Remove murmurhash --- Cargo.toml | 1 + src/postings/stacker/mod.rs | 2 - src/postings/stacker/murmurhash2.rs | 87 ---------------------------- src/postings/stacker/term_hashmap.rs | 7 ++- 4 files changed, 6 insertions(+), 91 deletions(-) delete mode 100644 src/postings/stacker/murmurhash2.rs diff --git a/Cargo.toml b/Cargo.toml index 558ce4607..d4f1a9ed0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,6 +49,7 @@ failure = "0.1" htmlescape = "0.3.1" fail = "0.2" scoped-pool = "1.0" +murmurhash32 = "0.1" [target.'cfg(windows)'.dependencies] winapi = "0.2" diff --git a/src/postings/stacker/mod.rs b/src/postings/stacker/mod.rs index 3fdf3e850..78fbf456c 100644 --- a/src/postings/stacker/mod.rs +++ b/src/postings/stacker/mod.rs @@ -1,9 +1,7 @@ mod expull; mod memory_arena; -mod murmurhash2; mod term_hashmap; pub use self::expull::ExpUnrolledLinkedList; pub use self::memory_arena::{Addr, ArenaStorable, MemoryArena}; -use self::murmurhash2::murmurhash2; pub use self::term_hashmap::{compute_table_size, TermHashMap}; diff --git a/src/postings/stacker/murmurhash2.rs b/src/postings/stacker/murmurhash2.rs deleted file mode 100644 index 9626dcb53..000000000 --- a/src/postings/stacker/murmurhash2.rs +++ /dev/null @@ -1,87 +0,0 @@ -use std::ptr; -const SEED: u32 = 3_242_157_231u32; -const M: u32 = 0x5bd1_e995; - -#[inline(always)] -pub fn murmurhash2(key: &[u8]) -> u32 { - #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] - let mut key_ptr: *const u32 = key.as_ptr() as *const u32; - let len = key.len() as u32; - let mut h: u32 = SEED ^ len; - - let num_blocks = len >> 2; - for _ in 0..num_blocks { - let mut k: u32 = unsafe { ptr::read_unaligned(key_ptr) }; // ok because of num_blocks definition - k = k.wrapping_mul(M); - k ^= k >> 24; - 
k = k.wrapping_mul(M); - h = h.wrapping_mul(M); - h ^= k; - key_ptr = key_ptr.wrapping_offset(1); - } - - // Handle the last few bytes of the input array - let remaining: &[u8] = &key[key.len() & !3..]; - match remaining.len() { - 3 => { - h ^= u32::from(remaining[2]) << 16; - h ^= u32::from(remaining[1]) << 8; - h ^= u32::from(remaining[0]); - h = h.wrapping_mul(M); - } - 2 => { - h ^= u32::from(remaining[1]) << 8; - h ^= u32::from(remaining[0]); - h = h.wrapping_mul(M); - } - 1 => { - h ^= u32::from(remaining[0]); - h = h.wrapping_mul(M); - } - _ => {} - } - h ^= h >> 13; - h = h.wrapping_mul(M); - h ^ (h >> 15) -} - -#[cfg(test)] -mod test { - - use super::murmurhash2; - use std::collections::HashSet; - - #[test] - fn test_murmur() { - let s1 = "abcdef"; - let s2 = "abcdeg"; - for i in 0..5 { - assert_eq!( - murmurhash2(&s1[i..5].as_bytes()), - murmurhash2(&s2[i..5].as_bytes()) - ); - } - } - - #[test] - fn test_murmur_against_reference_impl() { - assert_eq!(murmurhash2("".as_bytes()), 3632506080); - assert_eq!(murmurhash2("a".as_bytes()), 455683869); - assert_eq!(murmurhash2("ab".as_bytes()), 2448092234); - assert_eq!(murmurhash2("abc".as_bytes()), 2066295634); - assert_eq!(murmurhash2("abcd".as_bytes()), 2588571162); - assert_eq!(murmurhash2("abcde".as_bytes()), 2988696942); - assert_eq!(murmurhash2("abcdefghijklmnop".as_bytes()), 2350868870); - } - - #[test] - fn test_murmur_collisions() { - let mut set: HashSet<u32> = HashSet::default(); - for i in 0..10_000 { - let s = format!("hash{}", i); - let hash = murmurhash2(s.as_bytes()); - set.insert(hash); - } - assert_eq!(set.len(), 10_000); - } -} diff --git a/src/postings/stacker/term_hashmap.rs b/src/postings/stacker/term_hashmap.rs index 47ee3d5c7..2ec71de05 100644 --- a/src/postings/stacker/term_hashmap.rs +++ b/src/postings/stacker/term_hashmap.rs @@ -1,4 +1,7 @@ -use super::murmurhash2; +extern crate murmurhash32; + +use self::murmurhash32::murmurhash2; + use super::{Addr, ArenaStorable, MemoryArena}; use
std::iter; use std::mem; @@ -206,7 +209,7 @@ impl TermHashMap { self.resize(); } let key_bytes: &[u8] = key.as_ref(); - let hash = murmurhash2::murmurhash2(key.as_ref()); + let hash = murmurhash2(key.as_ref()); let mut probe = self.probe(hash); loop { let bucket = probe.next_probe();