Removed some unsafe code, justified some of it.

This commit is contained in:
Paul Masurel
2018-05-07 23:57:53 -07:00
parent 9a0b7f9855
commit 24050d0eb5
12 changed files with 32 additions and 31 deletions

View File

@@ -379,8 +379,10 @@ impl FacetCollector {
})
.sum();
if count > 0u64 {
let bytes = facet_merger.key().to_owned();
facet_counts.insert(Facet::from_encoded(bytes), count);
let bytes: Vec<u8> = facet_merger.key().to_owned();
// may create a corrupted facet if the term dictionary is corrupted
let facet = unsafe { Facet::from_encoded(bytes) };
facet_counts.insert(facet, count);
}
}
FacetCounts { facet_counts }
@@ -452,9 +454,9 @@ impl FacetCounts {
let right_bound = if facet.is_root() {
Bound::Unbounded
} else {
let mut facet_after_bytes = facet.encoded_bytes().to_owned();
let mut facet_after_bytes: Vec<u8> = facet.encoded_bytes().to_owned();
facet_after_bytes.push(1u8);
let facet_after = Facet::from_encoded(facet_after_bytes);
let facet_after = unsafe { Facet::from_encoded(facet_after_bytes) }; // ok logic
Bound::Excluded(facet_after)
};
let underlying: btree_map::Range<_, _> = self.facet_counts.range((left_bound, right_bound));

View File

@@ -17,7 +17,7 @@ mod murmurhash2 {
let num_blocks = len >> 2;
for _ in 0..num_blocks {
let mut k: u32 = unsafe { *key_ptr };
let mut k: u32 = unsafe { *key_ptr }; // ok because of num_blocks definition
k = k.wrapping_mul(M);
k ^= k >> 24;
k = k.wrapping_mul(M);

View File

@@ -67,6 +67,11 @@ impl<Item: FastValue> FastFieldReader<Item> {
/// associated with the `DocId` going from
/// `start` to `start + output.len()`.
///
/// Regardless of the type of `Item`, this method works by
/// - transmuting the output array
/// - extracting the `Item`s as if they were `u64`
/// - possibly converting the `u64` value to the right type.
///
/// # Panics
///
/// May panic if `start + output.len()` is greater than
@@ -75,7 +80,7 @@ impl<Item: FastValue> FastFieldReader<Item> {
// TODO change start to `u64`.
// For multifastfield, start is an index in a second fastfield, not a `DocId`
pub fn get_range(&self, start: u32, output: &mut [Item]) {
let output_u64: &mut [u64] = unsafe { mem::transmute(output) };
let output_u64: &mut [u64] = unsafe { mem::transmute(output) }; // ok: Item is either `u64` or `i64`
self.bit_unpacker.get_range(start, output_u64);
for out in output_u64.iter_mut() {
*out = Item::from_u64(*out + self.min_value_u64).as_u64();

View File

@@ -138,8 +138,7 @@ impl<'a> SegmentWriter<'a> {
}
})
.collect();
let mut term = unsafe { Term::with_capacity(100) };
term.set_field(field);
let mut term = Term::for_field(field); // we set the Term
for facet_bytes in facets {
let mut unordered_term_id_opt = None;
let fake_str = unsafe { str::from_utf8_unchecked(facet_bytes) };

View File

@@ -194,8 +194,7 @@ pub trait PostingsWriter {
token_stream: &mut TokenStream,
heap: &Heap,
) -> u32 {
let mut term = unsafe { Term::with_capacity(100) };
term.set_field(field);
let mut term = Term::for_field(field);
let num_tokens = {
let mut sink = |token: &Token| {
term.set_text(token.text.as_str());

View File

@@ -279,19 +279,11 @@ impl Postings for SegmentPostings {
fn positions_with_offset(&mut self, offset: u32, output: &mut Vec<u32>) {
if self.position_computer.is_some() {
let prev_capacity = output.capacity();
let term_freq = self.term_freq() as usize;
if term_freq > prev_capacity {
let additional_len = term_freq - output.len();
output.reserve(additional_len);
}
unsafe {
output.set_len(term_freq);
self.position_computer
.as_mut()
.unwrap()
.positions_with_offset(offset, &mut output[..])
}
output.resize(self.term_freq() as usize, 0u32);
self.position_computer
.as_mut()
.unwrap()
.positions_with_offset(offset, &mut output[..])
} else {
output.clear();
}

View File

@@ -10,7 +10,7 @@ const HORIZON_NUM_TINYBITSETS: usize = 64;
const HORIZON: u32 = 64u32 * HORIZON_NUM_TINYBITSETS as u32;
// `drain_filter` is not stable yet.
// This function is similar except that it does is not unsafe, and
// This function is similar except that it is not unstable, and
// it does not keep the original vector ordering.
//
// Also, it does not "yield" any elements.

View File

@@ -54,7 +54,7 @@ impl Facet {
}
/// Creates a `Facet` from its binary representation.
pub(crate) fn from_encoded(encoded_bytes: Vec<u8>) -> Facet {
pub(crate) unsafe fn from_encoded(encoded_bytes: Vec<u8>) -> Facet {
Facet(encoded_bytes)
}

View File

@@ -74,8 +74,10 @@ impl Term {
/// It is declared unsafe, as the term content
/// is not initialized, and a call to `.field()`
/// would panic.
pub(crate) unsafe fn with_capacity(num_bytes: usize) -> Term {
Term(Vec::with_capacity(num_bytes))
pub(crate) fn for_field(field: Field) -> Term {
let mut term = Term(Vec::with_capacity(100));
term.set_field(field);
term
}
/// Returns the field.

View File

@@ -198,7 +198,7 @@ mod tests {
let mut term_string = String::new();
while term_it.advance() {
//let term = Term::from_bytes(term_it.key());
term_string.push_str(unsafe { str::from_utf8_unchecked(term_it.key()) });
term_string.push_str(unsafe { str::from_utf8_unchecked(term_it.key()) }); // ok test
}
assert_eq!(&*term_string, "abcdef");
}

View File

@@ -92,7 +92,9 @@ fn extract_bits(data: &[u8], addr_bits: usize, num_bits: u8) -> u64 {
assert!(num_bits <= 56);
let addr_byte = addr_bits / 8;
let bit_shift = (addr_bits % 8) as u64;
assert!(data.len() >= addr_byte + 8);
let val_unshifted_unmasked: u64 = unsafe {
// ok: the assert above guarantees `data.len() >= addr_byte + 8`
let addr = data.as_ptr().offset(addr_byte as isize) as *const u64;
ptr::read_unaligned(addr)
};

View File

@@ -95,7 +95,7 @@ mod tests {
let mut tokens = vec![];
{
let mut add_token = |token: &Token| {
let facet = Facet::from_encoded(token.text.as_bytes().to_owned());
let facet = unsafe { Facet::from_encoded(token.text.as_bytes().to_owned()) }; // ok test
tokens.push(format!("{}", facet));
};
FacetTokenizer
@@ -115,11 +115,11 @@ mod tests {
let mut tokens = vec![];
{
let mut add_token = |token: &Token| {
let facet = Facet::from_encoded(token.text.as_bytes().to_owned());
let facet = unsafe { Facet::from_encoded(token.text.as_bytes().to_owned()) }; // ok test
tokens.push(format!("{}", facet));
};
FacetTokenizer
.token_stream(unsafe { str::from_utf8_unchecked(facet.encoded_bytes()) })
.token_stream(unsafe { str::from_utf8_unchecked(facet.encoded_bytes()) }) // ok test
.process(&mut add_token);
}
assert_eq!(tokens.len(), 1);