mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-23 19:50:42 +00:00
Remove some unsafe stuff, justified some of it.
This commit is contained in:
@@ -379,8 +379,10 @@ impl FacetCollector {
|
||||
})
|
||||
.sum();
|
||||
if count > 0u64 {
|
||||
let bytes = facet_merger.key().to_owned();
|
||||
facet_counts.insert(Facet::from_encoded(bytes), count);
|
||||
let bytes: Vec<u8> = facet_merger.key().to_owned();
|
||||
// may create a corrupted facet if the term dictionary is corrupted
|
||||
let facet = unsafe { Facet::from_encoded(bytes) };
|
||||
facet_counts.insert(facet, count);
|
||||
}
|
||||
}
|
||||
FacetCounts { facet_counts }
|
||||
@@ -452,9 +454,9 @@ impl FacetCounts {
|
||||
let right_bound = if facet.is_root() {
|
||||
Bound::Unbounded
|
||||
} else {
|
||||
let mut facet_after_bytes = facet.encoded_bytes().to_owned();
|
||||
let mut facet_after_bytes: Vec<u8> = facet.encoded_bytes().to_owned();
|
||||
facet_after_bytes.push(1u8);
|
||||
let facet_after = Facet::from_encoded(facet_after_bytes);
|
||||
let facet_after = unsafe { Facet::from_encoded(facet_after_bytes) }; // ok logic
|
||||
Bound::Excluded(facet_after)
|
||||
};
|
||||
let underlying: btree_map::Range<_, _> = self.facet_counts.range((left_bound, right_bound));
|
||||
|
||||
@@ -17,7 +17,7 @@ mod murmurhash2 {
|
||||
|
||||
let num_blocks = len >> 2;
|
||||
for _ in 0..num_blocks {
|
||||
let mut k: u32 = unsafe { *key_ptr };
|
||||
let mut k: u32 = unsafe { *key_ptr }; // ok because of num_blocks definition
|
||||
k = k.wrapping_mul(M);
|
||||
k ^= k >> 24;
|
||||
k = k.wrapping_mul(M);
|
||||
|
||||
@@ -67,6 +67,11 @@ impl<Item: FastValue> FastFieldReader<Item> {
|
||||
/// associated with the `DocId` going from
|
||||
/// `start` to `start + output.len()`.
|
||||
///
|
||||
/// Regardless of the type of `Item`, this method works by
|
||||
/// - transmuting the output array
|
||||
/// - extracting the `Item`s as if they were `u64`
|
||||
/// - possibly converting the `u64` value to the right type.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// May panic if `start + output.len()` is greater than
|
||||
@@ -75,7 +80,7 @@ impl<Item: FastValue> FastFieldReader<Item> {
|
||||
// TODO change start to `u64`.
|
||||
// For multifastfield, start is an index in a second fastfield, not a `DocId`
|
||||
pub fn get_range(&self, start: u32, output: &mut [Item]) {
|
||||
let output_u64: &mut [u64] = unsafe { mem::transmute(output) };
|
||||
let output_u64: &mut [u64] = unsafe { mem::transmute(output) }; // ok: Item is either `u64` or `i64`
|
||||
self.bit_unpacker.get_range(start, output_u64);
|
||||
for out in output_u64.iter_mut() {
|
||||
*out = Item::from_u64(*out + self.min_value_u64).as_u64();
|
||||
|
||||
@@ -138,8 +138,7 @@ impl<'a> SegmentWriter<'a> {
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
let mut term = unsafe { Term::with_capacity(100) };
|
||||
term.set_field(field);
|
||||
let mut term = Term::for_field(field); // we set the Term
|
||||
for facet_bytes in facets {
|
||||
let mut unordered_term_id_opt = None;
|
||||
let fake_str = unsafe { str::from_utf8_unchecked(facet_bytes) };
|
||||
|
||||
@@ -194,8 +194,7 @@ pub trait PostingsWriter {
|
||||
token_stream: &mut TokenStream,
|
||||
heap: &Heap,
|
||||
) -> u32 {
|
||||
let mut term = unsafe { Term::with_capacity(100) };
|
||||
term.set_field(field);
|
||||
let mut term = Term::for_field(field);
|
||||
let num_tokens = {
|
||||
let mut sink = |token: &Token| {
|
||||
term.set_text(token.text.as_str());
|
||||
|
||||
@@ -279,19 +279,11 @@ impl Postings for SegmentPostings {
|
||||
|
||||
fn positions_with_offset(&mut self, offset: u32, output: &mut Vec<u32>) {
|
||||
if self.position_computer.is_some() {
|
||||
let prev_capacity = output.capacity();
|
||||
let term_freq = self.term_freq() as usize;
|
||||
if term_freq > prev_capacity {
|
||||
let additional_len = term_freq - output.len();
|
||||
output.reserve(additional_len);
|
||||
}
|
||||
unsafe {
|
||||
output.set_len(term_freq);
|
||||
self.position_computer
|
||||
.as_mut()
|
||||
.unwrap()
|
||||
.positions_with_offset(offset, &mut output[..])
|
||||
}
|
||||
output.resize(self.term_freq() as usize, 0u32);
|
||||
self.position_computer
|
||||
.as_mut()
|
||||
.unwrap()
|
||||
.positions_with_offset(offset, &mut output[..])
|
||||
} else {
|
||||
output.clear();
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@ const HORIZON_NUM_TINYBITSETS: usize = 64;
|
||||
const HORIZON: u32 = 64u32 * HORIZON_NUM_TINYBITSETS as u32;
|
||||
|
||||
// `drain_filter` is not stable yet.
|
||||
// This function is similar except that it does is not unsafe, and
|
||||
// This function is similar except that it is not unstable, and
|
||||
// it does not keep the original vector ordering.
|
||||
//
|
||||
// Also, it does not "yield" any elements.
|
||||
|
||||
@@ -54,7 +54,7 @@ impl Facet {
|
||||
}
|
||||
|
||||
/// Creates a `Facet` from its binary representation.
|
||||
pub(crate) fn from_encoded(encoded_bytes: Vec<u8>) -> Facet {
|
||||
pub(crate) unsafe fn from_encoded(encoded_bytes: Vec<u8>) -> Facet {
|
||||
Facet(encoded_bytes)
|
||||
}
|
||||
|
||||
|
||||
@@ -74,8 +74,10 @@ impl Term {
|
||||
/// It is declared unsafe, as the term content
|
||||
/// is not initialized, and a call to `.field()`
|
||||
/// would panic.
|
||||
pub(crate) unsafe fn with_capacity(num_bytes: usize) -> Term {
|
||||
Term(Vec::with_capacity(num_bytes))
|
||||
pub(crate) fn for_field(field: Field) -> Term {
|
||||
let mut term = Term(Vec::with_capacity(100));
|
||||
term.set_field(field);
|
||||
term
|
||||
}
|
||||
|
||||
/// Returns the field.
|
||||
|
||||
@@ -198,7 +198,7 @@ mod tests {
|
||||
let mut term_string = String::new();
|
||||
while term_it.advance() {
|
||||
//let term = Term::from_bytes(term_it.key());
|
||||
term_string.push_str(unsafe { str::from_utf8_unchecked(term_it.key()) });
|
||||
term_string.push_str(unsafe { str::from_utf8_unchecked(term_it.key()) }); // ok test
|
||||
}
|
||||
assert_eq!(&*term_string, "abcdef");
|
||||
}
|
||||
|
||||
@@ -92,7 +92,9 @@ fn extract_bits(data: &[u8], addr_bits: usize, num_bits: u8) -> u64 {
|
||||
assert!(num_bits <= 56);
|
||||
let addr_byte = addr_bits / 8;
|
||||
let bit_shift = (addr_bits % 8) as u64;
|
||||
assert!(data.len() >= addr_byte + 8);
|
||||
let val_unshifted_unmasked: u64 = unsafe {
|
||||
//< ok : check len above
|
||||
let addr = data.as_ptr().offset(addr_byte as isize) as *const u64;
|
||||
ptr::read_unaligned(addr)
|
||||
};
|
||||
|
||||
@@ -95,7 +95,7 @@ mod tests {
|
||||
let mut tokens = vec![];
|
||||
{
|
||||
let mut add_token = |token: &Token| {
|
||||
let facet = Facet::from_encoded(token.text.as_bytes().to_owned());
|
||||
let facet = unsafe { Facet::from_encoded(token.text.as_bytes().to_owned()) }; // ok test
|
||||
tokens.push(format!("{}", facet));
|
||||
};
|
||||
FacetTokenizer
|
||||
@@ -115,11 +115,11 @@ mod tests {
|
||||
let mut tokens = vec![];
|
||||
{
|
||||
let mut add_token = |token: &Token| {
|
||||
let facet = Facet::from_encoded(token.text.as_bytes().to_owned());
|
||||
let facet = unsafe { Facet::from_encoded(token.text.as_bytes().to_owned()) }; // ok test
|
||||
tokens.push(format!("{}", facet));
|
||||
};
|
||||
FacetTokenizer
|
||||
.token_stream(unsafe { str::from_utf8_unchecked(facet.encoded_bytes()) })
|
||||
.token_stream(unsafe { str::from_utf8_unchecked(facet.encoded_bytes()) }) // ok test
|
||||
.process(&mut add_token);
|
||||
}
|
||||
assert_eq!(tokens.len(), 1);
|
||||
|
||||
Reference in New Issue
Block a user