mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 01:02:55 +00:00
Merge branch 'issue/136' into tantivy-imhotep
This commit is contained in:
@@ -80,8 +80,14 @@ impl Heap {
|
||||
/// Stores `val` at `addr` by forwarding the call to `self.inner().set(addr, val)`.
///
/// NOTE(review): this takes `&self` yet performs a write; the mutable access
/// presumably comes from `inner()`, which is not visible here — confirm.
pub fn set<Item>(&self, addr: u32, val: &Item) {
|
||||
self.inner().set(addr, val);
|
||||
}
|
||||
|
||||
/// Returns a mutable reference for an object at a given Item.
|
||||
|
||||
/// Returns a reference to an `Item` at a given `addr`.
///
/// The lookup goes through `get_mut_ref` on `self.inner()`; the mutable
/// reference it yields is handed back to the caller as a shared `&Item`.
/// Only compiled in test builds (`#[cfg(test)]`).
|
||||
#[cfg(test)]
|
||||
pub fn get_ref<Item>(&self, addr: u32) -> &Item {
|
||||
self.inner().get_mut_ref(addr)
|
||||
}
|
||||
|
||||
/// Returns a mutable reference to an `Item` at a given `addr`.
///
/// Forwards to `get_mut_ref` on `self.inner()`.
///
/// NOTE(review): this hands out `&mut Item` from `&self`. Nothing visible here
/// prevents two live mutable references to the same `addr`; confirm how
/// `inner()` and the callers guarantee exclusivity.
|
||||
pub fn get_mut_ref<Item>(&self, addr: u32) -> &mut Item {
|
||||
self.inner().get_mut_ref(addr)
|
||||
}
|
||||
@@ -94,9 +100,8 @@ impl Heap {
|
||||
|
||||
struct InnerHeap {
|
||||
buffer: Vec<u8>,
|
||||
buffer_len: u32,
|
||||
used: u32,
|
||||
next_heap: Option<Box<InnerHeap>>,
|
||||
has_been_resized: bool,
|
||||
}
|
||||
|
||||
|
||||
@@ -106,15 +111,13 @@ impl InnerHeap {
|
||||
let buffer: Vec<u8> = vec![0u8; num_bytes];
|
||||
InnerHeap {
|
||||
buffer: buffer,
|
||||
buffer_len: num_bytes as u32,
|
||||
next_heap: None,
|
||||
used: 0u32,
|
||||
has_been_resized: false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn clear(&mut self) {
|
||||
self.used = 0u32;
|
||||
self.next_heap = None;
|
||||
}
|
||||
|
||||
pub fn capacity(&self,) -> u32 {
|
||||
@@ -124,47 +127,31 @@ impl InnerHeap {
|
||||
// Returns the number of free bytes. If the buffer
|
||||
// has reached its capacity and overflowed to another buffer, return 0.
|
||||
pub fn num_free_bytes(&self,) -> u32 {
|
||||
if self.next_heap.is_some() {
|
||||
if self.has_been_resized {
|
||||
0u32
|
||||
}
|
||||
else {
|
||||
self.buffer_len - self.used
|
||||
(self.buffer.len() as u32) - self.used
|
||||
}
|
||||
}
|
||||
|
||||
pub fn allocate_space(&mut self, num_bytes: usize) -> u32 {
|
||||
let addr = self.used;
|
||||
self.used += num_bytes as u32;
|
||||
if self.used <= self.buffer_len {
|
||||
addr
|
||||
let buffer_len = self.buffer.len();
|
||||
if self.used > buffer_len as u32 {
|
||||
self.buffer.resize(buffer_len * 2, 0u8);
|
||||
self.has_been_resized = true
|
||||
}
|
||||
else {
|
||||
if self.next_heap.is_none() {
|
||||
warn!("Exceeded heap size. The margin was apparently unsufficient. The segment will be committed right after indexing this very last document.");
|
||||
self.next_heap = Some(Box::new(InnerHeap::with_capacity(self.buffer_len as usize)));
|
||||
}
|
||||
self.next_heap.as_mut().unwrap().allocate_space(num_bytes) + self.buffer_len
|
||||
}
|
||||
|
||||
|
||||
addr
|
||||
}
|
||||
|
||||
fn get_slice(&self, start: u32, stop: u32) -> &[u8] {
|
||||
if start >= self.buffer_len {
|
||||
self.next_heap.as_ref().unwrap().get_slice(start - self.buffer_len, stop - self.buffer_len)
|
||||
}
|
||||
else {
|
||||
&self.buffer[start as usize..stop as usize]
|
||||
}
|
||||
&self.buffer[start as usize..stop as usize]
|
||||
}
|
||||
|
||||
fn get_mut_slice(&mut self, start: u32, stop: u32) -> &mut [u8] {
|
||||
if start >= self.buffer_len {
|
||||
self.next_heap.as_mut().unwrap().get_mut_slice(start - self.buffer_len, stop - self.buffer_len)
|
||||
}
|
||||
else {
|
||||
&mut self.buffer[start as usize..stop as usize]
|
||||
}
|
||||
&mut self.buffer[start as usize..stop as usize]
|
||||
}
|
||||
|
||||
fn allocate_and_set(&mut self, data: &[u8]) -> BytesRef {
|
||||
@@ -178,38 +165,23 @@ impl InnerHeap {
|
||||
}
|
||||
|
||||
fn get_mut(&mut self, addr: u32) -> *mut u8 {
|
||||
if addr >= self.buffer_len {
|
||||
self.next_heap.as_mut().unwrap().get_mut(addr - self.buffer_len)
|
||||
}
|
||||
else {
|
||||
let addr_isize = addr as isize;
|
||||
unsafe { self.buffer.as_mut_ptr().offset(addr_isize) }
|
||||
}
|
||||
let addr_isize = addr as isize;
|
||||
unsafe { self.buffer.as_mut_ptr().offset(addr_isize) }
|
||||
}
|
||||
|
||||
fn get_mut_ref<Item>(&mut self, addr: u32) -> &mut Item {
|
||||
if addr >= self.buffer_len {
|
||||
self.next_heap.as_mut().unwrap().get_mut_ref(addr - self.buffer_len)
|
||||
}
|
||||
else {
|
||||
let v_ptr_u8 = self.get_mut(addr) as *mut u8;
|
||||
let v_ptr = v_ptr_u8 as *mut Item;
|
||||
unsafe { &mut *v_ptr }
|
||||
}
|
||||
let v_ptr_u8 = self.get_mut(addr) as *mut u8;
|
||||
let v_ptr = v_ptr_u8 as *mut Item;
|
||||
unsafe { &mut *v_ptr }
|
||||
}
|
||||
|
||||
fn set<Item>(&mut self, addr: u32, val: &Item) {
|
||||
if addr >= self.buffer_len {
|
||||
self.next_heap.as_mut().unwrap().set(addr - self.buffer_len, val);
|
||||
}
|
||||
else {
|
||||
let v_ptr: *const Item = val as *const Item;
|
||||
let v_ptr_u8: *const u8 = v_ptr as *const u8;
|
||||
debug_assert!(addr + mem::size_of::<Item>() as u32 <= self.used);
|
||||
unsafe {
|
||||
let dest_ptr: *mut u8 = self.get_mut(addr);
|
||||
ptr::copy(v_ptr_u8, dest_ptr, mem::size_of::<Item>());
|
||||
}
|
||||
let v_ptr: *const Item = val as *const Item;
|
||||
let v_ptr_u8: *const u8 = v_ptr as *const u8;
|
||||
debug_assert!(addr + mem::size_of::<Item>() as u32 <= self.used);
|
||||
unsafe {
|
||||
let dest_ptr: *mut u8 = self.get_mut(addr);
|
||||
ptr::copy(v_ptr_u8, dest_ptr, mem::size_of::<Item>());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -202,7 +202,7 @@ impl IndexMerger {
|
||||
merged_doc_id_map.push(segment_local_map);
|
||||
}
|
||||
|
||||
let mut field = Field(u32::max_value());
|
||||
let mut last_field: Option<Field> = None;
|
||||
|
||||
while merged_terms.advance() {
|
||||
// Create the total list of doc ids
|
||||
@@ -239,10 +239,11 @@ impl IndexMerger {
|
||||
if let Some(remapped_doc_id) = old_to_new_doc_id[segment_postings.doc() as usize] {
|
||||
if !term_written {
|
||||
let current_field = term.field();
|
||||
if current_field != field {
|
||||
if last_field != Some(current_field) {
|
||||
postings_serializer.new_field(current_field);
|
||||
field = current_field;
|
||||
last_field = Some(current_field);
|
||||
}
|
||||
|
||||
// we make sure to only write the term iff
|
||||
// there is at least one document.
|
||||
postings_serializer.new_term(term.as_slice())?;
|
||||
|
||||
@@ -197,6 +197,7 @@ fn write<'a>(
|
||||
try!(fast_field_writers.serialize(serializer.get_fast_field_serializer()));
|
||||
try!(fieldnorms_writer.serialize(serializer.get_fieldnorms_serializer()));
|
||||
try!(serializer.close());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -16,7 +16,6 @@ const INT_TERM_LEN: usize = 4 + 8;
|
||||
pub struct Term(Vec<u8>);
|
||||
|
||||
/// Extract `field` from Term.
///
/// Reads the first four bytes of `term_bytes` as a big-endian `u32` and wraps
/// the value in `Field`.
///
/// # Panics
///
/// The `[..4]` slice panics when `term_bytes` holds fewer than four bytes.
|
||||
#[doc(hidden)]
|
||||
pub(crate) fn extract_field_from_term_bytes(term_bytes: &[u8]) -> Field {
|
||||
Field(BigEndian::read_u32(&term_bytes[..4]))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user