Merge branch 'issue/136' into tantivy-imhotep

This commit is contained in:
Paul Masurel
2017-05-12 17:04:20 +09:00
4 changed files with 35 additions and 62 deletions

View File

@@ -80,8 +80,14 @@ impl Heap {
pub fn set<Item>(&self, addr: u32, val: &Item) {
self.inner().set(addr, val);
}
/// Returns a mutable reference for an object at a given Item.
/// Returns a reference to an `Item` at a given `addr`.
#[cfg(test)]
pub fn get_ref<Item>(&self, addr: u32) -> &Item {
self.inner().get_mut_ref(addr)
}
/// Returns a mutable reference to an `Item` at a given `addr`.
pub fn get_mut_ref<Item>(&self, addr: u32) -> &mut Item {
self.inner().get_mut_ref(addr)
}
@@ -94,9 +100,8 @@ impl Heap {
struct InnerHeap {
buffer: Vec<u8>,
buffer_len: u32,
used: u32,
next_heap: Option<Box<InnerHeap>>,
has_been_resized: bool,
}
@@ -106,15 +111,13 @@ impl InnerHeap {
let buffer: Vec<u8> = vec![0u8; num_bytes];
InnerHeap {
buffer: buffer,
buffer_len: num_bytes as u32,
next_heap: None,
used: 0u32,
has_been_resized: false,
}
}
pub fn clear(&mut self) {
self.used = 0u32;
self.next_heap = None;
}
pub fn capacity(&self,) -> u32 {
@@ -124,47 +127,31 @@ impl InnerHeap {
// Returns the number of free bytes. If the buffer
// has reached its capacity and overflowed to another buffer, return 0.
pub fn num_free_bytes(&self,) -> u32 {
if self.next_heap.is_some() {
if self.has_been_resized {
0u32
}
else {
self.buffer_len - self.used
(self.buffer.len() as u32) - self.used
}
}
pub fn allocate_space(&mut self, num_bytes: usize) -> u32 {
let addr = self.used;
self.used += num_bytes as u32;
if self.used <= self.buffer_len {
addr
let buffer_len = self.buffer.len();
if self.used > buffer_len as u32 {
self.buffer.resize(buffer_len * 2, 0u8);
self.has_been_resized = true
}
else {
if self.next_heap.is_none() {
warn!("Exceeded heap size. The margin was apparently unsufficient. The segment will be committed right after indexing this very last document.");
self.next_heap = Some(Box::new(InnerHeap::with_capacity(self.buffer_len as usize)));
}
self.next_heap.as_mut().unwrap().allocate_space(num_bytes) + self.buffer_len
}
addr
}
fn get_slice(&self, start: u32, stop: u32) -> &[u8] {
if start >= self.buffer_len {
self.next_heap.as_ref().unwrap().get_slice(start - self.buffer_len, stop - self.buffer_len)
}
else {
&self.buffer[start as usize..stop as usize]
}
&self.buffer[start as usize..stop as usize]
}
fn get_mut_slice(&mut self, start: u32, stop: u32) -> &mut [u8] {
if start >= self.buffer_len {
self.next_heap.as_mut().unwrap().get_mut_slice(start - self.buffer_len, stop - self.buffer_len)
}
else {
&mut self.buffer[start as usize..stop as usize]
}
&mut self.buffer[start as usize..stop as usize]
}
fn allocate_and_set(&mut self, data: &[u8]) -> BytesRef {
@@ -178,38 +165,23 @@ impl InnerHeap {
}
fn get_mut(&mut self, addr: u32) -> *mut u8 {
if addr >= self.buffer_len {
self.next_heap.as_mut().unwrap().get_mut(addr - self.buffer_len)
}
else {
let addr_isize = addr as isize;
unsafe { self.buffer.as_mut_ptr().offset(addr_isize) }
}
let addr_isize = addr as isize;
unsafe { self.buffer.as_mut_ptr().offset(addr_isize) }
}
fn get_mut_ref<Item>(&mut self, addr: u32) -> &mut Item {
if addr >= self.buffer_len {
self.next_heap.as_mut().unwrap().get_mut_ref(addr - self.buffer_len)
}
else {
let v_ptr_u8 = self.get_mut(addr) as *mut u8;
let v_ptr = v_ptr_u8 as *mut Item;
unsafe { &mut *v_ptr }
}
let v_ptr_u8 = self.get_mut(addr) as *mut u8;
let v_ptr = v_ptr_u8 as *mut Item;
unsafe { &mut *v_ptr }
}
fn set<Item>(&mut self, addr: u32, val: &Item) {
if addr >= self.buffer_len {
self.next_heap.as_mut().unwrap().set(addr - self.buffer_len, val);
}
else {
let v_ptr: *const Item = val as *const Item;
let v_ptr_u8: *const u8 = v_ptr as *const u8;
debug_assert!(addr + mem::size_of::<Item>() as u32 <= self.used);
unsafe {
let dest_ptr: *mut u8 = self.get_mut(addr);
ptr::copy(v_ptr_u8, dest_ptr, mem::size_of::<Item>());
}
let v_ptr: *const Item = val as *const Item;
let v_ptr_u8: *const u8 = v_ptr as *const u8;
debug_assert!(addr + mem::size_of::<Item>() as u32 <= self.used);
unsafe {
let dest_ptr: *mut u8 = self.get_mut(addr);
ptr::copy(v_ptr_u8, dest_ptr, mem::size_of::<Item>());
}
}
}

View File

@@ -202,7 +202,7 @@ impl IndexMerger {
merged_doc_id_map.push(segment_local_map);
}
let mut field = Field(u32::max_value());
let mut last_field: Option<Field> = None;
while merged_terms.advance() {
// Create the total list of doc ids
@@ -239,10 +239,11 @@ impl IndexMerger {
if let Some(remapped_doc_id) = old_to_new_doc_id[segment_postings.doc() as usize] {
if !term_written {
let current_field = term.field();
if current_field != field {
if last_field != Some(current_field) {
postings_serializer.new_field(current_field);
field = current_field;
last_field = Some(current_field);
}
// we make sure to only write the term iff
// there is at least one document.
postings_serializer.new_term(term.as_slice())?;

View File

@@ -197,6 +197,7 @@ fn write<'a>(
try!(fast_field_writers.serialize(serializer.get_fast_field_serializer()));
try!(fieldnorms_writer.serialize(serializer.get_fieldnorms_serializer()));
try!(serializer.close());
Ok(())
}

View File

@@ -16,7 +16,6 @@ const INT_TERM_LEN: usize = 4 + 8;
pub struct Term(Vec<u8>);
/// Extract `field` from Term.
#[doc(hidden)]
pub(crate) fn extract_field_from_term_bytes(term_bytes: &[u8]) -> Field {
Field(BigEndian::read_u32(&term_bytes[..4]))
}