Added limitation on term dictionary saturation

This commit is contained in:
Paul Masurel
2017-05-09 14:10:33 +09:00
parent ffb62b6835
commit 90bc3e3773
5 changed files with 19 additions and 4 deletions

View File

@@ -81,6 +81,10 @@ impl<'a> HashMap<'a> {
(hash as usize) & self.mask
}
/// Reports whether the hash map is considered saturated.
///
/// Returns `true` as soon as three times the length of `occupied`
/// exceeds the length of `table`.
pub fn is_saturated(&self) -> bool {
    let table_len = self.table.len();
    let occupied_len = self.occupied.len();
    occupied_len * 3 > table_len
}
/// Returns the byte slice that the heap yields for `bytes_ref`.
fn get_key(&self, bytes_ref: BytesRef) -> &[u8] {
    let heap = &self.heap;
    heap.get_slice(bytes_ref)
}

View File

@@ -36,7 +36,7 @@ use std::thread;
// Size of the margin for the heap. A segment is closed when the remaining memory
// in the heap goes below MARGIN_IN_BYTES.
pub const MARGIN_IN_BYTES: u32 = 10_000_000u32;
pub const MARGIN_IN_BYTES: u32 = 1_000_000u32;
// We impose the memory per thread to be at least three times MARGIN_IN_BYTES.
pub const HEAP_SIZE_LIMIT: u32 = MARGIN_IN_BYTES * 3u32;
@@ -269,6 +269,11 @@ fn index_documents(heap: &mut Heap,
segment_writer.max_doc());
break;
}
if segment_writer.is_termdictionary_saturated() {
info!("Term dic saturated, flushing segment with maxdoc={}.",
segment_writer.max_doc());
break;
}
}
let num_docs = segment_writer.max_doc();

View File

@@ -309,8 +309,6 @@ impl SegmentUpdater {
let merging_join_handle = thread::spawn(move || {
// first we need to apply deletes to our segment.
info!("Start merge: {:?}", segment_ids_vec);
let merged_segment = segment_updater_clone.new_segment();
let merged_segment_id = merged_segment.id();
let merge_result = perform_merge(&segment_ids_vec, &segment_updater_clone, merged_segment, target_opstamp);

View File

@@ -97,6 +97,11 @@ impl<'a> SegmentWriter<'a> {
self.heap.num_free_bytes() <= MARGIN_IN_BYTES
}
/// Tells whether the term dictionary is saturated.
///
/// The answer is delegated to `multifield_postings`.
pub fn is_termdictionary_saturated(&self) -> bool {
    let postings = &self.multifield_postings;
    postings.is_termdictionary_saturated()
}
/// Indexes a new document
///
/// As a user, you should rather use `IndexWriter`'s add_document.

View File

@@ -66,7 +66,6 @@ impl<'a> MultiFieldPostingsWriter<'a> {
}
}
pub fn index_text(&mut self,
doc: DocId,
field: Field,
@@ -123,6 +122,10 @@ impl<'a> MultiFieldPostingsWriter<'a> {
}
Ok(())
}
/// Tells whether the term dictionary is saturated.
///
/// This is exactly the result of `term_index.is_saturated()`.
pub fn is_termdictionary_saturated(&self) -> bool {
    let term_index = &self.term_index;
    term_index.is_saturated()
}
}