mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 01:02:55 +00:00
Added limitation on term dictionary saturation
This commit is contained in:
@@ -81,6 +81,10 @@ impl<'a> HashMap<'a> {
|
||||
(hash as usize) & self.mask
|
||||
}
|
||||
|
||||
pub fn is_saturated(&self) -> bool {
|
||||
self.table.len() < self.occupied.len() * 3
|
||||
}
|
||||
|
||||
/// Looks up the bytes addressed by `bytes_ref`.
///
/// This is a thin wrapper: the slice is whatever `heap.get_slice`
/// returns for the given reference.
fn get_key(&self, bytes_ref: BytesRef) -> &[u8] {
    self.heap.get_slice(bytes_ref)
}
|
||||
|
||||
@@ -36,7 +36,7 @@ use std::thread;
|
||||
|
||||
// Size of the margin for the heap. A segment is closed when the remaining memory
|
||||
// in the heap goes below MARGIN_IN_BYTES.
|
||||
pub const MARGIN_IN_BYTES: u32 = 10_000_000u32;
|
||||
pub const MARGIN_IN_BYTES: u32 = 1_000_000u32;
|
||||
|
||||
// We impose the memory per thread to be at least three times MARGIN_IN_BYTES.
|
||||
pub const HEAP_SIZE_LIMIT: u32 = MARGIN_IN_BYTES * 3u32;
|
||||
@@ -269,6 +269,11 @@ fn index_documents(heap: &mut Heap,
|
||||
segment_writer.max_doc());
|
||||
break;
|
||||
}
|
||||
if segment_writer.is_termdictionary_saturated() {
|
||||
info!("Term dic saturated, flushing segment with maxdoc={}.",
|
||||
segment_writer.max_doc());
|
||||
break;
|
||||
}
|
||||
}
|
||||
let num_docs = segment_writer.max_doc();
|
||||
|
||||
|
||||
@@ -309,8 +309,6 @@ impl SegmentUpdater {
|
||||
let merging_join_handle = thread::spawn(move || {
|
||||
|
||||
// first we need to apply deletes to our segment.
|
||||
info!("Start merge: {:?}", segment_ids_vec);
|
||||
|
||||
let merged_segment = segment_updater_clone.new_segment();
|
||||
let merged_segment_id = merged_segment.id();
|
||||
let merge_result = perform_merge(&segment_ids_vec, &segment_updater_clone, merged_segment, target_opstamp);
|
||||
|
||||
@@ -97,6 +97,11 @@ impl<'a> SegmentWriter<'a> {
|
||||
self.heap.num_free_bytes() <= MARGIN_IN_BYTES
|
||||
}
|
||||
|
||||
pub fn is_termdictionary_saturated(&self,) -> bool {
|
||||
self.multifield_postings.is_termdictionary_saturated()
|
||||
}
|
||||
|
||||
|
||||
/// Indexes a new document
|
||||
///
|
||||
/// As a user, you should rather use `IndexWriter`'s add_document.
|
||||
|
||||
@@ -66,7 +66,6 @@ impl<'a> MultiFieldPostingsWriter<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub fn index_text(&mut self,
|
||||
doc: DocId,
|
||||
field: Field,
|
||||
@@ -123,6 +122,10 @@ impl<'a> MultiFieldPostingsWriter<'a> {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn is_termdictionary_saturated(&self) -> bool {
|
||||
self.term_index.is_saturated()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user