Capping IndexWriter Num thread (#1033)

* capping num threads of index writer to MAX_NUM_THREAD = 8

* fixed formatting

* run ci

* fix bug from max to min
This commit is contained in:
Evance Soumaoro
2021-05-06 11:44:39 +00:00
committed by GitHub
parent 2aced2d958
commit 8d51e9cc91
2 changed files with 7 additions and 3 deletions

View File

@@ -13,7 +13,7 @@ use crate::directory::INDEX_WRITER_LOCK;
use crate::directory::{Directory, RamDirectory};
use crate::error::DataCorruption;
use crate::error::TantivyError;
use crate::indexer::index_writer::HEAP_SIZE_MIN;
use crate::indexer::index_writer::{HEAP_SIZE_MIN, MAX_NUM_THREAD};
use crate::indexer::segment_updater::save_new_metas;
use crate::reader::IndexReader;
use crate::reader::IndexReaderBuilder;
@@ -432,7 +432,8 @@ impl Index {
/// Creates a multithreaded writer
///
/// Tantivy will automatically define the number of threads to use.
/// Tantivy will automatically define the number of threads to use, but
/// no more than [`MAX_NUM_THREAD`] threads.
/// `overall_heap_size_in_bytes` is the total target memory usage that will be split
/// between a given number of threads.
///
@@ -441,7 +442,7 @@ impl Index {
/// # Panics
/// If the heap size per thread is too small, panics.
pub fn writer(&self, overall_heap_size_in_bytes: usize) -> crate::Result<IndexWriter> {
let mut num_threads = num_cpus::get();
let mut num_threads = std::cmp::min(num_cpus::get(), MAX_NUM_THREAD);
let heap_size_in_bytes_per_thread = overall_heap_size_in_bytes / num_threads;
if heap_size_in_bytes_per_thread < HEAP_SIZE_MIN {
num_threads = (overall_heap_size_in_bytes / HEAP_SIZE_MIN).max(1);

View File

@@ -43,6 +43,9 @@ pub const MARGIN_IN_BYTES: usize = 1_000_000;
pub const HEAP_SIZE_MIN: usize = ((MARGIN_IN_BYTES as u32) * 3u32) as usize;
pub const HEAP_SIZE_MAX: usize = u32::max_value() as usize - MARGIN_IN_BYTES;
// We cap the number of index writer threads at this value.
pub const MAX_NUM_THREAD: usize = 8;
// Add document will block if the number of docs waiting in the queue to be indexed
// reaches `PIPELINE_MAX_SIZE_IN_DOCS`
const PIPELINE_MAX_SIZE_IN_DOCS: usize = 10_000;