diff --git a/src/core/index.rs b/src/core/index.rs index c3753514d..e51fcb006 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -7,7 +7,7 @@ use std::fmt; use rustc_serialize::json; use core::SegmentId; use directory::{Directory, MmapDirectory, RAMDirectory}; -use indexer::IndexWriter; +use indexer::index_writer::open_index_writer; use core::searcher::Searcher; use std::convert::From; use num_cpus; @@ -18,6 +18,7 @@ use core::SegmentMeta; use super::pool::LeasedItem; use std::path::Path; use core::IndexMeta; +use IndexWriter; use core::META_FILEPATH; use super::segment::create_segment; use indexer::segment_updater::save_new_metas; @@ -147,8 +148,16 @@ impl Index { self.opstamp } - /// Creates a multithreaded writer. - /// Each writer produces an independent segment. + /// Open a new index writer. Attempts to acquire a lockfile. + /// + /// The lockfile should be deleted on drop, but it is possible + /// that due to a panic or other error, a stale lockfile will be + /// left in the index directory. If you are sure that no other + /// `IndexWriter` on the system is accessing the index directory, + /// it is safe to manually delete the lockfile. + /// + /// num_threads specifies the number of indexing workers that + /// should work at the same time. /// /// # Errors /// If the lockfile already exists, returns `Error::FileAlreadyExists`. @@ -158,12 +167,13 @@ impl Index { num_threads: usize, heap_size_in_bytes: usize) -> Result { - IndexWriter::open(self, num_threads, heap_size_in_bytes) + open_index_writer(self, num_threads, heap_size_in_bytes) } /// Creates a multithreaded writer /// It just calls `writer_with_num_threads` with the number of cores as `num_threads` + /// /// # Errors /// If the lockfile already exists, returns `Error::FileAlreadyExists`. /// # Panics diff --git a/src/error.rs b/src/error.rs index 3404e9911..2b405357f 100644 --- a/src/error.rs +++ b/src/error.rs @@ -3,7 +3,7 @@ /// Definition of Tantivy's error and result. use std::io; -use std::result; + use std::path::PathBuf; use std::error; use std::sync::PoisonError; @@ -12,8 +12,7 @@ use query; use schema; -/// Tantivy result. -pub type Result = result::Result; + /// Generic tantivy error. diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 8e6a9351e..92c19e619 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -86,6 +86,70 @@ impl !Send for IndexWriter {} impl !Sync for IndexWriter {} + +/// Open a new index writer. Attempts to acquire a lockfile. +/// +/// The lockfile should be deleted on drop, but it is possible +/// that due to a panic or other error, a stale lockfile will be +/// left in the index directory. If you are sure that no other +/// `IndexWriter` on the system is accessing the index directory, +/// it is safe to manually delete the lockfile. +/// +/// num_threads specifies the number of indexing workers that +/// should work at the same time. +/// # Errors +/// If the lockfile already exists, returns `Error::FileAlreadyExists`. +/// # Panics +/// If the heap size per thread is too small, panics. +pub fn open_index_writer(index: &Index, + num_threads: usize, + heap_size_in_bytes_per_thread: usize) + -> Result { + + if heap_size_in_bytes_per_thread <= HEAP_SIZE_LIMIT as usize { + panic!(format!("The heap size per thread needs to be at least {}.", + HEAP_SIZE_LIMIT)); + } + + let directory_lock = try!(DirectoryLock::lock(index.directory().box_clone())); + + let (document_sender, document_receiver): (DocumentSender, DocumentReceiver) = + chan::sync(PIPELINE_MAX_SIZE_IN_DOCS); + + + let delete_queue = DeleteQueue::default(); + + let segment_updater = SegmentUpdater::new(index.clone(), delete_queue.clone())?; + + let mut index_writer = IndexWriter { + + _directory_lock: directory_lock, + + heap_size_in_bytes_per_thread: heap_size_in_bytes_per_thread, + index: index.clone(), + + document_receiver: document_receiver, + document_sender: document_sender, + + segment_updater: segment_updater, + + workers_join_handle: Vec::new(), + num_threads: num_threads, + + delete_queue: delete_queue, + + committed_opstamp: index.opstamp(), + uncommitted_opstamp: index.opstamp(), + + generation: 0, + + worker_id: 0, + }; + try!(index_writer.start_workers()); + Ok(index_writer) +} + + // TODO put delete bitset in segment entry // rather than DocToOpstamp. @@ -253,70 +317,6 @@ impl IndexWriter { Ok(()) } - - /// Open a new index writer. Attempts to acquire a lockfile. - /// - /// The lockfile should be deleted on drop, but it is possible - /// that due to a panic or other error, a stale lockfile will be - /// left in the index directory. If you are sure that no other - /// `IndexWriter` on the system is accessing the index directory, - /// it is safe to manually delete the lockfile. - /// - /// num_threads specifies the number of indexing workers that - /// should work at the same time. - /// # Errors - /// If the lockfile already exists, returns `Error::FileAlreadyExists`. - /// # Panics - /// If the heap size per thread is too small, panics. - pub fn open(index: &Index, - num_threads: usize, - heap_size_in_bytes_per_thread: usize) - -> Result { - - if heap_size_in_bytes_per_thread <= HEAP_SIZE_LIMIT as usize { - panic!(format!("The heap size per thread needs to be at least {}.", - HEAP_SIZE_LIMIT)); - } - - let directory_lock = try!(DirectoryLock::lock(index.directory().box_clone())); - - let (document_sender, document_receiver): (DocumentSender, DocumentReceiver) = - chan::sync(PIPELINE_MAX_SIZE_IN_DOCS); - - - let delete_queue = DeleteQueue::default(); - - let segment_updater = SegmentUpdater::new(index.clone(), delete_queue.clone())?; - - let mut index_writer = IndexWriter { - - _directory_lock: directory_lock, - - heap_size_in_bytes_per_thread: heap_size_in_bytes_per_thread, - index: index.clone(), - - document_receiver: document_receiver, - document_sender: document_sender, - - segment_updater: segment_updater, - - workers_join_handle: Vec::new(), - num_threads: num_threads, - - delete_queue: delete_queue, - - committed_opstamp: index.opstamp(), - uncommitted_opstamp: index.opstamp(), - - generation: 0, - - worker_id: 0, - }; - try!(index_writer.start_workers()); - Ok(index_writer) - } - - pub fn get_merge_policy(&self) -> Box { self.segment_updater.get_merge_policy() } diff --git a/src/indexer/mod.rs b/src/indexer/mod.rs index b2a71fa02..8380332af 100644 --- a/src/indexer/mod.rs +++ b/src/indexer/mod.rs @@ -1,5 +1,4 @@ - -mod index_writer; +pub mod index_writer; pub mod segment_serializer; pub mod merger; mod merge_policy; diff --git a/src/lib.rs b/src/lib.rs index 2333987e4..5c8ca7ece 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -73,6 +73,11 @@ mod macros { ); } +pub use error::Error; + +/// Tantivy result. +pub type Result = std::result::Result; + mod core; mod compression; mod fastfield; @@ -80,9 +85,6 @@ mod store; mod indexer; mod common; mod error; - -pub use error::{Result, Error}; - mod analyzer; mod datastruct;