Closes #64 - Improve Index creationg API / documentation

This commit is contained in:
Paul Masurel
2017-02-25 20:40:39 +09:00
parent ca1617d3cd
commit 3a86fc00a2
5 changed files with 86 additions and 76 deletions

View File

@@ -7,7 +7,7 @@ use std::fmt;
use rustc_serialize::json;
use core::SegmentId;
use directory::{Directory, MmapDirectory, RAMDirectory};
use indexer::IndexWriter;
use indexer::index_writer::open_index_writer;
use core::searcher::Searcher;
use std::convert::From;
use num_cpus;
@@ -18,6 +18,7 @@ use core::SegmentMeta;
use super::pool::LeasedItem;
use std::path::Path;
use core::IndexMeta;
use IndexWriter;
use core::META_FILEPATH;
use super::segment::create_segment;
use indexer::segment_updater::save_new_metas;
@@ -147,8 +148,16 @@ impl Index {
self.opstamp
}
/// Creates a multithreaded writer.
/// Each writer produces an independent segment.
/// Open a new index writer. Attempts to acquire a lockfile.
///
/// The lockfile should be deleted on drop, but it is possible
/// that due to a panic or other error, a stale lockfile will be
/// left in the index directory. If you are sure that no other
/// `IndexWriter` on the system is accessing the index directory,
/// it is safe to manually delete the lockfile.
///
/// num_threads specifies the number of indexing workers that
/// should work at the same time.
///
/// # Errors
/// If the lockfile already exists, returns `Error::FileAlreadyExists`.
@@ -158,12 +167,13 @@ impl Index {
num_threads: usize,
heap_size_in_bytes: usize)
-> Result<IndexWriter> {
IndexWriter::open(self, num_threads, heap_size_in_bytes)
open_index_writer(self, num_threads, heap_size_in_bytes)
}
/// Creates a multithreaded writer
/// It just calls `writer_with_num_threads` with the number of cores as `num_threads`
///
/// # Errors
/// If the lockfile already exists, returns `Error::FileAlreadyExists`.
/// # Panics

View File

@@ -3,7 +3,7 @@
/// Definition of Tantivy's error and result.
use std::io;
use std::result;
use std::path::PathBuf;
use std::error;
use std::sync::PoisonError;
@@ -12,8 +12,7 @@ use query;
use schema;
/// Tantivy result.
pub type Result<T> = result::Result<T, Error>;
/// Generic tantivy error.

View File

@@ -86,6 +86,70 @@ impl !Send for IndexWriter {}
impl !Sync for IndexWriter {}
/// Open a new index writer. Attempts to acquire a lockfile.
///
/// The lockfile should be deleted on drop, but it is possible
/// that due to a panic or other error, a stale lockfile will be
/// left in the index directory. If you are sure that no other
/// `IndexWriter` on the system is accessing the index directory,
/// it is safe to manually delete the lockfile.
///
/// num_threads specifies the number of indexing workers that
/// should work at the same time.
/// # Errors
/// If the lockfile already exists, returns `Error::FileAlreadyExists`.
/// # Panics
/// If the heap size per thread is too small, panics.
pub fn open_index_writer(index: &Index,
num_threads: usize,
heap_size_in_bytes_per_thread: usize)
-> Result<IndexWriter> {
if heap_size_in_bytes_per_thread <= HEAP_SIZE_LIMIT as usize {
panic!(format!("The heap size per thread needs to be at least {}.",
HEAP_SIZE_LIMIT));
}
let directory_lock = try!(DirectoryLock::lock(index.directory().box_clone()));
let (document_sender, document_receiver): (DocumentSender, DocumentReceiver) =
chan::sync(PIPELINE_MAX_SIZE_IN_DOCS);
let delete_queue = DeleteQueue::default();
let segment_updater = SegmentUpdater::new(index.clone(), delete_queue.clone())?;
let mut index_writer = IndexWriter {
_directory_lock: directory_lock,
heap_size_in_bytes_per_thread: heap_size_in_bytes_per_thread,
index: index.clone(),
document_receiver: document_receiver,
document_sender: document_sender,
segment_updater: segment_updater,
workers_join_handle: Vec::new(),
num_threads: num_threads,
delete_queue: delete_queue,
committed_opstamp: index.opstamp(),
uncommitted_opstamp: index.opstamp(),
generation: 0,
worker_id: 0,
};
try!(index_writer.start_workers());
Ok(index_writer)
}
// TODO put delete bitset in segment entry
// rather than DocToOpstamp.
@@ -253,70 +317,6 @@ impl IndexWriter {
Ok(())
}
/// Open a new index writer. Attempts to acquire a lockfile.
///
/// The lockfile should be deleted on drop, but it is possible
/// that due to a panic or other error, a stale lockfile will be
/// left in the index directory. If you are sure that no other
/// `IndexWriter` on the system is accessing the index directory,
/// it is safe to manually delete the lockfile.
///
/// num_threads specifies the number of indexing workers that
/// should work at the same time.
/// # Errors
/// If the lockfile already exists, returns `Error::FileAlreadyExists`.
/// # Panics
/// If the heap size per thread is too small, panics.
pub fn open(index: &Index,
num_threads: usize,
heap_size_in_bytes_per_thread: usize)
-> Result<IndexWriter> {
if heap_size_in_bytes_per_thread <= HEAP_SIZE_LIMIT as usize {
panic!(format!("The heap size per thread needs to be at least {}.",
HEAP_SIZE_LIMIT));
}
let directory_lock = try!(DirectoryLock::lock(index.directory().box_clone()));
let (document_sender, document_receiver): (DocumentSender, DocumentReceiver) =
chan::sync(PIPELINE_MAX_SIZE_IN_DOCS);
let delete_queue = DeleteQueue::default();
let segment_updater = SegmentUpdater::new(index.clone(), delete_queue.clone())?;
let mut index_writer = IndexWriter {
_directory_lock: directory_lock,
heap_size_in_bytes_per_thread: heap_size_in_bytes_per_thread,
index: index.clone(),
document_receiver: document_receiver,
document_sender: document_sender,
segment_updater: segment_updater,
workers_join_handle: Vec::new(),
num_threads: num_threads,
delete_queue: delete_queue,
committed_opstamp: index.opstamp(),
uncommitted_opstamp: index.opstamp(),
generation: 0,
worker_id: 0,
};
try!(index_writer.start_workers());
Ok(index_writer)
}
pub fn get_merge_policy(&self) -> Box<MergePolicy> {
self.segment_updater.get_merge_policy()
}

View File

@@ -1,5 +1,4 @@
mod index_writer;
pub mod index_writer;
pub mod segment_serializer;
pub mod merger;
mod merge_policy;

View File

@@ -73,6 +73,11 @@ mod macros {
);
}
pub use error::Error;
/// Tantivy result.
pub type Result<T> = std::result::Result<T, Error>;
mod core;
mod compression;
mod fastfield;
@@ -80,9 +85,6 @@ mod store;
mod indexer;
mod common;
mod error;
pub use error::{Result, Error};
mod analyzer;
mod datastruct;