mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-07 09:32:54 +00:00
Closes #64 - Improve Index creationg API / documentation
This commit is contained in:
@@ -7,7 +7,7 @@ use std::fmt;
|
||||
use rustc_serialize::json;
|
||||
use core::SegmentId;
|
||||
use directory::{Directory, MmapDirectory, RAMDirectory};
|
||||
use indexer::IndexWriter;
|
||||
use indexer::index_writer::open_index_writer;
|
||||
use core::searcher::Searcher;
|
||||
use std::convert::From;
|
||||
use num_cpus;
|
||||
@@ -18,6 +18,7 @@ use core::SegmentMeta;
|
||||
use super::pool::LeasedItem;
|
||||
use std::path::Path;
|
||||
use core::IndexMeta;
|
||||
use IndexWriter;
|
||||
use core::META_FILEPATH;
|
||||
use super::segment::create_segment;
|
||||
use indexer::segment_updater::save_new_metas;
|
||||
@@ -147,8 +148,16 @@ impl Index {
|
||||
self.opstamp
|
||||
}
|
||||
|
||||
/// Creates a multithreaded writer.
|
||||
/// Each writer produces an independent segment.
|
||||
/// Open a new index writer. Attempts to acquire a lockfile.
|
||||
///
|
||||
/// The lockfile should be deleted on drop, but it is possible
|
||||
/// that due to a panic or other error, a stale lockfile will be
|
||||
/// left in the index directory. If you are sure that no other
|
||||
/// `IndexWriter` on the system is accessing the index directory,
|
||||
/// it is safe to manually delete the lockfile.
|
||||
///
|
||||
/// num_threads specifies the number of indexing workers that
|
||||
/// should work at the same time.
|
||||
///
|
||||
/// # Errors
|
||||
/// If the lockfile already exists, returns `Error::FileAlreadyExists`.
|
||||
@@ -158,12 +167,13 @@ impl Index {
|
||||
num_threads: usize,
|
||||
heap_size_in_bytes: usize)
|
||||
-> Result<IndexWriter> {
|
||||
IndexWriter::open(self, num_threads, heap_size_in_bytes)
|
||||
open_index_writer(self, num_threads, heap_size_in_bytes)
|
||||
}
|
||||
|
||||
|
||||
/// Creates a multithreaded writer
|
||||
/// It just calls `writer_with_num_threads` with the number of cores as `num_threads`
|
||||
///
|
||||
/// # Errors
|
||||
/// If the lockfile already exists, returns `Error::FileAlreadyExists`.
|
||||
/// # Panics
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
/// Definition of Tantivy's error and result.
|
||||
|
||||
use std::io;
|
||||
use std::result;
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::error;
|
||||
use std::sync::PoisonError;
|
||||
@@ -12,8 +12,7 @@ use query;
|
||||
use schema;
|
||||
|
||||
|
||||
/// Tantivy result.
|
||||
pub type Result<T> = result::Result<T, Error>;
|
||||
|
||||
|
||||
|
||||
/// Generic tantivy error.
|
||||
|
||||
@@ -86,6 +86,70 @@ impl !Send for IndexWriter {}
|
||||
impl !Sync for IndexWriter {}
|
||||
|
||||
|
||||
|
||||
/// Open a new index writer. Attempts to acquire a lockfile.
|
||||
///
|
||||
/// The lockfile should be deleted on drop, but it is possible
|
||||
/// that due to a panic or other error, a stale lockfile will be
|
||||
/// left in the index directory. If you are sure that no other
|
||||
/// `IndexWriter` on the system is accessing the index directory,
|
||||
/// it is safe to manually delete the lockfile.
|
||||
///
|
||||
/// num_threads specifies the number of indexing workers that
|
||||
/// should work at the same time.
|
||||
/// # Errors
|
||||
/// If the lockfile already exists, returns `Error::FileAlreadyExists`.
|
||||
/// # Panics
|
||||
/// If the heap size per thread is too small, panics.
|
||||
pub fn open_index_writer(index: &Index,
|
||||
num_threads: usize,
|
||||
heap_size_in_bytes_per_thread: usize)
|
||||
-> Result<IndexWriter> {
|
||||
|
||||
if heap_size_in_bytes_per_thread <= HEAP_SIZE_LIMIT as usize {
|
||||
panic!(format!("The heap size per thread needs to be at least {}.",
|
||||
HEAP_SIZE_LIMIT));
|
||||
}
|
||||
|
||||
let directory_lock = try!(DirectoryLock::lock(index.directory().box_clone()));
|
||||
|
||||
let (document_sender, document_receiver): (DocumentSender, DocumentReceiver) =
|
||||
chan::sync(PIPELINE_MAX_SIZE_IN_DOCS);
|
||||
|
||||
|
||||
let delete_queue = DeleteQueue::default();
|
||||
|
||||
let segment_updater = SegmentUpdater::new(index.clone(), delete_queue.clone())?;
|
||||
|
||||
let mut index_writer = IndexWriter {
|
||||
|
||||
_directory_lock: directory_lock,
|
||||
|
||||
heap_size_in_bytes_per_thread: heap_size_in_bytes_per_thread,
|
||||
index: index.clone(),
|
||||
|
||||
document_receiver: document_receiver,
|
||||
document_sender: document_sender,
|
||||
|
||||
segment_updater: segment_updater,
|
||||
|
||||
workers_join_handle: Vec::new(),
|
||||
num_threads: num_threads,
|
||||
|
||||
delete_queue: delete_queue,
|
||||
|
||||
committed_opstamp: index.opstamp(),
|
||||
uncommitted_opstamp: index.opstamp(),
|
||||
|
||||
generation: 0,
|
||||
|
||||
worker_id: 0,
|
||||
};
|
||||
try!(index_writer.start_workers());
|
||||
Ok(index_writer)
|
||||
}
|
||||
|
||||
|
||||
// TODO put delete bitset in segment entry
|
||||
// rather than DocToOpstamp.
|
||||
|
||||
@@ -253,70 +317,6 @@ impl IndexWriter {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
/// Open a new index writer. Attempts to acquire a lockfile.
|
||||
///
|
||||
/// The lockfile should be deleted on drop, but it is possible
|
||||
/// that due to a panic or other error, a stale lockfile will be
|
||||
/// left in the index directory. If you are sure that no other
|
||||
/// `IndexWriter` on the system is accessing the index directory,
|
||||
/// it is safe to manually delete the lockfile.
|
||||
///
|
||||
/// num_threads specifies the number of indexing workers that
|
||||
/// should work at the same time.
|
||||
/// # Errors
|
||||
/// If the lockfile already exists, returns `Error::FileAlreadyExists`.
|
||||
/// # Panics
|
||||
/// If the heap size per thread is too small, panics.
|
||||
pub fn open(index: &Index,
|
||||
num_threads: usize,
|
||||
heap_size_in_bytes_per_thread: usize)
|
||||
-> Result<IndexWriter> {
|
||||
|
||||
if heap_size_in_bytes_per_thread <= HEAP_SIZE_LIMIT as usize {
|
||||
panic!(format!("The heap size per thread needs to be at least {}.",
|
||||
HEAP_SIZE_LIMIT));
|
||||
}
|
||||
|
||||
let directory_lock = try!(DirectoryLock::lock(index.directory().box_clone()));
|
||||
|
||||
let (document_sender, document_receiver): (DocumentSender, DocumentReceiver) =
|
||||
chan::sync(PIPELINE_MAX_SIZE_IN_DOCS);
|
||||
|
||||
|
||||
let delete_queue = DeleteQueue::default();
|
||||
|
||||
let segment_updater = SegmentUpdater::new(index.clone(), delete_queue.clone())?;
|
||||
|
||||
let mut index_writer = IndexWriter {
|
||||
|
||||
_directory_lock: directory_lock,
|
||||
|
||||
heap_size_in_bytes_per_thread: heap_size_in_bytes_per_thread,
|
||||
index: index.clone(),
|
||||
|
||||
document_receiver: document_receiver,
|
||||
document_sender: document_sender,
|
||||
|
||||
segment_updater: segment_updater,
|
||||
|
||||
workers_join_handle: Vec::new(),
|
||||
num_threads: num_threads,
|
||||
|
||||
delete_queue: delete_queue,
|
||||
|
||||
committed_opstamp: index.opstamp(),
|
||||
uncommitted_opstamp: index.opstamp(),
|
||||
|
||||
generation: 0,
|
||||
|
||||
worker_id: 0,
|
||||
};
|
||||
try!(index_writer.start_workers());
|
||||
Ok(index_writer)
|
||||
}
|
||||
|
||||
|
||||
pub fn get_merge_policy(&self) -> Box<MergePolicy> {
|
||||
self.segment_updater.get_merge_policy()
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
|
||||
mod index_writer;
|
||||
pub mod index_writer;
|
||||
pub mod segment_serializer;
|
||||
pub mod merger;
|
||||
mod merge_policy;
|
||||
|
||||
@@ -73,6 +73,11 @@ mod macros {
|
||||
);
|
||||
}
|
||||
|
||||
pub use error::Error;
|
||||
|
||||
/// Tantivy result.
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
mod core;
|
||||
mod compression;
|
||||
mod fastfield;
|
||||
@@ -80,9 +85,6 @@ mod store;
|
||||
mod indexer;
|
||||
mod common;
|
||||
mod error;
|
||||
|
||||
pub use error::{Result, Error};
|
||||
|
||||
mod analyzer;
|
||||
mod datastruct;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user