This commit is contained in:
Paul Masurel
2016-09-22 15:45:37 +09:00
parent ca331e7fe5
commit 1a08ca4f95
13 changed files with 98 additions and 23 deletions

View File

@@ -14,8 +14,13 @@ pub fn make_io_err(msg: String) -> io::Error {
io::Error::new(io::ErrorKind::Other, msg)
}
/// Trait for objects that have a length.
pub trait HasLen {
/// Returns the length of the object.
fn len(&self,) -> usize;
/// Returns true iff the length is zero.
fn is_empty(&self,) -> bool {
self.len() == 0
}

View File

@@ -8,6 +8,10 @@ pub struct OpenTimer<'a> {
}
impl<'a> OpenTimer<'a> {
/// Starts timing a new named subtask.
///
/// The returned timer is stopped automatically
/// when the `OpenTimer` is dropped.
pub fn open(&mut self, name: &'static str) -> OpenTimer {
OpenTimer {
name: name,
@@ -28,6 +32,7 @@ impl<'a> Drop for OpenTimer<'a> {
}
}
/// Timing recording
#[derive(Debug, RustcEncodable)]
pub struct Timing {
name: &'static str,
@@ -35,17 +40,20 @@ pub struct Timing {
depth: u32,
}
/// A tree of named timings.
#[derive(Debug, RustcEncodable)]
pub struct TimerTree {
// Flat list of recorded timings; the hierarchy is presumably
// encoded via each `Timing`'s `depth` field — TODO confirm.
timings: Vec<Timing>,
}
impl TimerTree {
/// Returns the total time elapsed in microseconds.
///
/// NOTE(review): `unwrap` panics if no timing has been
/// recorded yet (empty `timings` vector) — confirm callers
/// only invoke this after at least one timer was closed.
pub fn total_time(&self,) -> i64 {
self.timings.last().unwrap().duration
}
/// Opens a new named subtask.
pub fn open(&mut self, name: &'static str) -> OpenTimer {
OpenTimer {
name: name,

View File

@@ -150,7 +150,10 @@ impl Index {
/// Creates an `IndexWriter` with the given heap size (in bytes),
/// using one indexing thread per CPU core (`num_cpus::get()`).
pub fn writer(&self, heap_size_in_bytes: usize) -> Result<IndexWriter> {
self.writer_with_num_threads(num_cpus::get(), heap_size_in_bytes)
}
/// Accessor to the index schema.
///
/// The schema is actually cloned.
pub fn schema(&self,) -> Schema {
self.schema.clone()
}

View File

@@ -65,7 +65,8 @@ impl SegmentReader {
/// Returns the number of documents in the segment.
///
/// NOTE(review): this returns `max_doc` from the segment info,
/// i.e. it assumes no deleted documents — confirm.
pub fn num_docs(&self) -> DocId {
self.segment_info.max_doc
}
/// Accessor to a segment's fast field reader given a field.
pub fn get_fast_field_reader(&self, field: Field) -> io::Result<U32FastFieldReader> {
// Dispatch on the field's declared type from the schema.
let field_entry = self.schema.get_field_entry(field);
match *field_entry.field_type() {
@@ -80,11 +81,17 @@ impl SegmentReader {
}
}
/// Accessor to the segment's field norms reader.
///
/// Field norms are the length (in tokens) of the fields.
/// They are used in the computation of the [TfIdf](https://fulmicoton.gitbooks.io/tantivy-doc/content/tfidf.html).
///
/// They are simply stored as a fast field, serialized in
/// the `.fieldnorm` file of the segment.
pub fn get_fieldnorms_reader(&self, field: Field) -> io::Result<U32FastFieldReader> {
self.fieldnorms_reader.get_field(field)
}
/// Returns the number of documents containing the term.
pub fn doc_freq(&self, term: &Term) -> u32 {
match self.get_term_info(term) {
@@ -92,7 +99,8 @@ impl SegmentReader {
None => 0,
}
}
/// Accessor to the segment's `StoreReader`.
///
/// Returns a reference borrowed from `self`.
pub fn get_store_reader(&self) -> &StoreReader {
&self.store_reader
}

View File

@@ -1,7 +1,6 @@
use std::path::PathBuf;
use std::io;
/// Error that may occur when opening a directory.
#[derive(Debug)]
pub enum OpenDirectoryError {

View File

@@ -3,6 +3,8 @@ mod ram_directory;
mod directory;
mod read_only_source;
mod shared_vec_slice;
/// Errors specific to the directory module.
pub mod error;
use std::io::{Seek, Write};

View File

@@ -1,5 +1,7 @@
#![allow(enum_variant_names)]
//! Definition of Tantivy's error and result types.
use std::io;
use std::result;
use std::path::PathBuf;
@@ -10,6 +12,10 @@ use query;
use schema;
/// Tantivy result.
pub type Result<T> = result::Result<T, Error>;
/// Generic tantivy error.
///
/// Any specialized error returned in tantivy can be converted into `tantivy::Error`.
@@ -87,6 +93,3 @@ impl From<OpenDirectoryError> for Error {
}
}
}
/// Tantivy result.
pub type Result<T> = result::Result<T, Error>;

View File

@@ -1,3 +1,15 @@
//! FastField module
//!
//! FastFields are the equivalent of `DocValues` in `Lucene`.
//! FastFields are stored in a column-oriented fashion and allow fast
//! random access given a `DocId`.
//!
//! Their performance is comparable to that of an array lookup.
//! FastFields are useful when a field is required for all or most of
//! the `DocSet`: for instance for scoring, grouping, filtering, or faceting.
//!
//! Currently only u32 fast fields are supported.
mod reader;
mod writer;
mod serializer;

View File

@@ -36,6 +36,12 @@ type DocumentReceiver = chan::Receiver<Document>;
type NewSegmentSender = chan::Sender<Result<(SegmentId, usize)>>;
type NewSegmentReceiver = chan::Receiver<Result<(SegmentId, usize)>>;
/// `IndexWriter` is the user entry-point to add documents to an index.
///
/// It manages a small number of indexing threads, as well as a shared
/// indexing queue.
/// Each indexing thread builds its own independent `Segment`, via
/// a `SegmentWriter` object.
pub struct IndexWriter {
index: Index,
heap_size_in_bytes_per_thread: usize,
@@ -68,7 +74,6 @@ fn index_documents(heap: &mut Heap,
}
impl IndexWriter {
/// Spawns a new worker thread for indexing.
@@ -139,7 +144,8 @@ impl IndexWriter {
}
Ok(())
}
/// Merges a given list of segments.
///
/// Uses the index's schema to drive an `IndexMerger` over
/// the given segments.
pub fn merge(&mut self, segments: &[Segment]) -> Result<()> {
let schema = self.index.schema();
let merger = try!(IndexMerger::open(schema, segments));

View File

@@ -1,5 +1,4 @@
use Result;
use Error;
use std::io::Write;
use rustc_serialize::json;
@@ -10,6 +9,9 @@ use fastfield::FastFieldSerializer;
use store::StoreWriter;
use postings::PostingsSerializer;
/// The segment serializer is in charge of laying out on disk
/// the data accumulated and sorted by the `SegmentWriter`.
pub struct SegmentSerializer {
segment: Segment,
store_writer: StoreWriter,
@@ -19,7 +21,8 @@ pub struct SegmentSerializer {
}
impl SegmentSerializer {
/// Creates a new `SegmentSerializer`.
pub fn for_segment(segment: &mut Segment) -> Result<SegmentSerializer> {
let store_write = try!(segment.open_write(SegmentComponent::STORE));
@@ -38,23 +41,28 @@ impl SegmentSerializer {
fieldnorms_serializer: fieldnorms_serializer,
})
}
/// Accessor to the `PostingsSerializer`.
pub fn get_postings_serializer(&mut self,) -> &mut PostingsSerializer {
&mut self.postings_serializer
}
/// Accessor to the `FastFieldSerializer`.
pub fn get_fast_field_serializer(&mut self,) -> &mut FastFieldSerializer {
&mut self.fast_field_serializer
}
/// Accessor to the field norms serializer.
///
/// Field norms are stored as a fast field, hence the
/// `FastFieldSerializer` return type.
pub fn get_fieldnorms_serializer(&mut self,) -> &mut FastFieldSerializer {
&mut self.fieldnorms_serializer
}
/// Accessor to the `StoreWriter`.
pub fn get_store_writer(&mut self,) -> &mut StoreWriter {
&mut self.store_writer
}
/// Writes the `SegmentInfo`, JSON-encoded, into the
/// segment's `INFO` component file.
pub fn write_segment_info(&mut self, segment_info: &SegmentInfo) -> Result<()> {
let mut write = try!(self.segment.open_write(SegmentComponent::INFO));
let json_data = json::encode(segment_info)
@@ -63,7 +71,8 @@ impl SegmentSerializer {
try!(write.flush());
Ok(())
}
/// Finalizes the segment serialization by closing
/// the underlying serializers, consuming `self`.
pub fn close(mut self,) -> Result<()> {
try!(self.fast_field_serializer.close());
try!(self.postings_serializer.close());

View File

@@ -10,6 +10,11 @@
#![warn(missing_docs)]
//! # `tantivy`
//!
//! Tantivy is a search engine library.
//! Think `Lucene`, but in Rust.
#[macro_use]
extern crate lazy_static;
@@ -61,18 +66,22 @@ pub use error::{Result, Error};
mod analyzer;
mod datastruct;
pub mod postings;
pub mod query;
pub mod directory;
/// Query module
pub mod query;
/// Directory module
pub mod directory;
/// Collector module
pub mod collector;
/// Postings module (also called inverted index)
pub mod postings;
/// Schema definition module
pub mod schema;
pub use directory::Directory;
pub use core::searcher::Searcher;
/// Re-export of `core::Index`.
pub use core::Index;
pub use indexer::IndexWriter;
pub use schema::Term;

View File

@@ -1,3 +1,9 @@
//! Postings module
//!
//! Postings, also called inverted lists, are the key data structure
//! of full-text search.
mod postings;
mod recorder;
mod serializer;

View File

@@ -1,3 +1,8 @@
//! Query module
//!
//! The query module groups together all of tantivy's query objects.
mod query;
mod multi_term_query;
mod multi_term_accumulator;