mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 01:02:55 +00:00
Doc
This commit is contained in:
@@ -14,8 +14,13 @@ pub fn make_io_err(msg: String) -> io::Error {
|
||||
io::Error::new(io::ErrorKind::Other, msg)
|
||||
}
|
||||
|
||||
|
||||
/// Has length trait
|
||||
pub trait HasLen {
|
||||
/// Return length
|
||||
fn len(&self,) -> usize;
|
||||
|
||||
/// Returns true iff empty.
|
||||
fn is_empty(&self,) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
@@ -8,6 +8,10 @@ pub struct OpenTimer<'a> {
|
||||
}
|
||||
|
||||
impl<'a> OpenTimer<'a> {
|
||||
/// Starts timing a new named subtask
|
||||
///
|
||||
/// The timer is stopped automatically
|
||||
/// when the `OpenTimer` is dropped.
|
||||
pub fn open(&mut self, name: &'static str) -> OpenTimer {
|
||||
OpenTimer {
|
||||
name: name,
|
||||
@@ -28,6 +32,7 @@ impl<'a> Drop for OpenTimer<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Timing recording
|
||||
#[derive(Debug, RustcEncodable)]
|
||||
pub struct Timing {
|
||||
name: &'static str,
|
||||
@@ -35,17 +40,20 @@ pub struct Timing {
|
||||
depth: u32,
|
||||
}
|
||||
|
||||
/// Timer tree
|
||||
#[derive(Debug, RustcEncodable)]
|
||||
pub struct TimerTree {
|
||||
timings: Vec<Timing>,
|
||||
}
|
||||
|
||||
impl TimerTree {
|
||||
|
||||
|
||||
/// Returns the total time elapsed in microseconds
|
||||
pub fn total_time(&self,) -> i64 {
|
||||
self.timings.last().unwrap().duration
|
||||
}
|
||||
|
||||
/// Open a new named subtask
|
||||
pub fn open(&mut self, name: &'static str) -> OpenTimer {
|
||||
OpenTimer {
|
||||
name: name,
|
||||
|
||||
@@ -150,7 +150,10 @@ impl Index {
|
||||
pub fn writer(&self, heap_size_in_bytes: usize) -> Result<IndexWriter> {
|
||||
self.writer_with_num_threads(num_cpus::get(), heap_size_in_bytes)
|
||||
}
|
||||
|
||||
|
||||
/// Accessor to the index schema
|
||||
///
|
||||
/// The schema is actually cloned.
|
||||
pub fn schema(&self,) -> Schema {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
@@ -65,7 +65,8 @@ impl SegmentReader {
|
||||
pub fn num_docs(&self) -> DocId {
|
||||
self.segment_info.max_doc
|
||||
}
|
||||
|
||||
|
||||
/// Accessor to a segment's fast field reader given a field.
|
||||
pub fn get_fast_field_reader(&self, field: Field) -> io::Result<U32FastFieldReader> {
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
match *field_entry.field_type() {
|
||||
@@ -80,11 +81,17 @@ impl SegmentReader {
|
||||
}
|
||||
}
|
||||
|
||||
/// Accessor to the segment's `Field norms`'s reader.
|
||||
///
|
||||
/// Field norms are the length (in tokens) of the fields.
|
||||
/// They are used in the computation of the [TfIdf](https://fulmicoton.gitbooks.io/tantivy-doc/content/tfidf.html).
|
||||
///
|
||||
/// They are simply stored as a fast field, serialized in
|
||||
/// the `.fieldnorm` file of the segment.
|
||||
pub fn get_fieldnorms_reader(&self, field: Field) -> io::Result<U32FastFieldReader> {
|
||||
self.fieldnorms_reader.get_field(field)
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Returns the number of documents containing the term.
|
||||
pub fn doc_freq(&self, term: &Term) -> u32 {
|
||||
match self.get_term_info(term) {
|
||||
@@ -92,7 +99,8 @@ impl SegmentReader {
|
||||
None => 0,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Accessor to the segment's `StoreReader`.
|
||||
pub fn get_store_reader(&self) -> &StoreReader {
|
||||
&self.store_reader
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::path::PathBuf;
|
||||
use std::io;
|
||||
|
||||
|
||||
/// Error that may occur when opening a directory
|
||||
#[derive(Debug)]
|
||||
pub enum OpenDirectoryError {
|
||||
|
||||
@@ -3,6 +3,8 @@ mod ram_directory;
|
||||
mod directory;
|
||||
mod read_only_source;
|
||||
mod shared_vec_slice;
|
||||
|
||||
/// Errors specific to the directory module.
|
||||
pub mod error;
|
||||
|
||||
use std::io::{Seek, Write};
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
#![allow(enum_variant_names)]
|
||||
|
||||
/// Definition of Tantivy's error and result.
|
||||
|
||||
use std::io;
|
||||
use std::result;
|
||||
use std::path::PathBuf;
|
||||
@@ -10,6 +12,10 @@ use query;
|
||||
use schema;
|
||||
|
||||
|
||||
/// Tantivy result.
|
||||
pub type Result<T> = result::Result<T, Error>;
|
||||
|
||||
|
||||
/// Generic tantivy error.
|
||||
///
|
||||
/// Any specialized error returned in tantivy can be converted into `tantivy::Error`.
|
||||
@@ -87,6 +93,3 @@ impl From<OpenDirectoryError> for Error {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Tantivy result.
|
||||
pub type Result<T> = result::Result<T, Error>;
|
||||
|
||||
@@ -1,3 +1,15 @@
|
||||
/// FastField module
|
||||
///
|
||||
/// FastField are the equivalent of `DocValues` in `Lucene`.
|
||||
/// FastFields are stored in column-oriented fashion and allow fast
|
||||
/// random access given a `DocId`.
|
||||
///
|
||||
/// Their performance is comparable to that of an array lookup.
|
||||
/// FastField are useful when a field is required for all or most of
|
||||
/// the `DocSet`: for instance for scoring, grouping, filtering, or faceting.
|
||||
///
|
||||
/// Currently only u32 fast fields are supported.
|
||||
|
||||
mod reader;
|
||||
mod writer;
|
||||
mod serializer;
|
||||
|
||||
@@ -36,6 +36,12 @@ type DocumentReceiver = chan::Receiver<Document>;
|
||||
type NewSegmentSender = chan::Sender<Result<(SegmentId, usize)>>;
|
||||
type NewSegmentReceiver = chan::Receiver<Result<(SegmentId, usize)>>;
|
||||
|
||||
/// `IndexWriter` is the user entry-point to add documents to an index.
|
||||
///
|
||||
/// It manages a small number of indexing threads, as well as a shared
|
||||
/// indexing queue.
|
||||
/// Each indexing thread builds its own independent `Segment`, via
|
||||
/// a `SegmentWriter` object.
|
||||
pub struct IndexWriter {
|
||||
index: Index,
|
||||
heap_size_in_bytes_per_thread: usize,
|
||||
@@ -68,7 +74,6 @@ fn index_documents(heap: &mut Heap,
|
||||
}
|
||||
|
||||
|
||||
|
||||
impl IndexWriter {
|
||||
|
||||
/// Spawns a new worker thread for indexing.
|
||||
@@ -139,7 +144,8 @@ impl IndexWriter {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
/// Merges a given list of segments
|
||||
pub fn merge(&mut self, segments: &[Segment]) -> Result<()> {
|
||||
let schema = self.index.schema();
|
||||
let merger = try!(IndexMerger::open(schema, segments));
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use Result;
|
||||
use Error;
|
||||
|
||||
use std::io::Write;
|
||||
use rustc_serialize::json;
|
||||
@@ -10,6 +9,9 @@ use fastfield::FastFieldSerializer;
|
||||
use store::StoreWriter;
|
||||
use postings::PostingsSerializer;
|
||||
|
||||
|
||||
/// Segment serializer is in charge of laying out on disk
|
||||
/// the data accumulated and sorted by the `SegmentWriter`.
|
||||
pub struct SegmentSerializer {
|
||||
segment: Segment,
|
||||
store_writer: StoreWriter,
|
||||
@@ -19,7 +21,8 @@ pub struct SegmentSerializer {
|
||||
}
|
||||
|
||||
impl SegmentSerializer {
|
||||
|
||||
|
||||
/// Creates a new `SegmentSerializer`.
|
||||
pub fn for_segment(segment: &mut Segment) -> Result<SegmentSerializer> {
|
||||
let store_write = try!(segment.open_write(SegmentComponent::STORE));
|
||||
|
||||
@@ -38,23 +41,28 @@ impl SegmentSerializer {
|
||||
fieldnorms_serializer: fieldnorms_serializer,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
/// Accessor to the `PostingsSerializer`.
|
||||
pub fn get_postings_serializer(&mut self,) -> &mut PostingsSerializer {
|
||||
&mut self.postings_serializer
|
||||
}
|
||||
|
||||
/// Accessor to the `FastFieldSerializer`.
|
||||
pub fn get_fast_field_serializer(&mut self,) -> &mut FastFieldSerializer {
|
||||
&mut self.fast_field_serializer
|
||||
}
|
||||
|
||||
/// Accessor to the field norm serializer.
|
||||
pub fn get_fieldnorms_serializer(&mut self,) -> &mut FastFieldSerializer {
|
||||
&mut self.fieldnorms_serializer
|
||||
}
|
||||
|
||||
|
||||
/// Accessor to the `StoreWriter`.
|
||||
pub fn get_store_writer(&mut self,) -> &mut StoreWriter {
|
||||
&mut self.store_writer
|
||||
}
|
||||
|
||||
|
||||
/// Write the `SegmentInfo`
|
||||
pub fn write_segment_info(&mut self, segment_info: &SegmentInfo) -> Result<()> {
|
||||
let mut write = try!(self.segment.open_write(SegmentComponent::INFO));
|
||||
let json_data = json::encode(segment_info)
|
||||
@@ -63,7 +71,8 @@ impl SegmentSerializer {
|
||||
try!(write.flush());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
/// Finalize the segment serialization.
|
||||
pub fn close(mut self,) -> Result<()> {
|
||||
try!(self.fast_field_serializer.close());
|
||||
try!(self.postings_serializer.close());
|
||||
|
||||
19
src/lib.rs
19
src/lib.rs
@@ -10,6 +10,11 @@
|
||||
|
||||
#![warn(missing_docs)]
|
||||
|
||||
//! # `tantivy`
|
||||
//!
|
||||
//! Tantivy is a search engine library.
|
||||
//! Think `Lucene`, but in Rust.
|
||||
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
|
||||
@@ -61,18 +66,22 @@ pub use error::{Result, Error};
|
||||
mod analyzer;
|
||||
mod datastruct;
|
||||
|
||||
pub mod postings;
|
||||
pub mod query;
|
||||
pub mod directory;
|
||||
|
||||
/// Query module
|
||||
pub mod query;
|
||||
/// Directory module
|
||||
pub mod directory;
|
||||
/// Collector module
|
||||
pub mod collector;
|
||||
/// Postings module (also called inverted index)
|
||||
pub mod postings;
|
||||
/// Schema
|
||||
pub mod schema;
|
||||
|
||||
|
||||
pub use directory::Directory;
|
||||
pub use core::searcher::Searcher;
|
||||
|
||||
|
||||
///
|
||||
pub use core::Index;
|
||||
pub use indexer::IndexWriter;
|
||||
pub use schema::Term;
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
/// Postings module
|
||||
///
|
||||
/// Postings, also called inverted lists, are the key data structure
|
||||
/// to full-text search.
|
||||
|
||||
|
||||
mod postings;
|
||||
mod recorder;
|
||||
mod serializer;
|
||||
|
||||
@@ -1,3 +1,8 @@
|
||||
/// Query module
|
||||
///
|
||||
/// The query module groups together all of tantivy's query objects.
|
||||
///
|
||||
|
||||
mod query;
|
||||
mod multi_term_query;
|
||||
mod multi_term_accumulator;
|
||||
|
||||
Reference in New Issue
Block a user