This commit is contained in:
Paul Masurel
2016-09-22 15:45:37 +09:00
parent ca331e7fe5
commit 1a08ca4f95
13 changed files with 98 additions and 23 deletions

View File

@@ -14,8 +14,13 @@ pub fn make_io_err(msg: String) -> io::Error {
io::Error::new(io::ErrorKind::Other, msg)
}
/// Trait for objects that have a length.
pub trait HasLen {
/// Returns the length of the object.
fn len(&self,) -> usize;
/// Returns true iff the length is zero.
fn is_empty(&self,) -> bool {
self.len() == 0
}

View File

@@ -8,6 +8,10 @@ pub struct OpenTimer<'a> {
}
impl<'a> OpenTimer<'a> {
/// Starts timing a new named subtask.
///
/// The returned timer is stopped automatically
/// when the `OpenTimer` is dropped.
pub fn open(&mut self, name: &'static str) -> OpenTimer {
OpenTimer {
name: name,
@@ -28,6 +32,7 @@ impl<'a> Drop for OpenTimer<'a> {
}
}
/// Timing recording
#[derive(Debug, RustcEncodable)]
pub struct Timing {
name: &'static str,
@@ -35,17 +40,20 @@ pub struct Timing {
depth: u32,
}
/// A tree of named timings.
#[derive(Debug, RustcEncodable)]
pub struct TimerTree {
// Flat list of recorded timings; the hierarchy is presumably
// encoded via each `Timing`'s `depth` field — TODO confirm.
timings: Vec<Timing>,
}
impl TimerTree {
/// Returns the total time elapsed in microseconds.
///
/// NOTE(review): `unwrap` panics if no timing has been
/// recorded yet (empty `timings` vector) — confirm callers
/// only invoke this after at least one timer was closed.
pub fn total_time(&self,) -> i64 {
self.timings.last().unwrap().duration
}
/// Opens a new named subtask.
pub fn open(&mut self, name: &'static str) -> OpenTimer {
OpenTimer {
name: name,

View File

@@ -150,7 +150,10 @@ impl Index {
/// Creates an `IndexWriter` with the given heap size (in bytes),
/// using one indexing thread per CPU core (`num_cpus::get()`).
pub fn writer(&self, heap_size_in_bytes: usize) -> Result<IndexWriter> {
self.writer_with_num_threads(num_cpus::get(), heap_size_in_bytes)
}
/// Accessor to the index schema.
///
/// The schema is actually cloned.
pub fn schema(&self,) -> Schema {
self.schema.clone()
}

View File

@@ -65,7 +65,8 @@ impl SegmentReader {
/// Returns the number of documents in the segment.
///
/// NOTE(review): this returns `max_doc` from the segment info,
/// i.e. it assumes no deleted documents — confirm.
pub fn num_docs(&self) -> DocId {
self.segment_info.max_doc
}
/// Accessor to a segment's fast field reader given a field.
pub fn get_fast_field_reader(&self, field: Field) -> io::Result<U32FastFieldReader> {
// Dispatch on the field's declared type from the schema.
let field_entry = self.schema.get_field_entry(field);
match *field_entry.field_type() {
@@ -80,11 +81,17 @@ impl SegmentReader {
}
}
/// Accessor to the segment's field norms reader.
///
/// Field norms are the length (in tokens) of the fields.
/// They are used in the computation of the [TfIdf](https://fulmicoton.gitbooks.io/tantivy-doc/content/tfidf.html).
///
/// They are simply stored as a fast field, serialized in
/// the `.fieldnorm` file of the segment.
pub fn get_fieldnorms_reader(&self, field: Field) -> io::Result<U32FastFieldReader> {
self.fieldnorms_reader.get_field(field)
}
/// Returns the number of documents containing the term.
pub fn doc_freq(&self, term: &Term) -> u32 {
match self.get_term_info(term) {
@@ -92,7 +99,8 @@ impl SegmentReader {
None => 0,
}
}
/// Accessor to the segment's `StoreReader`.
///
/// Returns a reference borrowed from `self`.
pub fn get_store_reader(&self) -> &StoreReader {
&self.store_reader
}

View File

@@ -1,7 +1,6 @@
use std::path::PathBuf;
use std::io;
/// Error that may occur when opening a directory.
#[derive(Debug)]
pub enum OpenDirectoryError {

View File

@@ -3,6 +3,8 @@ mod ram_directory;
mod directory;
mod read_only_source;
mod shared_vec_slice;
/// Errors specific to the directory module.
pub mod error;
use std::io::{Seek, Write};

View File

@@ -1,5 +1,7 @@
#![allow(enum_variant_names)]
//! Definition of Tantivy's error and result types.
use std::io;
use std::result;
use std::path::PathBuf;
@@ -10,6 +12,10 @@ use query;
use schema;
/// Tantivy result.
pub type Result<T> = result::Result<T, Error>;
/// Generic tantivy error.
///
/// Any specialized error returned in tantivy can be converted into `tantivy::Error`.
@@ -87,6 +93,3 @@ impl From<OpenDirectoryError> for Error {
}
}
}
/// Tantivy result.
pub type Result<T> = result::Result<T, Error>;

View File

@@ -1,3 +1,15 @@
//! FastField module
//!
//! FastFields are the equivalent of `DocValues` in `Lucene`.
//! FastFields are stored in a column-oriented fashion and allow fast
//! random access given a `DocId`.
//!
//! Their performance is comparable to that of an array lookup.
//! FastFields are useful when a field is required for all or most of
//! the `DocSet`: for instance for scoring, grouping, filtering, or faceting.
//!
//! Currently only u32 fast fields are supported.
mod reader;
mod writer;
mod serializer;

View File

@@ -36,6 +36,12 @@ type DocumentReceiver = chan::Receiver<Document>;
type NewSegmentSender = chan::Sender<Result<(SegmentId, usize)>>;
type NewSegmentReceiver = chan::Receiver<Result<(SegmentId, usize)>>;
/// `IndexWriter` is the user entry-point to add documents to an index.
///
/// It manages a small number of indexing threads, as well as a shared
/// indexing queue.
/// Each indexing thread builds its own independent `Segment`, via
/// a `SegmentWriter` object.
pub struct IndexWriter {
index: Index,
heap_size_in_bytes_per_thread: usize,
@@ -68,7 +74,6 @@ fn index_documents(heap: &mut Heap,
}
impl IndexWriter {
/// Spawns a new worker thread for indexing.
@@ -139,7 +144,8 @@ impl IndexWriter {
}
Ok(())
}
/// Merges a given list of segments.
///
/// Uses the index's schema to drive an `IndexMerger` over
/// the given segments.
pub fn merge(&mut self, segments: &[Segment]) -> Result<()> {
let schema = self.index.schema();
let merger = try!(IndexMerger::open(schema, segments));

View File

@@ -1,5 +1,4 @@
use Result;
use Error;
use std::io::Write;
use rustc_serialize::json;
@@ -10,6 +9,9 @@ use fastfield::FastFieldSerializer;
use store::StoreWriter;
use postings::PostingsSerializer;
/// The segment serializer is in charge of laying out on disk
/// the data accumulated and sorted by the `SegmentWriter`.
pub struct SegmentSerializer {
segment: Segment,
store_writer: StoreWriter,
@@ -19,7 +21,8 @@ pub struct SegmentSerializer {
}
impl SegmentSerializer {
/// Creates a new `SegmentSerializer`.
pub fn for_segment(segment: &mut Segment) -> Result<SegmentSerializer> {
let store_write = try!(segment.open_write(SegmentComponent::STORE));
@@ -38,23 +41,28 @@ impl SegmentSerializer {
fieldnorms_serializer: fieldnorms_serializer,
})
}
/// Accessor to the `PostingsSerializer`.
pub fn get_postings_serializer(&mut self,) -> &mut PostingsSerializer {
&mut self.postings_serializer
}
/// Accessor to the `FastFieldSerializer`.
pub fn get_fast_field_serializer(&mut self,) -> &mut FastFieldSerializer {
&mut self.fast_field_serializer
}
/// Accessor to the field norms serializer.
///
/// Field norms are stored as a fast field, hence the
/// `FastFieldSerializer` return type.
pub fn get_fieldnorms_serializer(&mut self,) -> &mut FastFieldSerializer {
&mut self.fieldnorms_serializer
}
/// Accessor to the `StoreWriter`.
pub fn get_store_writer(&mut self,) -> &mut StoreWriter {
&mut self.store_writer
}
/// Writes the `SegmentInfo`, JSON-encoded, into the
/// segment's `INFO` component file.
pub fn write_segment_info(&mut self, segment_info: &SegmentInfo) -> Result<()> {
let mut write = try!(self.segment.open_write(SegmentComponent::INFO));
let json_data = json::encode(segment_info)
@@ -63,7 +71,8 @@ impl SegmentSerializer {
try!(write.flush());
Ok(())
}
/// Finalizes the segment serialization by closing
/// the underlying serializers, consuming `self`.
pub fn close(mut self,) -> Result<()> {
try!(self.fast_field_serializer.close());
try!(self.postings_serializer.close());

View File

@@ -10,6 +10,11 @@
#![warn(missing_docs)]
//! # `tantivy`
//!
//! Tantivy is a search engine library.
//! Think `Lucene`, but in Rust.
#[macro_use]
extern crate lazy_static;
@@ -61,18 +66,22 @@ pub use error::{Result, Error};
mod analyzer;
mod datastruct;
pub mod postings;
pub mod query;
pub mod directory;
/// Query module
pub mod query;
/// Directory module
pub mod directory;
/// Collector module
pub mod collector;
/// Postings module (also called inverted index)
pub mod postings;
/// Schema definition module
pub mod schema;
pub use directory::Directory;
pub use core::searcher::Searcher;
/// Re-export of `core::Index`.
pub use core::Index;
pub use indexer::IndexWriter;
pub use schema::Term;

View File

@@ -1,3 +1,9 @@
//! Postings module
//!
//! Postings, also called inverted lists, are the key data structure
//! of full-text search.
mod postings;
mod recorder;
mod serializer;

View File

@@ -1,3 +1,8 @@
//! Query module
//!
//! The query module groups together all of tantivy's query objects.
mod query;
mod multi_term_query;
mod multi_term_accumulator;