From 1a08ca4f95b690c6d4ae3f737dd8ab41380dbb82 Mon Sep 17 00:00:00 2001
From: Paul Masurel <paul.masurel@gmail.com>
Date: Thu, 22 Sep 2016 15:45:37 +0900
Subject: [PATCH] Doc

---
 src/common/mod.rs                 |  5 +++++
 src/common/timer.rs               | 10 +++++++++-
 src/core/index.rs                 |  5 ++++-
 src/core/segment_reader.rs        | 16 ++++++++++++----
 src/directory/error.rs            |  1 -
 src/directory/mod.rs              |  2 ++
 src/error.rs                      |  9 ++++++---
 src/fastfield/mod.rs              | 12 ++++++++++++
 src/indexer/index_writer.rs       | 10 ++++++++--
 src/indexer/segment_serializer.rs | 21 +++++++++++++++------
 src/lib.rs                        | 19 ++++++++++++++-----
 src/postings/mod.rs               |  6 ++++++
 src/query/mod.rs                  |  5 +++++
 13 files changed, 98 insertions(+), 23 deletions(-)
diff --git a/src/common/mod.rs b/src/common/mod.rs
index d3beadb48..e4322d27e 100644
--- a/src/common/mod.rs
+++ b/src/common/mod.rs
@@ -14,8 +14,13 @@ pub fn make_io_err(msg: String) -> io::Error {
     io::Error::new(io::ErrorKind::Other, msg)
 }
 
+
+/// Trait for types that have a length.
 pub trait HasLen {
+    /// Returns the length.
     fn len(&self,) -> usize;
+    
+    /// Returns true iff empty.
     fn is_empty(&self,) -> bool {
         self.len() == 0
     }
diff --git a/src/common/timer.rs b/src/common/timer.rs
index ae1d3959e..3f3950422 100644
--- a/src/common/timer.rs
+++ b/src/common/timer.rs
@@ -8,6 +8,10 @@ pub struct OpenTimer<'a> {
 }
 
 impl<'a> OpenTimer<'a> {
+    /// Starts timing a new named subtask
+    ///
+    /// The timer is stopped automatically 
+    /// when the `OpenTimer` is dropped.
     pub fn open(&mut self, name: &'static str) -> OpenTimer {
         OpenTimer {
             name: name,
@@ -28,6 +32,7 @@ impl<'a> Drop for OpenTimer<'a> {
     }
 }
 
+/// A recorded timing: a name, a duration and a depth.
 #[derive(Debug, RustcEncodable)]
 pub struct Timing {
     name: &'static str,
@@ -35,17 +40,20 @@ pub struct Timing {
     depth: u32,
 }
 
+/// Timer tree: holds the list of recorded `Timing`s.
 #[derive(Debug, RustcEncodable)]
 pub struct TimerTree {
     timings: Vec<Timing>,
 }
 
 impl TimerTree {
-    
+        
+    /// Returns the total time elapsed in microseconds 
     pub fn total_time(&self,) -> i64 {
         self.timings.last().unwrap().duration
     }
     
+    /// Open a new named subtask
     pub fn open(&mut self, name: &'static str) -> OpenTimer {
         OpenTimer {
             name: name,
diff --git a/src/core/index.rs b/src/core/index.rs
index d1f34c9d8..ac2b287cf 100644
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -150,7 +150,10 @@ impl Index {
     pub fn writer(&self, heap_size_in_bytes: usize) -> Result<IndexWriter> {
         self.writer_with_num_threads(num_cpus::get(), heap_size_in_bytes)
     }
-
+    
+    /// Accessor to the index schema
+    ///
+    /// The schema is actually cloned.
     pub fn schema(&self,) -> Schema {
         self.schema.clone()
     }
diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs
index 99153861f..be1e46ec2 100644
--- a/src/core/segment_reader.rs
+++ b/src/core/segment_reader.rs
@@ -65,7 +65,8 @@ impl SegmentReader {
     pub fn num_docs(&self) -> DocId {
         self.segment_info.max_doc
     }
-
+    
+    /// Accessor to a segment's fast field reader given a field.
     pub fn get_fast_field_reader(&self, field: Field) -> io::Result<U32FastFieldReader> {
         let field_entry = self.schema.get_field_entry(field);
         match *field_entry.field_type() {
@@ -80,11 +81,17 @@ impl SegmentReader {
         }
     }
     
+    /// Accessor to the segment's field norms reader.
+    ///
+    /// Field norms are the lengths (in tokens) of the fields.
+    /// They are used in the computation of the [TfIdf](https://fulmicoton.gitbooks.io/tantivy-doc/content/tfidf.html).
+    ///
+    /// They are simply stored as a fast field, serialized in
+    /// the `.fieldnorm` file of the segment.
     pub fn get_fieldnorms_reader(&self, field: Field) -> io::Result<U32FastFieldReader> {
         self.fieldnorms_reader.get_field(field) 
     }
-    
-    
+        
     /// Returns the number of documents containing the term.
     pub fn doc_freq(&self, term: &Term) -> u32 {
         match self.get_term_info(term) {
@@ -92,7 +99,8 @@ impl SegmentReader {
             None => 0,
         }
     }    
-
+    
+    /// Accessor to the segment's `StoreReader`.
     pub fn get_store_reader(&self) -> &StoreReader {
         &self.store_reader
     }
diff --git a/src/directory/error.rs b/src/directory/error.rs
index 5beed41e2..a49ea23b7 100644
--- a/src/directory/error.rs
+++ b/src/directory/error.rs
@@ -1,7 +1,6 @@
 use std::path::PathBuf;
 use std::io;
 
-
 /// Error that may occur when opening a directory
 #[derive(Debug)]
 pub enum OpenDirectoryError {
diff --git a/src/directory/mod.rs b/src/directory/mod.rs
index 505ac8a7d..241d2888c 100644
--- a/src/directory/mod.rs
+++ b/src/directory/mod.rs
@@ -3,6 +3,8 @@ mod ram_directory;
 mod directory;
 mod read_only_source;
 mod shared_vec_slice;
+
+/// Errors specific to the directory module.
 pub mod error;
 
 use std::io::{Seek, Write};
diff --git a/src/error.rs b/src/error.rs
index a51b369f0..0f7bf1358 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -1,5 +1,7 @@
 #![allow(enum_variant_names)]
 
+//! Definition of Tantivy's error and result.
+
 use std::io;
 use std::result;
 use std::path::PathBuf;
@@ -10,6 +12,10 @@ use query;
 use schema;
 
 
+/// Tantivy result.
+pub type Result<T> = result::Result<T, Error>;
+
+
 /// Generic tantivy error.
 ///
 /// Any specialized error return in tantivy can be converted in `tantivy::Error`.
@@ -87,6 +93,3 @@ impl From<OpenDirectoryError> for Error {
         }
     }
 }
-
-/// Tantivy result.
-pub type Result<T> = result::Result<T, Error>;
diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs
index 0ecbf1f30..02dca74e9 100644
--- a/src/fastfield/mod.rs
+++ b/src/fastfield/mod.rs
@@ -1,3 +1,15 @@
+//! FastField module
+//!
+//! FastFields are the equivalent of `DocValues` in `Lucene`.
+//! FastFields are stored in column-oriented fashion and allow fast
+//! random access given a `DocId`.
+//!
+//! Their performance is comparable to that of an array lookup.
+//! FastFields are useful when a field is required for all or most of
+//! the `DocSet`: for instance for scoring, grouping, filtering, or faceting.
+//!
+//! Currently only u32 fastfields are supported.
+
 mod reader;
 mod writer;
 mod serializer;
diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs
index 0b9271567..519d34825 100644
--- a/src/indexer/index_writer.rs
+++ b/src/indexer/index_writer.rs
@@ -36,6 +36,12 @@ type DocumentReceiver = chan::Receiver<Document>;
 type NewSegmentSender = chan::Sender<Result<(SegmentId, usize)>>;
 type NewSegmentReceiver = chan::Receiver<Result<(SegmentId, usize)>>;
 
+/// `IndexWriter` is the user entry-point to add documents to an index.
+///
+/// It manages a small number of indexing threads, as well as a shared
+/// indexing queue.
+/// Each indexing thread builds its own independent `Segment`, via
+/// a `SegmentWriter` object.
 pub struct IndexWriter {
 	index: Index,
 	heap_size_in_bytes_per_thread: usize,
@@ -68,7 +74,6 @@ fn index_documents(heap: &mut Heap,
 }
 
 
-
 impl IndexWriter {
 
 	/// Spawns a new worker thread for indexing.
@@ -139,7 +144,8 @@ impl IndexWriter {
 		}
 		Ok(())
 	}
-
+	
+	/// Merges a given list of segments
 	pub fn merge(&mut self, segments: &[Segment]) -> Result<()> {
 		let schema = self.index.schema();
 		let merger = try!(IndexMerger::open(schema, segments));
diff --git a/src/indexer/segment_serializer.rs b/src/indexer/segment_serializer.rs
index 6f7ef3ad4..6e27f6a94 100644
--- a/src/indexer/segment_serializer.rs
+++ b/src/indexer/segment_serializer.rs
@@ -1,5 +1,4 @@
 use Result;
-use Error;
 
 use std::io::Write;
 use rustc_serialize::json;
@@ -10,6 +9,9 @@ use fastfield::FastFieldSerializer;
 use store::StoreWriter;
 use postings::PostingsSerializer;
 
+
+/// Segment serializer is in charge of laying out on disk
+/// the data accumulated and sorted by the `SegmentWriter`.
 pub struct SegmentSerializer {
     segment: Segment,
     store_writer: StoreWriter,
@@ -19,7 +21,8 @@ pub struct SegmentSerializer {
 }
 
 impl SegmentSerializer {
-
+    
+    /// Creates a new `SegmentSerializer`.
     pub fn for_segment(segment: &mut Segment) -> Result<SegmentSerializer>  {
         let store_write = try!(segment.open_write(SegmentComponent::STORE));
 
@@ -38,23 +41,28 @@ impl SegmentSerializer {
             fieldnorms_serializer: fieldnorms_serializer,
         })
     }
-
+    
+    /// Accessor to the `PostingsSerializer`.
     pub fn get_postings_serializer(&mut self,) -> &mut PostingsSerializer {
         &mut self.postings_serializer
     }
 
+    /// Accessor to the `FastFieldSerializer`.
     pub fn get_fast_field_serializer(&mut self,) -> &mut FastFieldSerializer {
         &mut self.fast_field_serializer
     }
     
+    /// Accessor to the field norm serializer.
     pub fn get_fieldnorms_serializer(&mut self,) -> &mut FastFieldSerializer {
         &mut self.fieldnorms_serializer
     }
-
+    
+    /// Accessor to the `StoreWriter`.
     pub fn get_store_writer(&mut self,) -> &mut StoreWriter {
         &mut self.store_writer
     }
-
+    
+    /// Write the `SegmentInfo`
     pub fn write_segment_info(&mut self, segment_info: &SegmentInfo) -> Result<()> {
         let mut write = try!(self.segment.open_write(SegmentComponent::INFO));
         let json_data = json::encode(segment_info)
@@ -63,7 +71,8 @@ impl SegmentSerializer {
         try!(write.flush());
         Ok(())
     }
-
+    
+    /// Finalize the segment serialization.
     pub fn close(mut self,) -> Result<()> {
         try!(self.fast_field_serializer.close());
         try!(self.postings_serializer.close());
diff --git a/src/lib.rs b/src/lib.rs
index 61cbb6a50..0f335c425 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -10,6 +10,11 @@
 
 #![warn(missing_docs)]
 
+//! # `tantivy`
+//!
+//! Tantivy is a search engine library. 
+//! Think `Lucene`, but in Rust.
+
 #[macro_use]
 extern crate lazy_static;
 
@@ -61,18 +66,22 @@ pub use error::{Result, Error};
 mod analyzer;
 mod datastruct;
 
-pub mod postings;
-pub mod query;
-pub mod directory;
 
+/// Query module
+pub mod query;
+/// Directory module
+pub mod directory;
+/// Collector module
 pub mod collector;
+/// Postings module (also called inverted index)
+pub mod postings;
+/// Schema
 pub mod schema;
 
+
 pub use directory::Directory;
 pub use core::searcher::Searcher;
 
-
-/// 
 pub use core::Index;
 pub use indexer::IndexWriter;
 pub use schema::Term;
diff --git a/src/postings/mod.rs b/src/postings/mod.rs
index 7f955b9fd..374f08c33 100644
--- a/src/postings/mod.rs
+++ b/src/postings/mod.rs
@@ -1,3 +1,9 @@
+//! Postings module
+//!
+//! Postings, also called inverted lists, are the key data structure
+//! of full-text search.
+
+
 mod postings;
 mod recorder;
 mod serializer;
diff --git a/src/query/mod.rs b/src/query/mod.rs
index bc0ecad23..12af7d35c 100644
--- a/src/query/mod.rs
+++ b/src/query/mod.rs
@@ -1,3 +1,8 @@
+//! Query module
+//!
+//! The query module groups all of tantivy's query objects.
+//!
+
 mod query;
 mod multi_term_query;
 mod multi_term_accumulator;