issues/65 Added comments

Closes #65 Closes #132
2026-05-22 03:00:42 +00:00 · 2017-05-06 23:08:20 +09:00
parent 2947364ae1
commit 0dad02791c
6 changed files with 67 additions and 19 deletions
--- a/src/common/bitpacker.rs
+++ b/src/common/bitpacker.rs
@@ -4,6 +4,30 @@ use common::serialize::BinarySerializable;
 use std::mem;


+/// Computes the number of bits that will be used for bitpacking.
+///
+/// In general the target is the minimum number of bits 
+/// required to express the amplitude given in argument.
+///
+/// e.g. If the amplitude is 10, we can store all ints on just 4 bits.
+/// 
+/// The logic is slightly more convoluted here because, for optimization
+/// reasons, we want to ensure that a value spans at most
+/// 8 aligned bytes.
+/// 
+/// Spanning over 9 bytes is possible, for instance, if we do
+/// bitpacking with an amplitude of 63 bits.
+/// In this case, the second int will start on bit
+/// 63 (which belongs to byte 7) and end at byte 15;
+/// hence 9 bytes (from byte 7 to byte 15 included).
+///
+/// To avoid this, we force the number of bits to 64bits
+/// when the result is greater than `64-8 = 56 bits`.
+///
+/// Note that this only affects rare use cases spanning
+/// a very large range of values. Even in this case, it results
+/// in an extra cost of at most 12% compared to the optimal
+/// number of bits.
 pub fn compute_num_bits(amplitude: u64) -> u8 {
    let amplitude = (64u32 - amplitude.leading_zeros()) as u8;
    if amplitude <= 64 - 8 {
--- a/src/fastfield/mod.rs
+++ b/src/fastfield/mod.rs
@@ -1,14 +1,25 @@
-/// Fast field module
-///
-/// Fast fields are the equivalent of `DocValues` in `Lucene`.
-/// Fast fields are stored in column-oriented fashion and allow fast
-/// random access given a `DocId`.
-///
-/// Their performance is comparable to that of an array lookup.
-/// They are useful when a field is required for all or most of
-/// the `DocSet` : for instance for scoring, grouping, filtering, or facetting.
-/// 
-/// Currently only u64 fastfield are supported.
+//! # Fast fields
+//!
+//! Fast fields are the equivalent of `DocValues` in `Lucene`.
+//! Fast fields are `tantivy`'s non-compressed, column-oriented
+//! storage.
+//!
+//! They are designed for fast random access to some document
+//! fields given a document id.
+//!
+//! Fast fields are useful when a field is required for all or most of
+//! the `DocSet`: for instance for scoring, grouping, filtering, or faceting.
+//! 
+//! 
+//! Fields have to be declared as `FAST` in the schema.
+//! Currently only 64-bit integers (signed or unsigned) are
+//! supported.
+//!
+//! They are stored in a bitpacked fashion so that their
+//! memory usage depends on the number of bits required to
+//! express the amplitude of the values stored.
+//!
+//! Read access performance is comparable to that of an array lookup.

 mod reader;
 mod writer;
--- a/src/fastfield/reader.rs
+++ b/src/fastfield/reader.rs
@@ -14,7 +14,10 @@ use common::bitpacker::BitUnpacker;
 use schema::FieldType;
 use common;

-
+/// Trait for accessing a fast field.
+///
+/// Depending on the field type, a different
+/// fast field reader is required.
 pub trait FastFieldReader: Sized {

    /// Type of the value stored in the fastfield.
@@ -35,6 +38,7 @@ pub trait FastFieldReader: Sized {
    fn is_enabled(field_type: &FieldType) -> bool;
 }

+/// `FastFieldReader` for unsigned 64-bit integers.
 pub struct U64FastFieldReader {
    _data: ReadOnlySource,
    bit_unpacker: BitUnpacker,
@@ -131,8 +135,7 @@ impl From<Vec<u64>> for U64FastFieldReader {
     }
 }

-
-
+/// `FastFieldReader` for signed 64-bit integers.
 pub struct I64FastFieldReader {
    underlying: U64FastFieldReader,
 }
@@ -192,7 +195,11 @@ impl FastFieldReader for I64FastFieldReader {



-
+/// The `FastFieldsReader` is the data structure containing
+/// all of the fast fields' data.
+///
+/// It contains a mapping that associates these fields with
+/// the proper slice in the fast field reader file.
 pub struct FastFieldsReader {
    source: ReadOnlySource,
    field_offsets: HashMap<Field, (u32, u32)>,
@@ -200,6 +207,11 @@ pub struct FastFieldsReader {

 impl FastFieldsReader {

+    /// Opens the `FastFieldsReader` file.
+    ///
+    /// When opening the fast field reader,
+    /// the list of offsets is read (as a footer of the
+    /// data file).
    pub fn open(source: ReadOnlySource) -> io::Result<FastFieldsReader> {
        let header_offset;
        let field_offsets: Vec<(Field, u32)>;
@@ -207,11 +219,11 @@ impl FastFieldsReader {
            let buffer = source.as_slice();
            {
                let mut cursor = buffer;
-                header_offset = try!(u32::deserialize(&mut cursor));
+                header_offset = u32::deserialize(&mut cursor)?;
            }
            {
                let mut cursor = &buffer[header_offset as usize..];
-                field_offsets = try!(Vec::deserialize(&mut cursor));    
+                field_offsets = Vec::deserialize(&mut cursor)?;    
            }
        }
        let mut end_offsets: Vec<u32> = field_offsets
--- a/src/fastfield/writer.rs
+++ b/src/fastfield/writer.rs
@@ -6,6 +6,7 @@ use DocId;
 use common;
 use schema::FieldType;

+/// The `FastFieldsWriter` groups all of the fast field writers.
 pub struct FastFieldsWriter {
    field_writers: Vec<IntFastFieldWriter>,
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -92,7 +92,6 @@ pub type Result<T> = std::result::Result<T, Error>;

 mod core;
 mod compression;
-
 mod store;
 mod indexer;
 mod common;
@@ -112,7 +111,7 @@ pub mod collector;
 pub mod postings;
 /// Schema
 pub mod schema;
-// FastField module
+
 pub mod fastfield;


--- a/src/schema/field_type.rs
+++ b/src/schema/field_type.rs
@@ -31,6 +31,7 @@ pub enum FieldType {

 impl FieldType {
    
+    /// Returns true iff the field is indexed.
    pub fn is_indexed(&self) -> bool {
        match self {
            &FieldType::Str(ref text_options) => {