issues/65 Added comments

Closes #65
Closes #132
This commit is contained in:
Paul Masurel
2017-05-06 23:08:20 +09:00
parent 2947364ae1
commit 0dad02791c
6 changed files with 67 additions and 19 deletions

View File

@@ -4,6 +4,30 @@ use common::serialize::BinarySerializable;
use std::mem;
/// Computes the number of bits that will be used for bitpacking.
///
/// In general the target is the minimum number of bits
/// required to express the amplitude given in argument.
///
/// e.g. If the amplitude is 10, we can store all ints on simply 4bits.
///
/// The logic is slightly more convoluted here because, for optimization
/// reasons, we want to ensure that a value spans at most 8
/// aligned bytes.
///
/// Spanning 9 bytes is possible, for instance, if we do
/// bitpacking with an amplitude of 63 bits.
/// In this case, the second int will start on bit
/// 63 (which belongs to byte 7) and ends at byte 15;
/// Hence 9 bytes (from byte 7 to byte 15 included).
///
/// To avoid this, we force the number of bits to 64bits
/// when the result is greater than `64-8 = 56 bits`.
///
/// Note that this only affects rare use cases spanning
/// a very large range of values. Even in this case, it results
/// in an extra cost of at most 12% compared to the optimal
/// number of bits.
pub fn compute_num_bits(amplitude: u64) -> u8 {
let amplitude = (64u32 - amplitude.leading_zeros()) as u8;
if amplitude <= 64 - 8 {

View File

@@ -1,14 +1,25 @@
/// Fast field module
///
/// Fast fields are the equivalent of `DocValues` in `Lucene`.
/// Fast fields are stored in column-oriented fashion and allow fast
/// random access given a `DocId`.
///
/// Their performance is comparable to that of an array lookup.
/// They are useful when a field is required for all or most of
/// the `DocSet` : for instance for scoring, grouping, filtering, or facetting.
///
/// Currently only u64 fastfield are supported.
//! # Fast fields
//!
//! Fast fields are the equivalent of `DocValues` in `Lucene`.
//! Fast fields are the non-compressed, column-oriented storage
//! of `tantivy`.
//!
//! It is designed for the fast random access of some document
//! fields given a document id.
//!
//! Fast fields are useful when a field is required for all or most of
//! the `DocSet`: for instance for scoring, grouping, filtering, or faceting.
//!
//!
//! Fields have to be declared as `FAST` in the schema.
//! Currently only 64-bit integers (signed or unsigned) are
//! supported.
//!
//! They are stored in a bitpacked fashion so that their
//! memory usage depends on the number of bits required to express
//! the amplitude of the values stored.
//!
//! Read access performance is comparable to that of an array lookup.
mod reader;
mod writer;

View File

@@ -14,7 +14,10 @@ use common::bitpacker::BitUnpacker;
use schema::FieldType;
use common;
/// Trait for accessing a fastfield.
///
/// Depending on the field type, a different
/// fast field is required.
pub trait FastFieldReader: Sized {
/// Type of the value stored in the fastfield.
@@ -35,6 +38,7 @@ pub trait FastFieldReader: Sized {
fn is_enabled(field_type: &FieldType) -> bool;
}
/// `FastFieldReader` for unsigned 64-bit integers.
pub struct U64FastFieldReader {
_data: ReadOnlySource,
bit_unpacker: BitUnpacker,
@@ -131,8 +135,7 @@ impl From<Vec<u64>> for U64FastFieldReader {
}
}
/// `FastFieldReader` for signed 64-bit integers.
///
/// It wraps a `U64FastFieldReader`, stored in `underlying`.
pub struct I64FastFieldReader {
// Unsigned reader held by this signed reader.
// NOTE(review): presumably reads are delegated to it and the `u64`
// values converted back to `i64` — confirm against the impl.
underlying: U64FastFieldReader,
}
@@ -192,7 +195,11 @@ impl FastFieldReader for I64FastFieldReader {
/// The `FastFieldsReader` is the data structure containing
/// all of the fast fields' data.
///
/// It contains a mapping that associates these fields to
/// the proper slice in the fastfield reader file.
pub struct FastFieldsReader {
source: ReadOnlySource,
field_offsets: HashMap<Field, (u32, u32)>,
@@ -200,6 +207,11 @@ pub struct FastFieldsReader {
impl FastFieldsReader {
/// Opens the `FastFieldsReader` file
///
/// When opening the fast field reader, the
/// list of the offsets is read (as a footer of the
/// data file).
pub fn open(source: ReadOnlySource) -> io::Result<FastFieldsReader> {
let header_offset;
let field_offsets: Vec<(Field, u32)>;
@@ -207,11 +219,11 @@ impl FastFieldsReader {
let buffer = source.as_slice();
{
let mut cursor = buffer;
header_offset = try!(u32::deserialize(&mut cursor));
header_offset = u32::deserialize(&mut cursor)?;
}
{
let mut cursor = &buffer[header_offset as usize..];
field_offsets = try!(Vec::deserialize(&mut cursor));
field_offsets = Vec::deserialize(&mut cursor)?;
}
}
let mut end_offsets: Vec<u32> = field_offsets

View File

@@ -6,6 +6,7 @@ use DocId;
use common;
use schema::FieldType;
/// The `FastFieldsWriter` groups all of the fast field writers.
///
/// The individual writers are kept in `field_writers`.
pub struct FastFieldsWriter {
// One `IntFastFieldWriter` per entry.
// NOTE(review): presumably one per field declared as fast in the
// schema — confirm against the constructor.
field_writers: Vec<IntFastFieldWriter>,
}

View File

@@ -92,7 +92,6 @@ pub type Result<T> = std::result::Result<T, Error>;
mod core;
mod compression;
mod store;
mod indexer;
mod common;
@@ -112,7 +111,7 @@ pub mod collector;
pub mod postings;
/// Schema
pub mod schema;
// FastField module
pub mod fastfield;

View File

@@ -31,6 +31,7 @@ pub enum FieldType {
impl FieldType {
/// returns true iff the field is indexed.
pub fn is_indexed(&self) -> bool {
match self {
&FieldType::Str(ref text_options) => {