mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-19 09:40:45 +00:00
@@ -4,6 +4,30 @@ use common::serialize::BinarySerializable;
|
||||
use std::mem;
|
||||
|
||||
|
||||
/// Computes the number of bits that will be used for bitpacking.
|
||||
///
|
||||
/// In general the target is the minimum number of bits
|
||||
/// required to express the amplitude given in argument.
|
||||
///
|
||||
/// e.g. If the amplitude is 10, we can store all ints on simply 4bits.
|
||||
///
|
||||
/// The logic is slightly more convoluted here as for optimization
|
||||
/// reasons, we want to ensure that a value spans at most 8
|
||||
/// aligned bytes.
|
||||
///
|
||||
/// Spanning 9 bytes is possible, for instance, if we do
|
||||
/// bitpacking with an amplitude of 63 bits.
|
||||
/// In this case, the second int will start on bit
|
||||
/// 63 (which belongs to byte 7) and ends at byte 15;
|
||||
/// Hence 9 bytes (from byte 7 to byte 15 included).
|
||||
///
|
||||
/// To avoid this, we force the number of bits to 64bits
|
||||
/// when the result is greater than `64-8 = 56 bits`.
|
||||
///
|
||||
/// Note that this only affects rare use cases spanning
|
||||
/// a very large range of values. Even in this case, it results
|
||||
/// in an extra cost of at most 12% compared to the optimal
|
||||
/// number of bits.
|
||||
pub fn compute_num_bits(amplitude: u64) -> u8 {
|
||||
let amplitude = (64u32 - amplitude.leading_zeros()) as u8;
|
||||
if amplitude <= 64 - 8 {
|
||||
|
||||
@@ -1,14 +1,25 @@
|
||||
/// Fast field module
|
||||
///
|
||||
/// Fast fields are the equivalent of `DocValues` in `Lucene`.
|
||||
/// Fast fields are stored in column-oriented fashion and allow fast
|
||||
/// random access given a `DocId`.
|
||||
///
|
||||
/// Their performance is comparable to that of an array lookup.
|
||||
/// They are useful when a field is required for all or most of
|
||||
/// the `DocSet` : for instance for scoring, grouping, filtering, or facetting.
|
||||
///
|
||||
/// Currently only u64 fastfield are supported.
|
||||
//! # Fast fields
|
||||
//!
|
||||
//! Fast fields are the equivalent of `DocValues` in `Lucene`.
|
||||
//! Fast fields are the non-compressed, column-oriented storage
|
||||
//! of `tantivy`.
|
||||
//!
|
||||
//! It is designed for the fast random access of some document
|
||||
//! fields given a document id.
|
||||
//!
|
||||
//! `FastField` are useful when a field is required for all or most of
|
||||
//! the `DocSet` : for instance for scoring, grouping, filtering, or facetting.
|
||||
//!
|
||||
//!
|
||||
//! Fields have to be declared as `FAST` in the schema.
|
||||
//! Currently only 64-bit integers (signed or unsigned) are
|
||||
//! supported.
|
||||
//!
|
||||
//! They are stored in a bitpacked fashion so that their
|
||||
//! memory usage is directly linear with the amplitude of the
|
||||
//! values stored.
|
||||
//!
|
||||
//! Read access performance is comparable to that of an array lookup.
|
||||
|
||||
mod reader;
|
||||
mod writer;
|
||||
|
||||
@@ -14,7 +14,10 @@ use common::bitpacker::BitUnpacker;
|
||||
use schema::FieldType;
|
||||
use common;
|
||||
|
||||
|
||||
/// Trait for accessing a fastfield.
|
||||
///
|
||||
/// Depending on the field type, a different
|
||||
/// fast field is required.
|
||||
pub trait FastFieldReader: Sized {
|
||||
|
||||
/// Type of the value stored in the fastfield.
|
||||
@@ -35,6 +38,7 @@ pub trait FastFieldReader: Sized {
|
||||
fn is_enabled(field_type: &FieldType) -> bool;
|
||||
}
|
||||
|
||||
/// `FastFieldReader` for unsigned 64-bit integers.
|
||||
pub struct U64FastFieldReader {
|
||||
_data: ReadOnlySource,
|
||||
bit_unpacker: BitUnpacker,
|
||||
@@ -131,8 +135,7 @@ impl From<Vec<u64>> for U64FastFieldReader {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// `FastFieldReader` for signed 64-bit integers.
|
||||
pub struct I64FastFieldReader {
|
||||
underlying: U64FastFieldReader,
|
||||
}
|
||||
@@ -192,7 +195,11 @@ impl FastFieldReader for I64FastFieldReader {
|
||||
|
||||
|
||||
|
||||
|
||||
/// The `FastFieldsReader` is the data structure containing
|
||||
/// all of the fast fields' data.
|
||||
///
|
||||
/// It contains a mapping that associates these fields to
|
||||
/// the proper slice in the fastfield reader file.
|
||||
pub struct FastFieldsReader {
|
||||
source: ReadOnlySource,
|
||||
field_offsets: HashMap<Field, (u32, u32)>,
|
||||
@@ -200,6 +207,11 @@ pub struct FastFieldsReader {
|
||||
|
||||
impl FastFieldsReader {
|
||||
|
||||
/// Opens the `FastFieldsReader` file
|
||||
///
|
||||
/// When opening the fast field reader, the
|
||||
/// list of the offsets is read (as a footer of the
|
||||
/// data file).
|
||||
pub fn open(source: ReadOnlySource) -> io::Result<FastFieldsReader> {
|
||||
let header_offset;
|
||||
let field_offsets: Vec<(Field, u32)>;
|
||||
@@ -207,11 +219,11 @@ impl FastFieldsReader {
|
||||
let buffer = source.as_slice();
|
||||
{
|
||||
let mut cursor = buffer;
|
||||
header_offset = try!(u32::deserialize(&mut cursor));
|
||||
header_offset = u32::deserialize(&mut cursor)?;
|
||||
}
|
||||
{
|
||||
let mut cursor = &buffer[header_offset as usize..];
|
||||
field_offsets = try!(Vec::deserialize(&mut cursor));
|
||||
field_offsets = Vec::deserialize(&mut cursor)?;
|
||||
}
|
||||
}
|
||||
let mut end_offsets: Vec<u32> = field_offsets
|
||||
|
||||
@@ -6,6 +6,7 @@ use DocId;
|
||||
use common;
|
||||
use schema::FieldType;
|
||||
|
||||
/// The `FastFieldsWriter` groups together all of the fast field writers.
|
||||
pub struct FastFieldsWriter {
|
||||
field_writers: Vec<IntFastFieldWriter>,
|
||||
}
|
||||
|
||||
@@ -92,7 +92,6 @@ pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
mod core;
|
||||
mod compression;
|
||||
|
||||
mod store;
|
||||
mod indexer;
|
||||
mod common;
|
||||
@@ -112,7 +111,7 @@ pub mod collector;
|
||||
pub mod postings;
|
||||
/// Schema
|
||||
pub mod schema;
|
||||
// FastField module
|
||||
|
||||
pub mod fastfield;
|
||||
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@ pub enum FieldType {
|
||||
|
||||
impl FieldType {
|
||||
|
||||
/// Returns true iff the field is indexed.
|
||||
pub fn is_indexed(&self) -> bool {
|
||||
match self {
|
||||
&FieldType::Str(ref text_options) => {
|
||||
|
||||
Reference in New Issue
Block a user