mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-31 15:40:40 +00:00
283 lines
11 KiB
Rust
283 lines
11 KiB
Rust
/// Fast field module
|
|
///
|
|
/// Fast fields are the equivalent of `DocValues` in `Lucene`.
|
|
/// Fast fields are stored in column-oriented fashion and allow fast
|
|
/// random access given a `DocId`.
|
|
///
|
|
/// Their performance is comparable to that of an array lookup.
|
|
/// They are useful when a field is required for all or most of
|
|
/// the `DocSet` : for instance for scoring, grouping, filtering, or facetting.
|
|
///
|
|
/// Currently only u32 fastfield are supported.
|
|
|
|
mod reader;
|
|
mod writer;
|
|
mod serializer;
|
|
|
|
pub use self::writer::{U32FastFieldsWriter, U32FastFieldWriter};
|
|
pub use self::reader::{U32FastFieldsReader, U32FastFieldReader};
|
|
pub use self::serializer::FastFieldSerializer;
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use schema::Field;
|
|
use std::path::Path;
|
|
use directory::{Directory, WritePtr, RAMDirectory};
|
|
use schema::Document;
|
|
use schema::{Schema, SchemaBuilder};
|
|
use schema::FAST;
|
|
use test::Bencher;
|
|
use test;
|
|
use rand::Rng;
|
|
use rand::SeedableRng;
|
|
use rand::XorShiftRng;
|
|
|
|
lazy_static! {
|
|
static ref SCHEMA: Schema = {
|
|
let mut schema_builder = SchemaBuilder::default();
|
|
schema_builder.add_u32_field("field", FAST);
|
|
schema_builder.build()
|
|
};
|
|
static ref FIELD: Field = {
|
|
SCHEMA.get_field("field").unwrap()
|
|
};
|
|
}
|
|
|
|
fn add_single_field_doc(fast_field_writers: &mut U32FastFieldsWriter, field: Field, value: u32) {
|
|
let mut doc = Document::default();
|
|
doc.add_u32(field, value);
|
|
fast_field_writers.add_document(&doc);
|
|
}
|
|
|
|
#[test]
|
|
pub fn test_fastfield() {
|
|
let test_fastfield = U32FastFieldReader::from(vec!(100,200,300));
|
|
println!("{}", test_fastfield.get(0));
|
|
println!("{}", test_fastfield.get(1));
|
|
println!("{}", test_fastfield.get(2));
|
|
assert_eq!(test_fastfield.get(0), 100);
|
|
assert_eq!(test_fastfield.get(1), 200);
|
|
assert_eq!(test_fastfield.get(2), 300);
|
|
}
|
|
|
|
#[test]
|
|
fn test_intfastfield_small() {
|
|
let path = Path::new("test");
|
|
let mut directory: RAMDirectory = RAMDirectory::create();
|
|
{
|
|
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
|
let mut serializer = FastFieldSerializer::new(write).unwrap();
|
|
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA);
|
|
add_single_field_doc(&mut fast_field_writers, *FIELD, 13u32);
|
|
add_single_field_doc(&mut fast_field_writers, *FIELD, 14u32);
|
|
add_single_field_doc(&mut fast_field_writers, *FIELD, 2u32);
|
|
fast_field_writers.serialize(&mut serializer).unwrap();
|
|
serializer.close().unwrap();
|
|
}
|
|
let source = directory.open_read(&path).unwrap();
|
|
{
|
|
assert_eq!(source.len(), 26 as usize);
|
|
}
|
|
{
|
|
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
|
|
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
|
|
assert_eq!(fast_field_reader.get(0), 13u32);
|
|
assert_eq!(fast_field_reader.get(1), 14u32);
|
|
assert_eq!(fast_field_reader.get(2), 2u32);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_intfastfield_large() {
|
|
let path = Path::new("test");
|
|
let mut directory: RAMDirectory = RAMDirectory::create();
|
|
{
|
|
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
|
let mut serializer = FastFieldSerializer::new(write).unwrap();
|
|
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA);
|
|
add_single_field_doc(&mut fast_field_writers, *FIELD, 4u32);
|
|
add_single_field_doc(&mut fast_field_writers, *FIELD, 14_082_001u32);
|
|
add_single_field_doc(&mut fast_field_writers, *FIELD, 3_052u32);
|
|
add_single_field_doc(&mut fast_field_writers, *FIELD, 9002u32);
|
|
add_single_field_doc(&mut fast_field_writers, *FIELD, 15_001u32);
|
|
add_single_field_doc(&mut fast_field_writers, *FIELD, 777u32);
|
|
add_single_field_doc(&mut fast_field_writers, *FIELD, 1_002u32);
|
|
add_single_field_doc(&mut fast_field_writers, *FIELD, 1_501u32);
|
|
add_single_field_doc(&mut fast_field_writers, *FIELD, 215u32);
|
|
fast_field_writers.serialize(&mut serializer).unwrap();
|
|
serializer.close().unwrap();
|
|
}
|
|
let source = directory.open_read(&path).unwrap();
|
|
{
|
|
assert_eq!(source.len(), 50 as usize);
|
|
}
|
|
{
|
|
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
|
|
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
|
|
assert_eq!(fast_field_reader.get(0), 4u32);
|
|
assert_eq!(fast_field_reader.get(1), 14_082_001u32);
|
|
assert_eq!(fast_field_reader.get(2), 3_052u32);
|
|
assert_eq!(fast_field_reader.get(3), 9002u32);
|
|
assert_eq!(fast_field_reader.get(4), 15_001u32);
|
|
assert_eq!(fast_field_reader.get(5), 777u32);
|
|
assert_eq!(fast_field_reader.get(6), 1_002u32);
|
|
assert_eq!(fast_field_reader.get(7), 1_501u32);
|
|
assert_eq!(fast_field_reader.get(8), 215u32);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_intfastfield_null_amplitude() {
|
|
let path = Path::new("test");
|
|
let mut directory: RAMDirectory = RAMDirectory::create();
|
|
|
|
|
|
{
|
|
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
|
let mut serializer = FastFieldSerializer::new(write).unwrap();
|
|
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA);
|
|
for _ in 0..10_000 {
|
|
add_single_field_doc(&mut fast_field_writers, *FIELD, 100_000u32);
|
|
}
|
|
fast_field_writers.serialize(&mut serializer).unwrap();
|
|
serializer.close().unwrap();
|
|
}
|
|
let source = directory.open_read(&path).unwrap();
|
|
{
|
|
assert_eq!(source.len(), 26 as usize);
|
|
}
|
|
{
|
|
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
|
|
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
|
|
for doc in 0..10_000 {
|
|
assert_eq!(fast_field_reader.get(doc), 100_000u32);
|
|
}
|
|
}
|
|
}
|
|
|
|
fn generate_permutation() -> Vec<u32> {
|
|
let seed: &[u32; 4] = &[1, 2, 3, 4];
|
|
let mut rng = XorShiftRng::from_seed(*seed);
|
|
let mut permutation: Vec<u32> = (0u32..1_000_000u32).collect();
|
|
rng.shuffle(&mut permutation);
|
|
permutation
|
|
}
|
|
|
|
#[test]
|
|
fn test_intfastfield_permutation() {
|
|
let path = Path::new("test");
|
|
let permutation = generate_permutation();
|
|
let n = permutation.len();
|
|
let mut directory = RAMDirectory::create();
|
|
{
|
|
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
|
let mut serializer = FastFieldSerializer::new(write).unwrap();
|
|
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA);
|
|
for x in &permutation {
|
|
add_single_field_doc(&mut fast_field_writers, *FIELD, *x);
|
|
}
|
|
fast_field_writers.serialize(&mut serializer).unwrap();
|
|
serializer.close().unwrap();
|
|
}
|
|
let source = directory.open_read(&path).unwrap();
|
|
{
|
|
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
|
|
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
|
|
let mut a = 0u32;
|
|
for _ in 0..n {
|
|
assert_eq!(fast_field_reader.get(a as u32), permutation[a as usize]);
|
|
a = fast_field_reader.get(a as u32);
|
|
}
|
|
}
|
|
}
|
|
|
|
#[bench]
|
|
fn bench_intfastfield_linear_veclookup(b: &mut Bencher) {
|
|
let permutation = generate_permutation();
|
|
b.iter(|| {
|
|
let n = test::black_box(7000u32);
|
|
let mut a = 0u32;
|
|
for i in (0u32..n).step_by(7) {
|
|
a ^= permutation[i as usize];
|
|
}
|
|
a
|
|
});
|
|
}
|
|
|
|
#[bench]
|
|
fn bench_intfastfield_veclookup(b: &mut Bencher) {
|
|
let permutation = generate_permutation();
|
|
b.iter(|| {
|
|
let n = test::black_box(1000u32);
|
|
let mut a = 0u32;
|
|
for _ in 0u32..n {
|
|
a = permutation[a as usize];
|
|
}
|
|
a
|
|
});
|
|
}
|
|
|
|
#[bench]
|
|
fn bench_intfastfield_linear_fflookup(b: &mut Bencher) {
|
|
let path = Path::new("test");
|
|
let permutation = generate_permutation();
|
|
let mut directory: RAMDirectory = RAMDirectory::create();
|
|
{
|
|
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
|
let mut serializer = FastFieldSerializer::new(write).unwrap();
|
|
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA);
|
|
for x in &permutation {
|
|
add_single_field_doc(&mut fast_field_writers, *FIELD, *x);
|
|
}
|
|
fast_field_writers.serialize(&mut serializer).unwrap();
|
|
serializer.close().unwrap();
|
|
}
|
|
let source = directory.open_read(&path).unwrap();
|
|
{
|
|
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
|
|
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
|
|
b.iter(|| {
|
|
let n = test::black_box(7000u32);
|
|
let mut a = 0u32;
|
|
for i in (0u32..n).step_by(7) {
|
|
a ^= fast_field_reader.get(i);
|
|
}
|
|
a
|
|
});
|
|
}
|
|
}
|
|
|
|
#[bench]
|
|
fn bench_intfastfield_fflookup(b: &mut Bencher) {
|
|
let path = Path::new("test");
|
|
let permutation = generate_permutation();
|
|
let mut directory: RAMDirectory = RAMDirectory::create();
|
|
{
|
|
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
|
let mut serializer = FastFieldSerializer::new(write).unwrap();
|
|
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&SCHEMA);
|
|
for x in &permutation {
|
|
add_single_field_doc(&mut fast_field_writers, *FIELD, *x);
|
|
}
|
|
fast_field_writers.serialize(&mut serializer).unwrap();
|
|
serializer.close().unwrap();
|
|
}
|
|
let source = directory.open_read(&path).unwrap();
|
|
{
|
|
let fast_field_readers = U32FastFieldsReader::open(source).unwrap();
|
|
let fast_field_reader = fast_field_readers.get_field(*FIELD).unwrap();
|
|
b.iter(|| {
|
|
let n = test::black_box(1000u32);
|
|
let mut a = 0u32;
|
|
for _ in 0u32..n {
|
|
a = fast_field_reader.get(a);
|
|
}
|
|
a
|
|
});
|
|
}
|
|
}
|
|
}
|