mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-03 15:52:55 +00:00
Compare commits
2 Commits
githubacti
...
criterion
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a854a60e2a | ||
|
|
92d73a6bfb |
14
Cargo.toml
14
Cargo.toml
@@ -62,6 +62,7 @@ rand = "0.7"
|
|||||||
maplit = "1"
|
maplit = "1"
|
||||||
matches = "0.1.8"
|
matches = "0.1.8"
|
||||||
time = "0.1.42"
|
time = "0.1.42"
|
||||||
|
criterion = "0.2"
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
opt-level = 3
|
opt-level = 3
|
||||||
@@ -74,6 +75,7 @@ overflow-checks = true
|
|||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["mmap"]
|
default = ["mmap"]
|
||||||
|
forbench = []
|
||||||
mmap = ["atomicwrites", "fs2", "memmap", "notify"]
|
mmap = ["atomicwrites", "fs2", "memmap", "notify"]
|
||||||
lz4-compression = ["lz4"]
|
lz4-compression = ["lz4"]
|
||||||
failpoints = ["fail/failpoints"]
|
failpoints = ["fail/failpoints"]
|
||||||
@@ -97,3 +99,15 @@ features = ["failpoints"]
|
|||||||
name = "failpoints"
|
name = "failpoints"
|
||||||
path = "tests/failpoints/mod.rs"
|
path = "tests/failpoints/mod.rs"
|
||||||
required-features = ["fail/failpoints"]
|
required-features = ["fail/failpoints"]
|
||||||
|
|
||||||
|
[profile.bench]
|
||||||
|
lto = true
|
||||||
|
|
||||||
|
# Every file under `benches/` generates its own `main` through
# `criterion_main!`, so each target has to opt out of the default libtest
# bench harness.
[[bench]]
name = "bitset"
harness = false

[[bench]]
name = "fastfield"
harness = false

[[bench]]
name = "union"
harness = false

[[bench]]
name = "vint"
harness = false
|
||||||
73
benches/bitset.rs
Normal file
73
benches/bitset.rs
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
use criterion::{criterion_group, criterion_main, Criterion};
|
||||||
|
use rand::distributions::{Bernoulli, Uniform};
|
||||||
|
use rand::rngs::StdRng;
|
||||||
|
use rand::{Rng, SeedableRng};
|
||||||
|
use tantivy::forbench::bitset::{BitSet, TinySet};
|
||||||
|
use tantivy::query::BitSetDocSet;
|
||||||
|
use tantivy::DocSet;
|
||||||
|
|
||||||
|
fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec<u32> {
|
||||||
|
StdRng::from_seed([seed_val; 32])
|
||||||
|
.sample_iter(&Bernoulli::new(ratio).unwrap())
|
||||||
|
.take(n as usize)
|
||||||
|
.enumerate()
|
||||||
|
.filter_map(|(val, keep)| if keep { Some(val as u32) } else { None })
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn generate_nonunique_unsorted(max_value: u32, n_elems: usize) -> Vec<u32> {
|
||||||
|
let seed: [u8; 32] = [1; 32];
|
||||||
|
StdRng::from_seed(seed)
|
||||||
|
.sample_iter(&Uniform::new(0u32, max_value))
|
||||||
|
.take(n_elems)
|
||||||
|
.collect::<Vec<u32>>()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bench_tinyset_pop(criterion: &mut Criterion) {
|
||||||
|
criterion.bench_function("pop_lowest", |b| {
|
||||||
|
b.iter(|| {
|
||||||
|
let mut tinyset = TinySet::singleton(criterion::black_box(31u32));
|
||||||
|
tinyset.pop_lowest();
|
||||||
|
tinyset.pop_lowest();
|
||||||
|
tinyset.pop_lowest();
|
||||||
|
tinyset.pop_lowest();
|
||||||
|
tinyset.pop_lowest();
|
||||||
|
tinyset.pop_lowest();
|
||||||
|
})
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bench_bitset_insert(criterion: &mut Criterion) {
|
||||||
|
criterion.bench_function_over_inputs(
|
||||||
|
"bitset_insert",
|
||||||
|
|bench, (max_value, n_elems)| {
|
||||||
|
let els = generate_nonunique_unsorted(*max_value, *n_elems);
|
||||||
|
bench.iter(move || {
|
||||||
|
let mut bitset = BitSet::with_max_value(1_000_000);
|
||||||
|
for el in els.iter().cloned() {
|
||||||
|
bitset.insert(el);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
},
|
||||||
|
vec![(1_000_000u32, 10_000)],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bench_bitsetdocset_iterate(b: &mut test::Bencher) {
|
||||||
|
let mut bitset = BitSet::with_max_value(1_000_000);
|
||||||
|
for el in sample_with_seed(1_000_000u32, 0.01, 0u8) {
|
||||||
|
bitset.insert(el);
|
||||||
|
}
|
||||||
|
b.iter(|| {
|
||||||
|
let mut docset = BitSetDocSet::from(bitset.clone());
|
||||||
|
while docset.advance() {}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
criterion_group!(
|
||||||
|
benches,
|
||||||
|
bench_tinyset_pop,
|
||||||
|
bench_bitset_insert,
|
||||||
|
bench_bitsetdocset_iterate
|
||||||
|
);
|
||||||
|
criterion_main!(benches);
|
||||||
107
benches/fastfield.rs
Normal file
107
benches/fastfield.rs
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
use criterion::criterion_group;
|
||||||
|
use criterion::criterion_main;
|
||||||
|
use criterion::Criterion;
|
||||||
|
use criterion::ParameterizedBenchmark;
|
||||||
|
use rand::rngs::StdRng;
|
||||||
|
use rand::seq::SliceRandom;
|
||||||
|
use rand::SeedableRng;
|
||||||
|
use tantivy::schema::{Schema, FAST};
|
||||||
|
use tantivy::{doc, DocId, Index};
|
||||||
|
|
||||||
|
const NUM_LOOKUPS: usize = 1_000;
|
||||||
|
|
||||||
|
fn generate_permutation(stride: usize, bit_width: u8) -> Vec<u64> {
|
||||||
|
let mut permutation: Vec<u64> = (0u64..(NUM_LOOKUPS * stride) as u64).collect();
|
||||||
|
permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
|
||||||
|
permutation.push(1u64 << (bit_width as u64)); //< just to force the bit_width
|
||||||
|
permutation
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bench_linear_lookup(c: &mut Criterion) {
|
||||||
|
c.bench(
|
||||||
|
"lookup_stride",
|
||||||
|
ParameterizedBenchmark::new(
|
||||||
|
"baseline_vec",
|
||||||
|
|bench, (stride, num_bits)| {
|
||||||
|
let arr = generate_permutation(*stride, *num_bits);
|
||||||
|
bench.iter(move || {
|
||||||
|
let mut a = 0u64;
|
||||||
|
for i in (0..NUM_LOOKUPS / stride).map(|v| v * 7) {
|
||||||
|
a ^= arr[i as usize];
|
||||||
|
}
|
||||||
|
a
|
||||||
|
})
|
||||||
|
},
|
||||||
|
vec![(7, 1), (7, 5), (7, 20)],
|
||||||
|
)
|
||||||
|
.with_function("fastfield", |bench, (stride, num_bits)| {
|
||||||
|
let mut schema_builder = Schema::builder();
|
||||||
|
let val_field = schema_builder.add_u64_field("val", FAST);
|
||||||
|
let schema = schema_builder.build();
|
||||||
|
|
||||||
|
let index = Index::create_in_ram(schema);
|
||||||
|
let mut index_writer = index.writer_with_num_threads(1, 80_000_000).unwrap();
|
||||||
|
for el in generate_permutation(*stride, *num_bits) {
|
||||||
|
index_writer.add_document(doc!(val_field=>el));
|
||||||
|
}
|
||||||
|
index_writer.commit().unwrap();
|
||||||
|
let reader = index.reader().unwrap();
|
||||||
|
let searcher = reader.searcher();
|
||||||
|
let segment_reader = searcher.segment_reader(0u32);
|
||||||
|
let fast_field_reader = segment_reader.fast_fields().u64(val_field).unwrap();
|
||||||
|
bench.iter(move || {
|
||||||
|
let mut a = 0u64;
|
||||||
|
for i in (0..NUM_LOOKUPS / stride).map(|v| v * 7) {
|
||||||
|
a ^= fast_field_reader.get(i as DocId);
|
||||||
|
}
|
||||||
|
a
|
||||||
|
})
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bench_jumpy_lookup(c: &mut Criterion) {
|
||||||
|
c.bench(
|
||||||
|
"lookup_jumpy",
|
||||||
|
ParameterizedBenchmark::new(
|
||||||
|
"baseline_vec",
|
||||||
|
|bench, (stride, num_bits)| {
|
||||||
|
let arr = generate_permutation(*stride, *num_bits);
|
||||||
|
bench.iter(move || {
|
||||||
|
let mut a = 0u64;
|
||||||
|
for _ in 0..NUM_LOOKUPS {
|
||||||
|
a = arr[a as usize];
|
||||||
|
}
|
||||||
|
a
|
||||||
|
})
|
||||||
|
},
|
||||||
|
vec![(7, 1), (7, 5), (7, 20)],
|
||||||
|
)
|
||||||
|
.with_function("fastfield", |bench, (stride, num_bits)| {
|
||||||
|
let mut schema_builder = Schema::builder();
|
||||||
|
let val_field = schema_builder.add_u64_field("val", FAST);
|
||||||
|
let schema = schema_builder.build();
|
||||||
|
|
||||||
|
let index = Index::create_in_ram(schema);
|
||||||
|
let mut index_writer = index.writer_with_num_threads(1, 80_000_000).unwrap();
|
||||||
|
for el in generate_permutation(*stride, *num_bits) {
|
||||||
|
index_writer.add_document(doc!(val_field=>el));
|
||||||
|
}
|
||||||
|
index_writer.commit().unwrap();
|
||||||
|
let reader = index.reader().unwrap();
|
||||||
|
let searcher = reader.searcher();
|
||||||
|
let segment_reader = searcher.segment_reader(0u32);
|
||||||
|
let fast_field_reader = segment_reader.fast_fields().u64(val_field).unwrap();
|
||||||
|
bench.iter(move || {
|
||||||
|
let mut a = 0u64;
|
||||||
|
for _ in 0..NUM_LOOKUPS {
|
||||||
|
a = fast_field_reader.get(a as DocId);
|
||||||
|
}
|
||||||
|
a
|
||||||
|
})
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
criterion_group!(benches, bench_linear_lookup, bench_jumpy_lookup);
|
||||||
|
criterion_main!(benches);
|
||||||
50
benches/union.rs
Normal file
50
benches/union.rs
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
use criterion::{criterion_group, criterion_main, Criterion};
|
||||||
|
use rand::rngs::StdRng;
|
||||||
|
use rand::{Rng, SeedableRng};
|
||||||
|
use tantivy::query::QueryParser;
|
||||||
|
use tantivy::schema::{Schema, STRING};
|
||||||
|
use tantivy::{Document, Index};
|
||||||
|
|
||||||
|
fn bench_union(criterion: &mut Criterion) {
|
||||||
|
criterion.bench_function_over_inputs(
|
||||||
|
"union_docset_fulladvance",
|
||||||
|
|bench, (ratio_left, ratio_right)| {
|
||||||
|
let mut schema_builder = Schema::builder();
|
||||||
|
let field = schema_builder.add_text_field("val", STRING);
|
||||||
|
let schema = schema_builder.build();
|
||||||
|
let index = Index::create_in_ram(schema);
|
||||||
|
let mut index_writer = index.writer_with_num_threads(1, 80_000_000).unwrap();
|
||||||
|
let mut stdrng = StdRng::from_seed([0u8; 32]);
|
||||||
|
for _ in 0u32..100_000u32 {
|
||||||
|
let mut doc = Document::default();
|
||||||
|
if stdrng.gen_bool(*ratio_left) {
|
||||||
|
doc.add_text(field, "left");
|
||||||
|
}
|
||||||
|
if stdrng.gen_bool(*ratio_right) {
|
||||||
|
doc.add_text(field, "right");
|
||||||
|
}
|
||||||
|
index_writer.add_document(doc);
|
||||||
|
}
|
||||||
|
index_writer.commit().unwrap();
|
||||||
|
let reader = index.reader().unwrap();
|
||||||
|
let searcher = reader.searcher();
|
||||||
|
|
||||||
|
let query = QueryParser::for_index(&index, vec![field])
|
||||||
|
.parse_query("left right")
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
bench.iter(move || {
|
||||||
|
let weight = query.weight(&searcher, false).unwrap();
|
||||||
|
let mut scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
|
||||||
|
let mut sum_docs = 0u64;
|
||||||
|
scorer.for_each(&mut |doc_id, _score| {
|
||||||
|
sum_docs += doc_id as u64;
|
||||||
|
});
|
||||||
|
});
|
||||||
|
},
|
||||||
|
vec![(0.2, 0.1), (0.2, 0.02)],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
criterion_group!(benches, bench_union);
|
||||||
|
criterion_main!(benches);
|
||||||
72
benches/vint.rs
Normal file
72
benches/vint.rs
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
use criterion::{criterion_group, criterion_main, Criterion, ParameterizedBenchmark};
|
||||||
|
use rand::rngs::StdRng;
|
||||||
|
use rand::Rng;
|
||||||
|
use rand::SeedableRng;
|
||||||
|
use tantivy::forbench::compression::{compressed_block_size, BlockDecoder};
|
||||||
|
use tantivy::forbench::compression::{BlockEncoder, VIntEncoder};
|
||||||
|
use tantivy::forbench::compression::{VIntDecoder, COMPRESSION_BLOCK_SIZE};
|
||||||
|
|
||||||
|
fn generate_array_with_seed(n: usize, ratio: f64, seed_val: u8) -> Vec<u32> {
|
||||||
|
let seed: [u8; 32] = [seed_val; 32];
|
||||||
|
let mut rng = StdRng::from_seed(seed);
|
||||||
|
(0u32..).filter(|_| rng.gen_bool(ratio)).take(n).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn generate_array(n: usize, ratio: f64) -> Vec<u32> {
|
||||||
|
generate_array_with_seed(n, ratio, 4)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bench_compress(criterion: &mut Criterion) {
|
||||||
|
criterion.bench(
|
||||||
|
"compress_sorted",
|
||||||
|
ParameterizedBenchmark::new(
|
||||||
|
"bitpack",
|
||||||
|
|bench, ratio| {
|
||||||
|
let mut encoder = BlockEncoder::new();
|
||||||
|
let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
|
||||||
|
bench.iter(|| {
|
||||||
|
encoder.compress_block_sorted(&data, 0u32);
|
||||||
|
});
|
||||||
|
},
|
||||||
|
vec![0.1],
|
||||||
|
)
|
||||||
|
.with_function("vint", |bench, ratio| {
|
||||||
|
let mut encoder = BlockEncoder::new();
|
||||||
|
let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
|
||||||
|
bench.iter(|| {
|
||||||
|
encoder.compress_vint_sorted(&data, 0u32);
|
||||||
|
});
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bench_uncompress(criterion: &mut Criterion) {
|
||||||
|
criterion.bench(
|
||||||
|
"uncompress_sorted",
|
||||||
|
ParameterizedBenchmark::new(
|
||||||
|
"bitpack",
|
||||||
|
|bench, ratio| {
|
||||||
|
let mut encoder = BlockEncoder::new();
|
||||||
|
let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
|
||||||
|
let (num_bits, compressed) = encoder.compress_block_sorted(&data, 0u32);
|
||||||
|
let mut decoder = BlockDecoder::new();
|
||||||
|
bench.iter(|| {
|
||||||
|
decoder.uncompress_block_sorted(compressed, 0u32, num_bits);
|
||||||
|
});
|
||||||
|
},
|
||||||
|
vec![0.1],
|
||||||
|
)
|
||||||
|
.with_function("vint", |bench, ratio| {
|
||||||
|
let mut encoder = BlockEncoder::new();
|
||||||
|
let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
|
||||||
|
let compressed = encoder.compress_vint_sorted(&data, 0u32);
|
||||||
|
let mut decoder = BlockDecoder::new();
|
||||||
|
bench.iter(move || {
|
||||||
|
decoder.uncompress_vint_sorted(compressed, 0u32, COMPRESSION_BLOCK_SIZE);
|
||||||
|
});
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
criterion_group!(benches, bench_compress, bench_uncompress);
|
||||||
|
criterion_main!(benches);
|
||||||
2
run-bench.rs
Executable file
2
run-bench.rs
Executable file
@@ -0,0 +1,2 @@
|
|||||||
|
#!/usr/bin/env bash
# Runs the benchmarks with the `forbench` feature enabled. Any extra arguments
# are forwarded to `cargo bench`.

# Abort on the first failing command, on unset variables and on pipe failures.
set -euo pipefail

cargo bench --features forbench "$@"
|
||||||
@@ -86,6 +86,7 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
pub fn get(&self, idx: u64) -> u64 {
|
pub fn get(&self, idx: u64) -> u64 {
|
||||||
if self.num_bits == 0 {
|
if self.num_bits == 0 {
|
||||||
return 0u64;
|
return 0u64;
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ use std::fmt;
|
|||||||
use std::u64;
|
use std::u64;
|
||||||
|
|
||||||
#[derive(Clone, Copy, Eq, PartialEq)]
|
#[derive(Clone, Copy, Eq, PartialEq)]
|
||||||
pub(crate) struct TinySet(u64);
|
pub struct TinySet(u64);
|
||||||
|
|
||||||
impl fmt::Debug for TinySet {
|
impl fmt::Debug for TinySet {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
@@ -353,43 +353,3 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(all(test, feature = "unstable"))]
|
|
||||||
mod bench {
|
|
||||||
|
|
||||||
use super::BitSet;
|
|
||||||
use super::TinySet;
|
|
||||||
use test;
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn bench_tinyset_pop(b: &mut test::Bencher) {
|
|
||||||
b.iter(|| {
|
|
||||||
let mut tinyset = TinySet::singleton(test::black_box(31u32));
|
|
||||||
tinyset.pop_lowest();
|
|
||||||
tinyset.pop_lowest();
|
|
||||||
tinyset.pop_lowest();
|
|
||||||
tinyset.pop_lowest();
|
|
||||||
tinyset.pop_lowest();
|
|
||||||
tinyset.pop_lowest();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn bench_tinyset_sum(b: &mut test::Bencher) {
|
|
||||||
let tiny_set = TinySet::empty().insert(10u32).insert(14u32).insert(21u32);
|
|
||||||
b.iter(|| {
|
|
||||||
assert_eq!(test::black_box(tiny_set).into_iter().sum::<u32>(), 45u32);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn bench_tinyarr_sum(b: &mut test::Bencher) {
|
|
||||||
let v = [10u32, 14u32, 21u32];
|
|
||||||
b.iter(|| test::black_box(v).iter().cloned().sum::<u32>());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn bench_bitset_initialize(b: &mut test::Bencher) {
|
|
||||||
b.iter(|| BitSet::with_max_value(1_000_000));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ mod serialize;
|
|||||||
mod vint;
|
mod vint;
|
||||||
|
|
||||||
pub use self::bitset::BitSet;
|
pub use self::bitset::BitSet;
|
||||||
pub(crate) use self::bitset::TinySet;
|
pub use self::bitset::TinySet;
|
||||||
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
|
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
|
||||||
pub use self::counting_writer::CountingWriter;
|
pub use self::counting_writer::CountingWriter;
|
||||||
pub use self::serialize::{BinarySerializable, FixedSize};
|
pub use self::serialize::{BinarySerializable, FixedSize};
|
||||||
|
|||||||
@@ -431,111 +431,3 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(all(test, feature = "unstable"))]
|
|
||||||
mod bench {
|
|
||||||
use super::tests::FIELD;
|
|
||||||
use super::tests::{generate_permutation, SCHEMA};
|
|
||||||
use super::*;
|
|
||||||
use common::CompositeFile;
|
|
||||||
use directory::{Directory, RAMDirectory, WritePtr};
|
|
||||||
use fastfield::FastFieldReader;
|
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::path::Path;
|
|
||||||
use test::{self, Bencher};
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn bench_intfastfield_linear_veclookup(b: &mut Bencher) {
|
|
||||||
let permutation = generate_permutation();
|
|
||||||
b.iter(|| {
|
|
||||||
let n = test::black_box(7000u32);
|
|
||||||
let mut a = 0u64;
|
|
||||||
for i in (0u32..n / 7).map(|v| v * 7) {
|
|
||||||
a ^= permutation[i as usize];
|
|
||||||
}
|
|
||||||
a
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn bench_intfastfield_veclookup(b: &mut Bencher) {
|
|
||||||
let permutation = generate_permutation();
|
|
||||||
b.iter(|| {
|
|
||||||
let n = test::black_box(1000u32);
|
|
||||||
let mut a = 0u64;
|
|
||||||
for _ in 0u32..n {
|
|
||||||
a = permutation[a as usize];
|
|
||||||
}
|
|
||||||
a
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn bench_intfastfield_linear_fflookup(b: &mut Bencher) {
|
|
||||||
let path = Path::new("test");
|
|
||||||
let permutation = generate_permutation();
|
|
||||||
let mut directory: RAMDirectory = RAMDirectory::create();
|
|
||||||
{
|
|
||||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
|
||||||
let mut serializer = FastFieldSerializer::from_write(write).unwrap();
|
|
||||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
|
||||||
for &x in &permutation {
|
|
||||||
fast_field_writers.add_document(&doc!(*FIELD=>x));
|
|
||||||
}
|
|
||||||
fast_field_writers
|
|
||||||
.serialize(&mut serializer, &HashMap::new())
|
|
||||||
.unwrap();
|
|
||||||
serializer.close().unwrap();
|
|
||||||
}
|
|
||||||
let source = directory.open_read(&path).unwrap();
|
|
||||||
{
|
|
||||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
|
||||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
|
||||||
let fast_field_reader = FastFieldReader::<u64>::open(data);
|
|
||||||
|
|
||||||
b.iter(|| {
|
|
||||||
let n = test::black_box(7000u32);
|
|
||||||
let mut a = 0u64;
|
|
||||||
for i in (0u32..n / 7).map(|val| val * 7) {
|
|
||||||
a ^= fast_field_reader.get(i);
|
|
||||||
}
|
|
||||||
a
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn bench_intfastfield_fflookup(b: &mut Bencher) {
|
|
||||||
let path = Path::new("test");
|
|
||||||
let permutation = generate_permutation();
|
|
||||||
let mut directory: RAMDirectory = RAMDirectory::create();
|
|
||||||
{
|
|
||||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
|
||||||
let mut serializer = FastFieldSerializer::from_write(write).unwrap();
|
|
||||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
|
||||||
for &x in &permutation {
|
|
||||||
fast_field_writers.add_document(&doc!(*FIELD=>x));
|
|
||||||
}
|
|
||||||
fast_field_writers
|
|
||||||
.serialize(&mut serializer, &HashMap::new())
|
|
||||||
.unwrap();
|
|
||||||
serializer.close().unwrap();
|
|
||||||
}
|
|
||||||
let source = directory.open_read(&path).unwrap();
|
|
||||||
{
|
|
||||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
|
||||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
|
||||||
let fast_field_reader = FastFieldReader::<u64>::open(data);
|
|
||||||
|
|
||||||
b.iter(|| {
|
|
||||||
let n = test::black_box(1000u32);
|
|
||||||
let mut a = 0u32;
|
|
||||||
for _ in 0u32..n {
|
|
||||||
a = fast_field_reader.get(a) as u32;
|
|
||||||
}
|
|
||||||
a
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -67,10 +67,12 @@ impl<Item: FastValue> FastFieldReader<Item> {
|
|||||||
///
|
///
|
||||||
/// May panic if `doc` is greater than the segment
|
/// May panic if `doc` is greater than the segment
|
||||||
// `maxdoc`.
|
// `maxdoc`.
|
||||||
|
#[inline(always)]
|
||||||
pub fn get(&self, doc: DocId) -> Item {
|
pub fn get(&self, doc: DocId) -> Item {
|
||||||
self.get_u64(u64::from(doc))
|
self.get_u64(u64::from(doc))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
pub(crate) fn get_u64(&self, doc: u64) -> Item {
|
pub(crate) fn get_u64(&self, doc: u64) -> Item {
|
||||||
Item::from_u64(self.min_value_u64 + self.bit_unpacker.get(doc))
|
Item::from_u64(self.min_value_u64 + self.bit_unpacker.get(doc))
|
||||||
}
|
}
|
||||||
|
|||||||
13
src/lib.rs
13
src/lib.rs
@@ -249,7 +249,6 @@ pub struct DocAddress(pub SegmentLocalId, pub DocId);
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
|
||||||
use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE;
|
use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE;
|
||||||
use crate::core::SegmentReader;
|
use crate::core::SegmentReader;
|
||||||
use crate::docset::DocSet;
|
use crate::docset::DocSet;
|
||||||
@@ -895,3 +894,15 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Public re-exports of crate internals, compiled only when the `forbench`
/// feature is enabled.
#[cfg(feature = "forbench")]
pub mod forbench {
    /// Re-exports of the bitset types from `crate::common`.
    pub mod bitset {
        pub use crate::common::{BitSet, TinySet};
    }

    /// Re-exports of everything in `crate::postings::compression`.
    pub mod compression {
        pub use crate::postings::compression::*;
    }
}
|
||||||
|
|||||||
@@ -160,9 +160,9 @@ impl VIntEncoder for BlockEncoder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl VIntDecoder for BlockDecoder {
|
impl VIntDecoder for BlockDecoder {
|
||||||
fn uncompress_vint_sorted<'a>(
|
fn uncompress_vint_sorted(
|
||||||
&mut self,
|
&mut self,
|
||||||
compressed_data: &'a [u8],
|
compressed_data: &[u8],
|
||||||
offset: u32,
|
offset: u32,
|
||||||
num_els: usize,
|
num_els: usize,
|
||||||
) -> usize {
|
) -> usize {
|
||||||
@@ -170,7 +170,7 @@ impl VIntDecoder for BlockDecoder {
|
|||||||
vint::uncompress_sorted(compressed_data, &mut self.output.0[..num_els], offset)
|
vint::uncompress_sorted(compressed_data, &mut self.output.0[..num_els], offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn uncompress_vint_unsorted<'a>(&mut self, compressed_data: &'a [u8], num_els: usize) -> usize {
|
fn uncompress_vint_unsorted(&mut self, compressed_data: &[u8], num_els: usize) -> usize {
|
||||||
self.output_len = num_els;
|
self.output_len = num_els;
|
||||||
vint::uncompress_unsorted(compressed_data, &mut self.output.0[..num_els])
|
vint::uncompress_unsorted(compressed_data, &mut self.output.0[..num_els])
|
||||||
}
|
}
|
||||||
@@ -268,78 +268,17 @@ pub mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(all(test, feature = "unstable"))]
|
|
||||||
mod bench {
|
|
||||||
|
|
||||||
use super::*;
|
|
||||||
use rand::SeedableRng;
|
|
||||||
use rand::{Rng, XorShiftRng};
|
|
||||||
use test::Bencher;
|
|
||||||
|
|
||||||
fn generate_array_with_seed(n: usize, ratio: f64, seed_val: u8) -> Vec<u32> {
|
|
||||||
let seed: &[u8; 16] = &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, seed_val];
|
|
||||||
let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);
|
|
||||||
(0u32..).filter(|_| rng.gen_bool(ratio)).take(n).collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn generate_array(n: usize, ratio: f64) -> Vec<u32> {
|
|
||||||
generate_array_with_seed(n, ratio, 4)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn bench_compress(b: &mut Bencher) {
|
|
||||||
let mut encoder = BlockEncoder::new();
|
|
||||||
let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
|
|
||||||
b.iter(|| {
|
|
||||||
encoder.compress_block_sorted(&data, 0u32);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn bench_uncompress(b: &mut Bencher) {
|
|
||||||
let mut encoder = BlockEncoder::new();
|
|
||||||
let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
|
|
||||||
let (num_bits, compressed) = encoder.compress_block_sorted(&data, 0u32);
|
|
||||||
let mut decoder = BlockDecoder::new();
|
|
||||||
b.iter(|| {
|
|
||||||
decoder.uncompress_block_sorted(compressed, 0u32, num_bits);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_all_docs_compression_numbits() {
|
fn test_all_docs_compression_numbits() {
|
||||||
for expected_num_bits in 0u8.. {
|
for expected_num_bits in 0u8..33u8 {
|
||||||
let mut data = [0u32; 128];
|
let mut data = [0u32; 128];
|
||||||
if expected_num_bits > 0 {
|
if expected_num_bits > 0 {
|
||||||
data[0] = (1u64 << (expected_num_bits as usize) - 1) as u32;
|
data[0] = (1u64 << (expected_num_bits as u64) - 1u64) as u32;
|
||||||
}
|
}
|
||||||
let mut encoder = BlockEncoder::new();
|
let mut encoder = BlockEncoder::new();
|
||||||
let (num_bits, compressed) = encoder.compress_block_unsorted(&data);
|
let (num_bits, compressed) = encoder.compress_block_unsorted(&data);
|
||||||
assert_eq!(compressed.len(), compressed_block_size(num_bits));
|
assert_eq!(compressed.len(), compressed_block_size(num_bits));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const NUM_INTS_BENCH_VINT: usize = 10;
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn bench_compress_vint(b: &mut Bencher) {
|
|
||||||
let mut encoder = BlockEncoder::new();
|
|
||||||
let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
|
|
||||||
b.iter(|| {
|
|
||||||
encoder.compress_vint_sorted(&data, 0u32);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn bench_uncompress_vint(b: &mut Bencher) {
|
|
||||||
let mut encoder = BlockEncoder::new();
|
|
||||||
let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
|
|
||||||
let compressed = encoder.compress_vint_sorted(&data, 0u32);
|
|
||||||
let mut decoder = BlockDecoder::new();
|
|
||||||
b.iter(|| {
|
|
||||||
decoder.uncompress_vint_sorted(compressed, 0u32, NUM_INTS_BENCH_VINT);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ Postings module (also called inverted index)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
mod block_search;
|
mod block_search;
|
||||||
|
|
||||||
pub(crate) mod compression;
|
pub(crate) mod compression;
|
||||||
/// Postings module
|
/// Postings module
|
||||||
///
|
///
|
||||||
|
|||||||
@@ -218,49 +218,3 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(all(test, feature = "unstable"))]
|
|
||||||
mod bench {
|
|
||||||
|
|
||||||
use super::BitSet;
|
|
||||||
use super::BitSetDocSet;
|
|
||||||
use test;
|
|
||||||
use tests;
|
|
||||||
use DocSet;
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn bench_bitset_1pct_insert(b: &mut test::Bencher) {
|
|
||||||
use tests;
|
|
||||||
let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
|
|
||||||
b.iter(|| {
|
|
||||||
let mut bitset = BitSet::with_max_value(1_000_000);
|
|
||||||
for el in els.iter().cloned() {
|
|
||||||
bitset.insert(el);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn bench_bitset_1pct_clone(b: &mut test::Bencher) {
|
|
||||||
use tests;
|
|
||||||
let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
|
|
||||||
let mut bitset = BitSet::with_max_value(1_000_000);
|
|
||||||
for el in els {
|
|
||||||
bitset.insert(el);
|
|
||||||
}
|
|
||||||
b.iter(|| bitset.clone());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn bench_bitset_1pct_clone_iterate(b: &mut test::Bencher) {
|
|
||||||
let els = tests::sample(1_000_000u32, 0.01);
|
|
||||||
let mut bitset = BitSet::with_max_value(1_000_000);
|
|
||||||
for el in els {
|
|
||||||
bitset.insert(el);
|
|
||||||
}
|
|
||||||
b.iter(|| {
|
|
||||||
let mut docset = BitSetDocSet::from(bitset.clone());
|
|
||||||
while docset.advance() {}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -411,52 +411,3 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(all(test, feature = "unstable"))]
|
|
||||||
mod bench {
|
|
||||||
|
|
||||||
use query::score_combiner::DoNothingCombiner;
|
|
||||||
use query::ConstScorer;
|
|
||||||
use query::Union;
|
|
||||||
use query::VecDocSet;
|
|
||||||
use test::Bencher;
|
|
||||||
use tests;
|
|
||||||
use DocId;
|
|
||||||
use DocSet;
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn bench_union_3_high(bench: &mut Bencher) {
|
|
||||||
let union_docset: Vec<Vec<DocId>> = vec![
|
|
||||||
tests::sample_with_seed(100_000, 0.1, 0),
|
|
||||||
tests::sample_with_seed(100_000, 0.2, 1),
|
|
||||||
];
|
|
||||||
bench.iter(|| {
|
|
||||||
let mut v = Union::<_, DoNothingCombiner>::from(
|
|
||||||
union_docset
|
|
||||||
.iter()
|
|
||||||
.map(|doc_ids| VecDocSet::from(doc_ids.clone()))
|
|
||||||
.map(ConstScorer::new)
|
|
||||||
.collect::<Vec<_>>(),
|
|
||||||
);
|
|
||||||
while v.advance() {}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
#[bench]
|
|
||||||
fn bench_union_3_low(bench: &mut Bencher) {
|
|
||||||
let union_docset: Vec<Vec<DocId>> = vec![
|
|
||||||
tests::sample_with_seed(100_000, 0.01, 0),
|
|
||||||
tests::sample_with_seed(100_000, 0.05, 1),
|
|
||||||
tests::sample_with_seed(100_000, 0.001, 2),
|
|
||||||
];
|
|
||||||
bench.iter(|| {
|
|
||||||
let mut v = Union::<_, DoNothingCombiner>::from(
|
|
||||||
union_docset
|
|
||||||
.iter()
|
|
||||||
.map(|doc_ids| VecDocSet::from(doc_ids.clone()))
|
|
||||||
.map(ConstScorer::new)
|
|
||||||
.collect::<Vec<_>>(),
|
|
||||||
);
|
|
||||||
while v.advance() {}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
Reference in New Issue
Block a user