mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-28 13:02:55 +00:00
Compare commits
2 Commits
bugfix-pos
...
criterion
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a854a60e2a | ||
|
|
92d73a6bfb |
14
Cargo.toml
14
Cargo.toml
@@ -62,6 +62,7 @@ rand = "0.7"
|
||||
maplit = "1"
|
||||
matches = "0.1.8"
|
||||
time = "0.1.42"
|
||||
criterion = "0.2"
|
||||
|
||||
[profile.release]
|
||||
opt-level = 3
|
||||
@@ -74,6 +75,7 @@ overflow-checks = true
|
||||
|
||||
[features]
|
||||
default = ["mmap"]
|
||||
forbench = []
|
||||
mmap = ["atomicwrites", "fs2", "memmap", "notify"]
|
||||
lz4-compression = ["lz4"]
|
||||
failpoints = ["fail/failpoints"]
|
||||
@@ -97,3 +99,15 @@ features = ["failpoints"]
|
||||
name = "failpoints"
|
||||
path = "tests/failpoints/mod.rs"
|
||||
required-features = ["fail/failpoints"]
|
||||
|
||||
[profile.bench]
|
||||
lto = true
|
||||
|
||||
# Criterion benchmarks provide their own `main` (via `criterion_main!`), so the
# default libtest harness must be disabled for every one of them.
# Targets that import `tantivy::forbench` are only built when that feature is on.
[[bench]]
name = "vint"
harness = false
required-features = ["forbench"]

[[bench]]
name = "fastfield"
harness = false

[[bench]]
name = "bitset"
harness = false
required-features = ["forbench"]

[[bench]]
name = "union"
harness = false
|
||||
73
benches/bitset.rs
Normal file
73
benches/bitset.rs
Normal file
@@ -0,0 +1,73 @@
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use rand::distributions::{Bernoulli, Uniform};
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use tantivy::forbench::bitset::{BitSet, TinySet};
|
||||
use tantivy::query::BitSetDocSet;
|
||||
use tantivy::DocSet;
|
||||
|
||||
fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec<u32> {
|
||||
StdRng::from_seed([seed_val; 32])
|
||||
.sample_iter(&Bernoulli::new(ratio).unwrap())
|
||||
.take(n as usize)
|
||||
.enumerate()
|
||||
.filter_map(|(val, keep)| if keep { Some(val as u32) } else { None })
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn generate_nonunique_unsorted(max_value: u32, n_elems: usize) -> Vec<u32> {
|
||||
let seed: [u8; 32] = [1; 32];
|
||||
StdRng::from_seed(seed)
|
||||
.sample_iter(&Uniform::new(0u32, max_value))
|
||||
.take(n_elems)
|
||||
.collect::<Vec<u32>>()
|
||||
}
|
||||
|
||||
fn bench_tinyset_pop(criterion: &mut Criterion) {
|
||||
criterion.bench_function("pop_lowest", |b| {
|
||||
b.iter(|| {
|
||||
let mut tinyset = TinySet::singleton(criterion::black_box(31u32));
|
||||
tinyset.pop_lowest();
|
||||
tinyset.pop_lowest();
|
||||
tinyset.pop_lowest();
|
||||
tinyset.pop_lowest();
|
||||
tinyset.pop_lowest();
|
||||
tinyset.pop_lowest();
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_bitset_insert(criterion: &mut Criterion) {
|
||||
criterion.bench_function_over_inputs(
|
||||
"bitset_insert",
|
||||
|bench, (max_value, n_elems)| {
|
||||
let els = generate_nonunique_unsorted(*max_value, *n_elems);
|
||||
bench.iter(move || {
|
||||
let mut bitset = BitSet::with_max_value(1_000_000);
|
||||
for el in els.iter().cloned() {
|
||||
bitset.insert(el);
|
||||
}
|
||||
});
|
||||
},
|
||||
vec![(1_000_000u32, 10_000)],
|
||||
);
|
||||
}
|
||||
|
||||
fn bench_bitsetdocset_iterate(b: &mut test::Bencher) {
|
||||
let mut bitset = BitSet::with_max_value(1_000_000);
|
||||
for el in sample_with_seed(1_000_000u32, 0.01, 0u8) {
|
||||
bitset.insert(el);
|
||||
}
|
||||
b.iter(|| {
|
||||
let mut docset = BitSetDocSet::from(bitset.clone());
|
||||
while docset.advance() {}
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
benches,
|
||||
bench_tinyset_pop,
|
||||
bench_bitset_insert,
|
||||
bench_bitsetdocset_iterate
|
||||
);
|
||||
criterion_main!(benches);
|
||||
107
benches/fastfield.rs
Normal file
107
benches/fastfield.rs
Normal file
@@ -0,0 +1,107 @@
|
||||
use criterion::criterion_group;
|
||||
use criterion::criterion_main;
|
||||
use criterion::Criterion;
|
||||
use criterion::ParameterizedBenchmark;
|
||||
use rand::rngs::StdRng;
|
||||
use rand::seq::SliceRandom;
|
||||
use rand::SeedableRng;
|
||||
use tantivy::schema::{Schema, FAST};
|
||||
use tantivy::{doc, DocId, Index};
|
||||
|
||||
const NUM_LOOKUPS: usize = 1_000;
|
||||
|
||||
fn generate_permutation(stride: usize, bit_width: u8) -> Vec<u64> {
|
||||
let mut permutation: Vec<u64> = (0u64..(NUM_LOOKUPS * stride) as u64).collect();
|
||||
permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
|
||||
permutation.push(1u64 << (bit_width as u64)); //< just to force the bit_width
|
||||
permutation
|
||||
}
|
||||
|
||||
fn bench_linear_lookup(c: &mut Criterion) {
|
||||
c.bench(
|
||||
"lookup_stride",
|
||||
ParameterizedBenchmark::new(
|
||||
"baseline_vec",
|
||||
|bench, (stride, num_bits)| {
|
||||
let arr = generate_permutation(*stride, *num_bits);
|
||||
bench.iter(move || {
|
||||
let mut a = 0u64;
|
||||
for i in (0..NUM_LOOKUPS / stride).map(|v| v * 7) {
|
||||
a ^= arr[i as usize];
|
||||
}
|
||||
a
|
||||
})
|
||||
},
|
||||
vec![(7, 1), (7, 5), (7, 20)],
|
||||
)
|
||||
.with_function("fastfield", |bench, (stride, num_bits)| {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let val_field = schema_builder.add_u64_field("val", FAST);
|
||||
let schema = schema_builder.build();
|
||||
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer = index.writer_with_num_threads(1, 80_000_000).unwrap();
|
||||
for el in generate_permutation(*stride, *num_bits) {
|
||||
index_writer.add_document(doc!(val_field=>el));
|
||||
}
|
||||
index_writer.commit().unwrap();
|
||||
let reader = index.reader().unwrap();
|
||||
let searcher = reader.searcher();
|
||||
let segment_reader = searcher.segment_reader(0u32);
|
||||
let fast_field_reader = segment_reader.fast_fields().u64(val_field).unwrap();
|
||||
bench.iter(move || {
|
||||
let mut a = 0u64;
|
||||
for i in (0..NUM_LOOKUPS / stride).map(|v| v * 7) {
|
||||
a ^= fast_field_reader.get(i as DocId);
|
||||
}
|
||||
a
|
||||
})
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
fn bench_jumpy_lookup(c: &mut Criterion) {
|
||||
c.bench(
|
||||
"lookup_jumpy",
|
||||
ParameterizedBenchmark::new(
|
||||
"baseline_vec",
|
||||
|bench, (stride, num_bits)| {
|
||||
let arr = generate_permutation(*stride, *num_bits);
|
||||
bench.iter(move || {
|
||||
let mut a = 0u64;
|
||||
for _ in 0..NUM_LOOKUPS {
|
||||
a = arr[a as usize];
|
||||
}
|
||||
a
|
||||
})
|
||||
},
|
||||
vec![(7, 1), (7, 5), (7, 20)],
|
||||
)
|
||||
.with_function("fastfield", |bench, (stride, num_bits)| {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let val_field = schema_builder.add_u64_field("val", FAST);
|
||||
let schema = schema_builder.build();
|
||||
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer = index.writer_with_num_threads(1, 80_000_000).unwrap();
|
||||
for el in generate_permutation(*stride, *num_bits) {
|
||||
index_writer.add_document(doc!(val_field=>el));
|
||||
}
|
||||
index_writer.commit().unwrap();
|
||||
let reader = index.reader().unwrap();
|
||||
let searcher = reader.searcher();
|
||||
let segment_reader = searcher.segment_reader(0u32);
|
||||
let fast_field_reader = segment_reader.fast_fields().u64(val_field).unwrap();
|
||||
bench.iter(move || {
|
||||
let mut a = 0u64;
|
||||
for _ in 0..NUM_LOOKUPS {
|
||||
a = fast_field_reader.get(a as DocId);
|
||||
}
|
||||
a
|
||||
})
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_linear_lookup, bench_jumpy_lookup);
|
||||
criterion_main!(benches);
|
||||
50
benches/union.rs
Normal file
50
benches/union.rs
Normal file
@@ -0,0 +1,50 @@
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use tantivy::query::QueryParser;
|
||||
use tantivy::schema::{Schema, STRING};
|
||||
use tantivy::{Document, Index};
|
||||
|
||||
fn bench_union(criterion: &mut Criterion) {
|
||||
criterion.bench_function_over_inputs(
|
||||
"union_docset_fulladvance",
|
||||
|bench, (ratio_left, ratio_right)| {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let field = schema_builder.add_text_field("val", STRING);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer = index.writer_with_num_threads(1, 80_000_000).unwrap();
|
||||
let mut stdrng = StdRng::from_seed([0u8; 32]);
|
||||
for _ in 0u32..100_000u32 {
|
||||
let mut doc = Document::default();
|
||||
if stdrng.gen_bool(*ratio_left) {
|
||||
doc.add_text(field, "left");
|
||||
}
|
||||
if stdrng.gen_bool(*ratio_right) {
|
||||
doc.add_text(field, "right");
|
||||
}
|
||||
index_writer.add_document(doc);
|
||||
}
|
||||
index_writer.commit().unwrap();
|
||||
let reader = index.reader().unwrap();
|
||||
let searcher = reader.searcher();
|
||||
|
||||
let query = QueryParser::for_index(&index, vec![field])
|
||||
.parse_query("left right")
|
||||
.unwrap();
|
||||
|
||||
bench.iter(move || {
|
||||
let weight = query.weight(&searcher, false).unwrap();
|
||||
let mut scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
|
||||
let mut sum_docs = 0u64;
|
||||
scorer.for_each(&mut |doc_id, _score| {
|
||||
sum_docs += doc_id as u64;
|
||||
});
|
||||
});
|
||||
},
|
||||
vec![(0.2, 0.1), (0.2, 0.02)],
|
||||
);
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_union);
|
||||
criterion_main!(benches);
|
||||
72
benches/vint.rs
Normal file
72
benches/vint.rs
Normal file
@@ -0,0 +1,72 @@
|
||||
use criterion::{criterion_group, criterion_main, Criterion, ParameterizedBenchmark};
|
||||
use rand::rngs::StdRng;
|
||||
use rand::Rng;
|
||||
use rand::SeedableRng;
|
||||
use tantivy::forbench::compression::{compressed_block_size, BlockDecoder};
|
||||
use tantivy::forbench::compression::{BlockEncoder, VIntEncoder};
|
||||
use tantivy::forbench::compression::{VIntDecoder, COMPRESSION_BLOCK_SIZE};
|
||||
|
||||
fn generate_array_with_seed(n: usize, ratio: f64, seed_val: u8) -> Vec<u32> {
|
||||
let seed: [u8; 32] = [seed_val; 32];
|
||||
let mut rng = StdRng::from_seed(seed);
|
||||
(0u32..).filter(|_| rng.gen_bool(ratio)).take(n).collect()
|
||||
}
|
||||
|
||||
pub fn generate_array(n: usize, ratio: f64) -> Vec<u32> {
|
||||
generate_array_with_seed(n, ratio, 4)
|
||||
}
|
||||
|
||||
fn bench_compress(criterion: &mut Criterion) {
|
||||
criterion.bench(
|
||||
"compress_sorted",
|
||||
ParameterizedBenchmark::new(
|
||||
"bitpack",
|
||||
|bench, ratio| {
|
||||
let mut encoder = BlockEncoder::new();
|
||||
let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
|
||||
bench.iter(|| {
|
||||
encoder.compress_block_sorted(&data, 0u32);
|
||||
});
|
||||
},
|
||||
vec![0.1],
|
||||
)
|
||||
.with_function("vint", |bench, ratio| {
|
||||
let mut encoder = BlockEncoder::new();
|
||||
let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
|
||||
bench.iter(|| {
|
||||
encoder.compress_vint_sorted(&data, 0u32);
|
||||
});
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
fn bench_uncompress(criterion: &mut Criterion) {
|
||||
criterion.bench(
|
||||
"uncompress_sorted",
|
||||
ParameterizedBenchmark::new(
|
||||
"bitpack",
|
||||
|bench, ratio| {
|
||||
let mut encoder = BlockEncoder::new();
|
||||
let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
|
||||
let (num_bits, compressed) = encoder.compress_block_sorted(&data, 0u32);
|
||||
let mut decoder = BlockDecoder::new();
|
||||
bench.iter(|| {
|
||||
decoder.uncompress_block_sorted(compressed, 0u32, num_bits);
|
||||
});
|
||||
},
|
||||
vec![0.1],
|
||||
)
|
||||
.with_function("vint", |bench, ratio| {
|
||||
let mut encoder = BlockEncoder::new();
|
||||
let data = generate_array(COMPRESSION_BLOCK_SIZE, *ratio);
|
||||
let compressed = encoder.compress_vint_sorted(&data, 0u32);
|
||||
let mut decoder = BlockDecoder::new();
|
||||
bench.iter(move || {
|
||||
decoder.uncompress_vint_sorted(compressed, 0u32, COMPRESSION_BLOCK_SIZE);
|
||||
});
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_compress, bench_uncompress);
|
||||
criterion_main!(benches);
|
||||
2
run-bench.rs
Executable file
2
run-bench.rs
Executable file
@@ -0,0 +1,2 @@
|
||||
#!/usr/bin/env bash
# Runs the crate's benchmarks with the `forbench` Cargo feature enabled.
cargo bench --features forbench
|
||||
@@ -86,6 +86,7 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn get(&self, idx: u64) -> u64 {
|
||||
if self.num_bits == 0 {
|
||||
return 0u64;
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::fmt;
|
||||
use std::u64;
|
||||
|
||||
#[derive(Clone, Copy, Eq, PartialEq)]
|
||||
pub(crate) struct TinySet(u64);
|
||||
pub struct TinySet(u64);
|
||||
|
||||
impl fmt::Debug for TinySet {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
@@ -353,43 +353,3 @@ mod tests {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(all(test, feature = "unstable"))]
|
||||
mod bench {
|
||||
|
||||
use super::BitSet;
|
||||
use super::TinySet;
|
||||
use test;
|
||||
|
||||
#[bench]
|
||||
fn bench_tinyset_pop(b: &mut test::Bencher) {
|
||||
b.iter(|| {
|
||||
let mut tinyset = TinySet::singleton(test::black_box(31u32));
|
||||
tinyset.pop_lowest();
|
||||
tinyset.pop_lowest();
|
||||
tinyset.pop_lowest();
|
||||
tinyset.pop_lowest();
|
||||
tinyset.pop_lowest();
|
||||
tinyset.pop_lowest();
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_tinyset_sum(b: &mut test::Bencher) {
|
||||
let tiny_set = TinySet::empty().insert(10u32).insert(14u32).insert(21u32);
|
||||
b.iter(|| {
|
||||
assert_eq!(test::black_box(tiny_set).into_iter().sum::<u32>(), 45u32);
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_tinyarr_sum(b: &mut test::Bencher) {
|
||||
let v = [10u32, 14u32, 21u32];
|
||||
b.iter(|| test::black_box(v).iter().cloned().sum::<u32>());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_bitset_initialize(b: &mut test::Bencher) {
|
||||
b.iter(|| BitSet::with_max_value(1_000_000));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ mod serialize;
|
||||
mod vint;
|
||||
|
||||
pub use self::bitset::BitSet;
|
||||
pub(crate) use self::bitset::TinySet;
|
||||
pub use self::bitset::TinySet;
|
||||
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
|
||||
pub use self::counting_writer::CountingWriter;
|
||||
pub use self::serialize::{BinarySerializable, FixedSize};
|
||||
|
||||
@@ -431,111 +431,3 @@ mod tests {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#[cfg(all(test, feature = "unstable"))]
|
||||
mod bench {
|
||||
use super::tests::FIELD;
|
||||
use super::tests::{generate_permutation, SCHEMA};
|
||||
use super::*;
|
||||
use common::CompositeFile;
|
||||
use directory::{Directory, RAMDirectory, WritePtr};
|
||||
use fastfield::FastFieldReader;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use test::{self, Bencher};
|
||||
|
||||
#[bench]
|
||||
fn bench_intfastfield_linear_veclookup(b: &mut Bencher) {
|
||||
let permutation = generate_permutation();
|
||||
b.iter(|| {
|
||||
let n = test::black_box(7000u32);
|
||||
let mut a = 0u64;
|
||||
for i in (0u32..n / 7).map(|v| v * 7) {
|
||||
a ^= permutation[i as usize];
|
||||
}
|
||||
a
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_intfastfield_veclookup(b: &mut Bencher) {
|
||||
let permutation = generate_permutation();
|
||||
b.iter(|| {
|
||||
let n = test::black_box(1000u32);
|
||||
let mut a = 0u64;
|
||||
for _ in 0u32..n {
|
||||
a = permutation[a as usize];
|
||||
}
|
||||
a
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_intfastfield_linear_fflookup(b: &mut Bencher) {
|
||||
let path = Path::new("test");
|
||||
let permutation = generate_permutation();
|
||||
let mut directory: RAMDirectory = RAMDirectory::create();
|
||||
{
|
||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut serializer = FastFieldSerializer::from_write(write).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
for &x in &permutation {
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>x));
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new())
|
||||
.unwrap();
|
||||
serializer.close().unwrap();
|
||||
}
|
||||
let source = directory.open_read(&path).unwrap();
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader = FastFieldReader::<u64>::open(data);
|
||||
|
||||
b.iter(|| {
|
||||
let n = test::black_box(7000u32);
|
||||
let mut a = 0u64;
|
||||
for i in (0u32..n / 7).map(|val| val * 7) {
|
||||
a ^= fast_field_reader.get(i);
|
||||
}
|
||||
a
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_intfastfield_fflookup(b: &mut Bencher) {
|
||||
let path = Path::new("test");
|
||||
let permutation = generate_permutation();
|
||||
let mut directory: RAMDirectory = RAMDirectory::create();
|
||||
{
|
||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut serializer = FastFieldSerializer::from_write(write).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
for &x in &permutation {
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>x));
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new())
|
||||
.unwrap();
|
||||
serializer.close().unwrap();
|
||||
}
|
||||
let source = directory.open_read(&path).unwrap();
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&source).unwrap();
|
||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader = FastFieldReader::<u64>::open(data);
|
||||
|
||||
b.iter(|| {
|
||||
let n = test::black_box(1000u32);
|
||||
let mut a = 0u32;
|
||||
for _ in 0u32..n {
|
||||
a = fast_field_reader.get(a) as u32;
|
||||
}
|
||||
a
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -67,10 +67,12 @@ impl<Item: FastValue> FastFieldReader<Item> {
|
||||
///
|
||||
/// May panic if `doc` is greater than the segment
|
||||
/// `maxdoc`.
|
||||
#[inline(always)]
pub fn get(&self, doc: DocId) -> Item {
    // Widen the doc id to `u64` and delegate to `get_u64`.
    self.get_u64(u64::from(doc))
}
|
||||
|
||||
/// Returns the value stored at position `doc`, with the position given as a `u64`.
#[inline(always)]
pub(crate) fn get_u64(&self, doc: u64) -> Item {
    // The bit unpacker's output is added to `min_value_u64` before being
    // converted to `Item`.
    Item::from_u64(self.min_value_u64 + self.bit_unpacker.get(doc))
}
|
||||
|
||||
13
src/lib.rs
13
src/lib.rs
@@ -249,7 +249,6 @@ pub struct DocAddress(pub SegmentLocalId, pub DocId);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE;
|
||||
use crate::core::SegmentReader;
|
||||
use crate::docset::DocSet;
|
||||
@@ -895,3 +894,15 @@ mod tests {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Re-exports of crate internals; compiled only when the `forbench` feature
/// is enabled.
#[cfg(feature = "forbench")]
pub mod forbench {
    /// Glob re-export of `crate::postings::compression`.
    pub mod compression {
        pub use crate::postings::compression::*;
    }

    /// Re-exports `BitSet` and `TinySet` from `crate::common`.
    pub mod bitset {
        pub use crate::common::BitSet;
        pub use crate::common::TinySet;
    }
}
|
||||
|
||||
@@ -160,9 +160,9 @@ impl VIntEncoder for BlockEncoder {
|
||||
}
|
||||
|
||||
impl VIntDecoder for BlockDecoder {
|
||||
fn uncompress_vint_sorted<'a>(
|
||||
fn uncompress_vint_sorted(
|
||||
&mut self,
|
||||
compressed_data: &'a [u8],
|
||||
compressed_data: &[u8],
|
||||
offset: u32,
|
||||
num_els: usize,
|
||||
) -> usize {
|
||||
@@ -170,7 +170,7 @@ impl VIntDecoder for BlockDecoder {
|
||||
vint::uncompress_sorted(compressed_data, &mut self.output.0[..num_els], offset)
|
||||
}
|
||||
|
||||
fn uncompress_vint_unsorted<'a>(&mut self, compressed_data: &'a [u8], num_els: usize) -> usize {
|
||||
fn uncompress_vint_unsorted(&mut self, compressed_data: &[u8], num_els: usize) -> usize {
|
||||
self.output_len = num_els;
|
||||
vint::uncompress_unsorted(compressed_data, &mut self.output.0[..num_els])
|
||||
}
|
||||
@@ -268,78 +268,17 @@ pub mod tests {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(all(test, feature = "unstable"))]
|
||||
mod bench {
|
||||
|
||||
use super::*;
|
||||
use rand::SeedableRng;
|
||||
use rand::{Rng, XorShiftRng};
|
||||
use test::Bencher;
|
||||
|
||||
fn generate_array_with_seed(n: usize, ratio: f64, seed_val: u8) -> Vec<u32> {
|
||||
let seed: &[u8; 16] = &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, seed_val];
|
||||
let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);
|
||||
(0u32..).filter(|_| rng.gen_bool(ratio)).take(n).collect()
|
||||
}
|
||||
|
||||
pub fn generate_array(n: usize, ratio: f64) -> Vec<u32> {
|
||||
generate_array_with_seed(n, ratio, 4)
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_compress(b: &mut Bencher) {
|
||||
let mut encoder = BlockEncoder::new();
|
||||
let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
|
||||
b.iter(|| {
|
||||
encoder.compress_block_sorted(&data, 0u32);
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_uncompress(b: &mut Bencher) {
|
||||
let mut encoder = BlockEncoder::new();
|
||||
let data = generate_array(COMPRESSION_BLOCK_SIZE, 0.1);
|
||||
let (num_bits, compressed) = encoder.compress_block_sorted(&data, 0u32);
|
||||
let mut decoder = BlockDecoder::new();
|
||||
b.iter(|| {
|
||||
decoder.uncompress_block_sorted(compressed, 0u32, num_bits);
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_all_docs_compression_numbits() {
|
||||
for expected_num_bits in 0u8.. {
|
||||
for expected_num_bits in 0u8..33u8 {
|
||||
let mut data = [0u32; 128];
|
||||
if expected_num_bits > 0 {
|
||||
data[0] = (1u64 << (expected_num_bits as usize) - 1) as u32;
|
||||
data[0] = (1u64 << (expected_num_bits as u64) - 1u64) as u32;
|
||||
}
|
||||
let mut encoder = BlockEncoder::new();
|
||||
let (num_bits, compressed) = encoder.compress_block_unsorted(&data);
|
||||
assert_eq!(compressed.len(), compressed_block_size(num_bits));
|
||||
}
|
||||
}
|
||||
|
||||
const NUM_INTS_BENCH_VINT: usize = 10;
|
||||
|
||||
#[bench]
|
||||
fn bench_compress_vint(b: &mut Bencher) {
|
||||
let mut encoder = BlockEncoder::new();
|
||||
let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
|
||||
b.iter(|| {
|
||||
encoder.compress_vint_sorted(&data, 0u32);
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_uncompress_vint(b: &mut Bencher) {
|
||||
let mut encoder = BlockEncoder::new();
|
||||
let data = generate_array(NUM_INTS_BENCH_VINT, 0.001);
|
||||
let compressed = encoder.compress_vint_sorted(&data, 0u32);
|
||||
let mut decoder = BlockDecoder::new();
|
||||
b.iter(|| {
|
||||
decoder.uncompress_vint_sorted(compressed, 0u32, NUM_INTS_BENCH_VINT);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ Postings module (also called inverted index)
|
||||
*/
|
||||
|
||||
mod block_search;
|
||||
|
||||
pub(crate) mod compression;
|
||||
/// Postings module
|
||||
///
|
||||
|
||||
@@ -218,49 +218,3 @@ mod tests {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#[cfg(all(test, feature = "unstable"))]
|
||||
mod bench {
|
||||
|
||||
use super::BitSet;
|
||||
use super::BitSetDocSet;
|
||||
use test;
|
||||
use tests;
|
||||
use DocSet;
|
||||
|
||||
#[bench]
|
||||
fn bench_bitset_1pct_insert(b: &mut test::Bencher) {
|
||||
use tests;
|
||||
let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
|
||||
b.iter(|| {
|
||||
let mut bitset = BitSet::with_max_value(1_000_000);
|
||||
for el in els.iter().cloned() {
|
||||
bitset.insert(el);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_bitset_1pct_clone(b: &mut test::Bencher) {
|
||||
use tests;
|
||||
let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
|
||||
let mut bitset = BitSet::with_max_value(1_000_000);
|
||||
for el in els {
|
||||
bitset.insert(el);
|
||||
}
|
||||
b.iter(|| bitset.clone());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_bitset_1pct_clone_iterate(b: &mut test::Bencher) {
|
||||
let els = tests::sample(1_000_000u32, 0.01);
|
||||
let mut bitset = BitSet::with_max_value(1_000_000);
|
||||
for el in els {
|
||||
bitset.insert(el);
|
||||
}
|
||||
b.iter(|| {
|
||||
let mut docset = BitSetDocSet::from(bitset.clone());
|
||||
while docset.advance() {}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -411,52 +411,3 @@ mod tests {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#[cfg(all(test, feature = "unstable"))]
|
||||
mod bench {
|
||||
|
||||
use query::score_combiner::DoNothingCombiner;
|
||||
use query::ConstScorer;
|
||||
use query::Union;
|
||||
use query::VecDocSet;
|
||||
use test::Bencher;
|
||||
use tests;
|
||||
use DocId;
|
||||
use DocSet;
|
||||
|
||||
#[bench]
|
||||
fn bench_union_3_high(bench: &mut Bencher) {
|
||||
let union_docset: Vec<Vec<DocId>> = vec![
|
||||
tests::sample_with_seed(100_000, 0.1, 0),
|
||||
tests::sample_with_seed(100_000, 0.2, 1),
|
||||
];
|
||||
bench.iter(|| {
|
||||
let mut v = Union::<_, DoNothingCombiner>::from(
|
||||
union_docset
|
||||
.iter()
|
||||
.map(|doc_ids| VecDocSet::from(doc_ids.clone()))
|
||||
.map(ConstScorer::new)
|
||||
.collect::<Vec<_>>(),
|
||||
);
|
||||
while v.advance() {}
|
||||
});
|
||||
}
|
||||
#[bench]
|
||||
fn bench_union_3_low(bench: &mut Bencher) {
|
||||
let union_docset: Vec<Vec<DocId>> = vec![
|
||||
tests::sample_with_seed(100_000, 0.01, 0),
|
||||
tests::sample_with_seed(100_000, 0.05, 1),
|
||||
tests::sample_with_seed(100_000, 0.001, 2),
|
||||
];
|
||||
bench.iter(|| {
|
||||
let mut v = Union::<_, DoNothingCombiner>::from(
|
||||
union_docset
|
||||
.iter()
|
||||
.map(|doc_ids| VecDocSet::from(doc_ids.clone()))
|
||||
.map(ConstScorer::new)
|
||||
.collect::<Vec<_>>(),
|
||||
);
|
||||
while v.advance() {}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user