mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
* fix cardinality aggregation performance: fix cardinality performance by fetching multiple terms at once. This avoids decompressing the same block and keeps the buffer state between terms. * add cardinality aggregation benchmark. * bump rust version to 1.66. Performance comparison to before (AllQuery): ``` full cardinality_agg Memory: 3.5 MB (-0.00%) Avg: 21.2256ms (-97.78%) Median: 21.0042ms (-97.82%) [20.4717ms .. 23.6206ms] terms_few_with_cardinality_agg Memory: 10.6 MB Avg: 81.9293ms (-97.37%) Median: 81.5526ms (-97.38%) [79.7564ms .. 88.0374ms] dense cardinality_agg Memory: 3.6 MB (-0.00%) Avg: 25.9372ms (-97.24%) Median: 25.7744ms (-97.25%) [24.7241ms .. 27.8793ms] terms_few_with_cardinality_agg Memory: 10.6 MB Avg: 93.9897ms (-96.91%) Median: 92.7821ms (-96.94%) [90.3312ms .. 117.4076ms] sparse cardinality_agg Memory: 895.4 KB (-0.00%) Avg: 22.5113ms (-95.01%) Median: 22.5629ms (-94.99%) [22.1628ms .. 22.9436ms] terms_few_with_cardinality_agg Memory: 680.2 KB Avg: 26.4250ms (-94.85%) Median: 26.4135ms (-94.86%) [26.3210ms .. 26.6774ms] ``` * clippy * assert for sorted ordinals
88 lines
2.4 KiB
Rust
88 lines
2.4 KiB
Rust
use std::collections::BTreeSet;
|
|
use std::io;
|
|
|
|
use common::file_slice::FileSlice;
|
|
use criterion::{criterion_group, criterion_main, Criterion};
|
|
use rand::rngs::StdRng;
|
|
use rand::{Rng, SeedableRng};
|
|
use tantivy_sstable::{Dictionary, MonotonicU64SSTable};
|
|
|
|
/// Alphabet the random benchmark keys are drawn from (10 lowercase letters).
const CHARSET: &[u8] = b"abcdefghij";
|
|
|
|
fn generate_key(rng: &mut impl Rng) -> String {
|
|
let len = rng.gen_range(3..12);
|
|
std::iter::from_fn(|| {
|
|
let idx = rng.gen_range(0..CHARSET.len());
|
|
Some(CHARSET[idx] as char)
|
|
})
|
|
.take(len)
|
|
.collect()
|
|
}
|
|
|
|
fn prepare_sstable() -> io::Result<Dictionary<MonotonicU64SSTable>> {
|
|
let mut rng = StdRng::from_seed([3u8; 32]);
|
|
let mut els = BTreeSet::new();
|
|
while els.len() < 100_000 {
|
|
els.insert(generate_key(&mut rng));
|
|
}
|
|
let mut dictionary_builder = Dictionary::<MonotonicU64SSTable>::builder(Vec::new())?;
|
|
for (ord, word) in els.iter().enumerate() {
|
|
dictionary_builder.insert(word, &(ord as u64))?;
|
|
}
|
|
let buffer = dictionary_builder.finish()?;
|
|
let dictionary = Dictionary::open(FileSlice::from(buffer))?;
|
|
Ok(dictionary)
|
|
}
|
|
|
|
fn stream_bench(
|
|
dictionary: &Dictionary<MonotonicU64SSTable>,
|
|
lower: &[u8],
|
|
upper: &[u8],
|
|
do_scan: bool,
|
|
) -> usize {
|
|
let mut stream = dictionary
|
|
.range()
|
|
.ge(lower)
|
|
.lt(upper)
|
|
.into_stream()
|
|
.unwrap();
|
|
if !do_scan {
|
|
return 0;
|
|
}
|
|
let mut count = 0;
|
|
while stream.advance() {
|
|
count += 1;
|
|
}
|
|
count
|
|
}
|
|
|
|
pub fn criterion_benchmark(c: &mut Criterion) {
|
|
let dict = prepare_sstable().unwrap();
|
|
c.bench_function("short_scan_init", |b| {
|
|
b.iter(|| stream_bench(&dict, b"fa", b"fana", false))
|
|
});
|
|
c.bench_function("short_scan_init_and_scan", |b| {
|
|
b.iter(|| {
|
|
assert_eq!(stream_bench(&dict, b"fa", b"faz", true), 971);
|
|
})
|
|
});
|
|
c.bench_function("full_scan_init_and_scan_full_with_bound", |b| {
|
|
b.iter(|| {
|
|
assert_eq!(stream_bench(&dict, b"", b"z", true), 100_000);
|
|
})
|
|
});
|
|
c.bench_function("full_scan_init_and_scan_full_no_bounds", |b| {
|
|
b.iter(|| {
|
|
let mut stream = dict.stream().unwrap();
|
|
let mut count = 0;
|
|
while stream.advance() {
|
|
count += 1;
|
|
}
|
|
count
|
|
})
|
|
});
|
|
}
|
|
|
|
// Wire the benchmark function into a criterion group; `criterion_main!`
// generates the binary's `main` that runs every registered group.
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
|