mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-26 12:09:57 +00:00
* fix cardinality aggregation performance fix cardinality performance by fetching multiple terms at once. This avoids decompressing the same block and keeps the buffer state between terms. add cardinality aggregation benchmark bump rust version to 1.66 Performance comparison to before (AllQuery) ``` full cardinality_agg Memory: 3.5 MB (-0.00%) Avg: 21.2256ms (-97.78%) Median: 21.0042ms (-97.82%) [20.4717ms .. 23.6206ms] terms_few_with_cardinality_agg Memory: 10.6 MB Avg: 81.9293ms (-97.37%) Median: 81.5526ms (-97.38%) [79.7564ms .. 88.0374ms] dense cardinality_agg Memory: 3.6 MB (-0.00%) Avg: 25.9372ms (-97.24%) Median: 25.7744ms (-97.25%) [24.7241ms .. 27.8793ms] terms_few_with_cardinality_agg Memory: 10.6 MB Avg: 93.9897ms (-96.91%) Median: 92.7821ms (-96.94%) [90.3312ms .. 117.4076ms] sparse cardinality_agg Memory: 895.4 KB (-0.00%) Avg: 22.5113ms (-95.01%) Median: 22.5629ms (-94.99%) [22.1628ms .. 22.9436ms] terms_few_with_cardinality_agg Memory: 680.2 KB Avg: 26.4250ms (-94.85%) Median: 26.4135ms (-94.86%) [26.3210ms .. 26.6774ms] ``` * clippy * assert for sorted ordinals
28 lines
651 B
Rust
28 lines
651 B
Rust
use tantivy_stacker::ArenaHashMap;
|
|
|
|
const ALICE: &str = include_str!("../../benches/alice.txt");
|
|
|
|
fn main() {
|
|
create_hash_map((0..100_000_000).map(|el| el.to_string()));
|
|
|
|
for _ in 0..1000 {
|
|
create_hash_map(ALICE.split_whitespace());
|
|
}
|
|
}
|
|
|
|
fn create_hash_map<T: AsRef<str>>(terms: impl Iterator<Item = T>) -> ArenaHashMap {
|
|
let mut map = ArenaHashMap::with_capacity(4);
|
|
for term in terms {
|
|
map.mutate_or_create(term.as_ref().as_bytes(), |val| {
|
|
if let Some(mut val) = val {
|
|
val += 1;
|
|
val
|
|
} else {
|
|
1u64
|
|
}
|
|
});
|
|
}
|
|
|
|
map
|
|
}
|