mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-10 11:02:55 +00:00
* fix cardinality aggregation performance

  fix cardinality performance by fetching multiple terms at once. This avoids
  decompressing the same block and keeps the buffer state between terms.

  add cardinality aggregation benchmark

  bump rust version to 1.66

  Performance comparison to before (AllQuery)
  ```
  full
  cardinality_agg Memory: 3.5 MB (-0.00%) Avg: 21.2256ms (-97.78%) Median: 21.0042ms (-97.82%) [20.4717ms .. 23.6206ms]
  terms_few_with_cardinality_agg Memory: 10.6 MB Avg: 81.9293ms (-97.37%) Median: 81.5526ms (-97.38%) [79.7564ms .. 88.0374ms]
  dense
  cardinality_agg Memory: 3.6 MB (-0.00%) Avg: 25.9372ms (-97.24%) Median: 25.7744ms (-97.25%) [24.7241ms .. 27.8793ms]
  terms_few_with_cardinality_agg Memory: 10.6 MB Avg: 93.9897ms (-96.91%) Median: 92.7821ms (-96.94%) [90.3312ms .. 117.4076ms]
  sparse
  cardinality_agg Memory: 895.4 KB (-0.00%) Avg: 22.5113ms (-95.01%) Median: 22.5629ms (-94.99%) [22.1628ms .. 22.9436ms]
  terms_few_with_cardinality_agg Memory: 680.2 KB Avg: 26.4250ms (-94.85%) Median: 26.4135ms (-94.86%) [26.3210ms .. 26.6774ms]
  ```
* clippy
* assert for sorted ordinals
66 lines
1.7 KiB
Rust
66 lines
1.7 KiB
Rust
/// High bit of an encoded byte; when set, at least one more byte follows.
const CONTINUE_BIT: u8 = 1u8 << 7;
|
|
|
|
pub fn serialize(mut val: u64, buffer: &mut [u8]) -> usize {
|
|
for (i, b) in buffer.iter_mut().enumerate() {
|
|
let next_byte: u8 = (val & 127u64) as u8;
|
|
val >>= 7;
|
|
if val == 0u64 {
|
|
*b = next_byte;
|
|
return i + 1;
|
|
} else {
|
|
*b = next_byte | CONTINUE_BIT;
|
|
}
|
|
}
|
|
10 //< actually unreachable
|
|
}
|
|
|
|
pub fn serialize_into_vec(val: u64, buffer: &mut Vec<u8>) {
|
|
let mut buf = [0u8; 10];
|
|
let num_bytes = serialize(val, &mut buf[..]);
|
|
buffer.extend_from_slice(&buf[..num_bytes]);
|
|
}
|
|
|
|
// super slow but we don't care
|
|
pub fn deserialize_read(buf: &[u8]) -> (usize, u64) {
|
|
let mut result = 0u64;
|
|
let mut shift = 0u64;
|
|
let mut consumed = 0;
|
|
|
|
for &b in buf {
|
|
consumed += 1;
|
|
result |= u64::from(b % 128u8) << shift;
|
|
if b < CONTINUE_BIT {
|
|
break;
|
|
}
|
|
shift += 7;
|
|
}
|
|
(consumed, result)
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::{deserialize_read, serialize};

    /// Encodes `val`, checks that exactly `expect_len` bytes were written,
    /// then decodes the buffer and checks that the same length and value
    /// come back.
    fn aux_test_int(val: u64, expect_len: usize) {
        let mut scratch = [0u8; 14];
        let written = serialize(val, &mut scratch[..]);
        assert_eq!(written, expect_len);
        let decoded = deserialize_read(&scratch);
        assert_eq!(decoded, (expect_len, val));
    }

    #[test]
    fn test_vint() {
        // Hand-picked values around the 1-byte / 2-byte boundary, plus one
        // larger value.
        let fixed_cases: [(u64, usize); 5] =
            [(0, 1), (17, 1), (127, 1), (128, 2), (123423418, 4)];
        for &(val, expect_len) in fixed_cases.iter() {
            aux_test_int(val, expect_len);
        }

        // Values just above, at, and just below each power of two.
        for exponent in 1..63 {
            let power_of_two = 1u64 << exponent;
            let len_at_power = (exponent / 7) + 1;
            let len_below_power = ((exponent - 1) / 7) + 1;
            aux_test_int(power_of_two + 1, len_at_power);
            aux_test_int(power_of_two, len_at_power);
            aux_test_int(power_of_two - 1, len_below_power);
        }

        aux_test_int(u64::MAX, 10);
    }
}
|