From 4b90848383f341c8a32a1c8030814c56237e55f4 Mon Sep 17 00:00:00 2001
From: Paul Masurel
Date: Mon, 1 Jun 2026 11:25:53 +0200
Subject: [PATCH] Add filter_vec benchmarks (dense, sparse, full coverage)

Uses get_ids_for_value_range to exercise both the bitpacking decode
and the filter_vec SIMD path together under realistic cache conditions.
---
 bitpacker/Cargo.toml       |   5 ++
 bitpacker/benches/bench.rs | 143 +++++++++++++++++++++++--------------
 2 files changed, 94 insertions(+), 54 deletions(-)

diff --git a/bitpacker/Cargo.toml b/bitpacker/Cargo.toml
index 599c239c4..5dc28ca73 100644
--- a/bitpacker/Cargo.toml
+++ b/bitpacker/Cargo.toml
@@ -18,5 +18,10 @@ homepage = "https://github.com/quickwit-oss/tantivy"
 bitpacking = { version = "0.9.2", default-features = false, features = ["bitpacker1x"] }
 
 [dev-dependencies]
+binggan = "0.17.0"
 rand = "0.9"
 proptest = "1"
+
+[[bench]]
+name = "bench"
+harness = false
diff --git a/bitpacker/benches/bench.rs b/bitpacker/benches/bench.rs
index 12bfeb53e..10187b42d 100644
--- a/bitpacker/benches/bench.rs
+++ b/bitpacker/benches/bench.rs
@@ -1,65 +1,100 @@
-#![feature(test)]
+use std::cell::RefCell;
 
-extern crate test;
+use binggan::{BenchRunner, black_box};
+use rand::rng;
+use rand::seq::IteratorRandom;
+use tantivy_bitpacker::{BitPacker, BitUnpacker, BlockedBitpacker};
 
-#[cfg(test)]
-mod tests {
-    use rand::rng;
-    use rand::seq::IteratorRandom;
-    use tantivy_bitpacker::{BitPacker, BitUnpacker, BlockedBitpacker};
-    use test::Bencher;
+fn create_bitpacked_data(bit_width: u8, num_els: u32) -> Vec<u8> {
+    let mut bitpacker = BitPacker::new();
+    let mut buffer = Vec::new();
+    for _ in 0..num_els {
+        bitpacker.write(0u64, bit_width, &mut buffer).unwrap();
+        bitpacker.flush(&mut buffer).unwrap();
+    }
+    buffer
+}
 
-    #[inline(never)]
-    fn create_bitpacked_data(bit_width: u8, num_els: u32) -> Vec<u8> {
-        let mut bitpacker = BitPacker::new();
-        let mut buffer = Vec::new();
-        for _ in 0..num_els {
-            // the values do not matter.
-            bitpacker.write(0u64, bit_width, &mut buffer).unwrap();
-            bitpacker.flush(&mut buffer).unwrap();
+const N: usize = 100_000;
+const MAX_VAL: u64 = 1_000;
+const BIT_WIDTH: u8 = 10; // 2^10 = 1024 > MAX_VAL
+
+fn create_packed_data() -> (BitUnpacker, Vec<u8>) {
+    let mut bitpacker = BitPacker::new();
+    let mut data = Vec::new();
+    for i in 0..N as u64 {
+        let val = i * MAX_VAL / N as u64;
+        bitpacker.write(val, BIT_WIDTH, &mut data).unwrap();
+    }
+    bitpacker.close(&mut data).unwrap();
+    (BitUnpacker::new(BIT_WIDTH), data)
+}
+
+fn bench_bitpacking() {
+    let mut runner = BenchRunner::new();
+    let bit_width = 3;
+    let num_els = 1_000_000u32;
+    let bit_unpacker = BitUnpacker::new(bit_width);
+    let data = create_bitpacked_data(bit_width, num_els);
+    let idxs: Vec<u32> = (0..num_els).choose_multiple(&mut rng(), 100_000);
+    runner.bench_function("bitpacking_read", move |_| {
+        let mut out = 0u64;
+        for &idx in &idxs {
+            out = out.wrapping_add(bit_unpacker.get(idx, &data[..]));
         }
-        buffer
-    }
+        black_box(out);
+    });
+}
 
-    #[bench]
-    fn bench_bitpacking_read(b: &mut Bencher) {
-        let bit_width = 3;
-        let num_els = 1_000_000u32;
-        let bit_unpacker = BitUnpacker::new(bit_width);
-        let data = create_bitpacked_data(bit_width, num_els);
-        let idxs: Vec<u32> = (0..num_els).choose_multiple(&mut rng(), 100_000);
-        b.iter(|| {
-            let mut out = 0u64;
-            for &idx in &idxs {
-                out = out.wrapping_add(bit_unpacker.get(idx, &data[..]));
-            }
-            out
-        });
+fn bench_blocked_bitpacker() {
+    let mut runner = BenchRunner::new();
+    let mut blocked_bitpacker = BlockedBitpacker::new();
+    for val in 0..=21500 {
+        blocked_bitpacker.add(val * val);
     }
-
-    #[bench]
-    fn bench_blockedbitp_read(b: &mut Bencher) {
+    runner.bench_function("blockedbitp_read", move |_| {
+        let mut out = 0u64;
+        for val in 0..=21500 {
+            out = out.wrapping_add(blocked_bitpacker.get(val));
+        }
+        black_box(out);
+    });
+    runner.bench_function("blockedbitp_create", |_| {
         let mut blocked_bitpacker = BlockedBitpacker::new();
         for val in 0..=21500 {
             blocked_bitpacker.add(val * val);
         }
-        b.iter(|| {
-            let mut out = 0u64;
-            for val in 0..=21500 {
-                out = out.wrapping_add(blocked_bitpacker.get(val));
-            }
-            out
-        });
-    }
-
-    #[bench]
-    fn bench_blockedbitp_create(b: &mut Bencher) {
-        b.iter(|| {
-            let mut blocked_bitpacker = BlockedBitpacker::new();
-            for val in 0..=21500 {
-                blocked_bitpacker.add(val * val);
-            }
-            blocked_bitpacker
-        });
-    }
+        black_box(blocked_bitpacker);
+    });
+}
+
+fn bench_filter_vec() {
+    let mut runner = BenchRunner::new();
+
+    let (unpacker, data) = create_packed_data();
+    let positions = RefCell::new(Vec::with_capacity(N));
+    runner.bench_function("filter_vec_dense", move |_| {
+        unpacker.get_ids_for_value_range(250..=750, 0..N as u32, &data, &mut positions.borrow_mut());
+        black_box(positions.borrow().len());
+    });
+
+    let (unpacker, data) = create_packed_data();
+    let positions = RefCell::new(Vec::with_capacity(N));
+    runner.bench_function("filter_vec_sparse", move |_| {
+        unpacker.get_ids_for_value_range(0..=50, 0..N as u32, &data, &mut positions.borrow_mut());
+        black_box(positions.borrow().len());
+    });
+
+    let (unpacker, data) = create_packed_data();
+    let positions = RefCell::new(Vec::with_capacity(N));
+    runner.bench_function("filter_vec_full", move |_| {
+        unpacker.get_ids_for_value_range(0..=MAX_VAL, 0..N as u32, &data, &mut positions.borrow_mut());
+        black_box(positions.borrow().len());
+    });
+}
+
+fn main() {
+    bench_bitpacking();
+    bench_blocked_bitpacker();
+    bench_filter_vec();
 }