mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 17:22:54 +00:00
101 lines
3.0 KiB
Rust
101 lines
3.0 KiB
Rust
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
|
|
|
|
/// Search the first index containing an element greater or equal to
|
|
/// the target.
|
|
///
|
|
/// The results should be equivalent to
|
|
/// ```compile_fail
|
|
/// block[..]
|
|
// .iter()
|
|
// .take_while(|&&val| val < target)
|
|
// .count()
|
|
/// ```
|
|
///
|
|
/// the `start` argument is just used to hint that the response is
|
|
/// greater than beyond `start`. the implementation may or may not use
|
|
/// it for optimization.
|
|
///
|
|
/// # Assumption
|
|
///
|
|
/// - The block is sorted. Some elements may appear several times. This is the case at the
|
|
/// end of the last block for instance.
|
|
/// - The target is assumed smaller or equal to the last element of the block.
|
|
pub fn branchless_binary_search(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
|
|
let mut start = 0;
|
|
let mut len = arr.len();
|
|
for _ in 0..7 {
|
|
len /= 2;
|
|
let pivot = unsafe { *arr.get_unchecked(start + len - 1) };
|
|
if pivot < target {
|
|
start += len;
|
|
}
|
|
}
|
|
start
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use std::collections::HashSet;
|
|
|
|
use proptest::prelude::*;
|
|
|
|
use super::branchless_binary_search;
|
|
use crate::docset::TERMINATED;
|
|
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
|
|
|
|
fn search_in_block_trivial_but_slow(block: &[u32], target: u32) -> usize {
|
|
block.iter().take_while(|&&val| val < target).count()
|
|
}
|
|
|
|
fn util_test_search_in_block(block: &[u32], target: u32) {
|
|
let cursor = search_in_block_trivial_but_slow(block, target);
|
|
assert!(cursor < COMPRESSION_BLOCK_SIZE);
|
|
assert!(block[cursor] >= target);
|
|
if cursor > 0 {
|
|
assert!(block[cursor - 1] < target);
|
|
}
|
|
assert_eq!(block.len(), COMPRESSION_BLOCK_SIZE);
|
|
let mut output_buffer = [TERMINATED; COMPRESSION_BLOCK_SIZE];
|
|
output_buffer[..block.len()].copy_from_slice(block);
|
|
assert_eq!(branchless_binary_search(&output_buffer, target), cursor);
|
|
}
|
|
|
|
fn util_test_search_in_block_all(block: &[u32]) {
|
|
let mut targets = HashSet::new();
|
|
targets.insert(0);
|
|
for &val in block {
|
|
if val > 0 {
|
|
targets.insert(val - 1);
|
|
}
|
|
targets.insert(val);
|
|
}
|
|
for target in targets {
|
|
util_test_search_in_block(block, target);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_search_in_branchless_binary_search() {
|
|
let v: Vec<u32> = (0..COMPRESSION_BLOCK_SIZE).map(|i| i as u32 * 2).collect();
|
|
util_test_search_in_block_all(&v[..]);
|
|
}
|
|
|
|
fn monotonous_block() -> impl Strategy<Value = Vec<u32>> {
|
|
prop::collection::vec(0u32..5u32, COMPRESSION_BLOCK_SIZE).prop_map(|mut deltas| {
|
|
let mut el = 0;
|
|
for i in 0..COMPRESSION_BLOCK_SIZE {
|
|
el += deltas[i];
|
|
deltas[i] = el;
|
|
}
|
|
deltas
|
|
})
|
|
}
|
|
|
|
proptest! {
|
|
#[test]
|
|
fn test_proptest_branchless_binary_search(block in monotonous_block()) {
|
|
util_test_search_in_block_all(&block[..]);
|
|
}
|
|
}
|
|
}
|