Compare commits

...

1 Commits

Author SHA1 Message Date
Pascal Seitz
d860474c03 faster intersection 2026-06-28 10:40:42 +02:00

View File

@@ -20,17 +20,45 @@ use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
/// - The block is sorted. Some elements may appear several times. This is the case at the
/// end of the last block for instance.
/// - The target is assumed smaller or equal to the last element of the block.
pub fn branchless_binary_search(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
let mut start = 0;
let mut len = arr.len();
for _ in 0..7 {
len /= 2;
let pivot = unsafe { *arr.get_unchecked(start + len - 1) };
if pivot < target {
start += len;
// Generic over CACHE_LINE_ELEMS so we can tune the block size.
// NUM_BLOCKS = COMPRESSION_BLOCK_SIZE / CACHE_LINE_ELEMS must be a power of two.
//
// Step 1: while-loop over halvings (no hardcoded iteration count) to find
// which CACHE_LINE_ELEMS-wide block holds the answer.
// Step 2: branchless linear scan over that block — one cache line load,
// auto-vectorizable.
#[inline(always)]
pub fn kary_search<const K: usize>(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
let mut base = 0usize;
let mut range = COMPRESSION_BLOCK_SIZE;
loop {
let step = range / K;
if step == 0 {
break;
}
// Count how many segment-end pivots are < target (branchless, unrolled).
let mut count = 0usize;
for i in 1..K {
count += (unsafe { *arr.get_unchecked(base + i * step - 1) } < target) as usize;
}
base += count * step;
range = step;
}
start
// Linear scan over the ≤K remaining elements.
let mut count = 0usize;
for i in 0..range {
count += (unsafe { *arr.get_unchecked(base + i) } < target) as usize;
}
base + count
}
// Concrete instantiations for assembly inspection / benchmarking.
#[inline]
pub fn branchless_binary_search(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
kary_search::<8>(arr, target)
}
#[cfg(test)]