mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-29 22:00:48 +00:00
Compare commits
2 Commits
trinity.po
...
seek_dange
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2d7cf64941 | ||
|
|
c833743701 |
@@ -981,27 +981,19 @@ where
|
||||
) -> crate::Result<IntermediateBucketResult> {
|
||||
let mut entries: Vec<(u64, Bucket)> = term_buckets.into_vec();
|
||||
|
||||
let segment_size = term_req.req.segment_size as usize;
|
||||
|
||||
// select_nth_unstable_by_key(segment_size, ...) places the (k+1)-th element at
|
||||
// entries[segment_size] and guarantees entries[0..segment_size] are the top-k,
|
||||
// unordered. We need this to properly compute term_doc_count_before_cutoff.
|
||||
match &term_req.req.order.target {
|
||||
OrderTarget::Key => {
|
||||
// We rely on the fact, that term ordinals match the order of the strings
|
||||
// TODO: We could have a special collector, that keeps only TOP n results at any
|
||||
// time.
|
||||
if entries.len() > segment_size {
|
||||
if term_req.req.order.order == Order::Desc {
|
||||
entries
|
||||
.select_nth_unstable_by_key(segment_size, |b| std::cmp::Reverse(b.0));
|
||||
} else {
|
||||
entries.select_nth_unstable_by_key(segment_size, |b| b.0);
|
||||
}
|
||||
if term_req.req.order.order == Order::Desc {
|
||||
entries.sort_unstable_by_key(|bucket| std::cmp::Reverse(bucket.0));
|
||||
} else {
|
||||
entries.sort_unstable_by_key(|bucket| bucket.0);
|
||||
}
|
||||
}
|
||||
OrderTarget::SubAggregation(sub_agg_path) => {
|
||||
// Peek segment-level metric values, select top-k, then fall through to
|
||||
// Peek segment-level metric values, sort, then fall through to
|
||||
// `cut_off_buckets`. Like Elasticsearch, we always cut off when ordering
|
||||
// by a sub-agg: top-K results are approximate and may differ from the
|
||||
// global ordering, especially for non-monotonic metrics like avg/min.
|
||||
@@ -1011,7 +1003,7 @@ where
|
||||
))
|
||||
})?;
|
||||
let (agg_name, agg_prop) = get_agg_name_and_property(sub_agg_path);
|
||||
// Fetch values up-front; otherwise sort would re-compute per call
|
||||
// Fetch values up-front; otherwise sort would re-compute per comparison
|
||||
let mut keyed: Vec<(f64, (u64, Bucket))> = entries
|
||||
.into_iter()
|
||||
.map(|bucket| {
|
||||
@@ -1021,34 +1013,28 @@ where
|
||||
(metric_value, bucket)
|
||||
})
|
||||
.collect();
|
||||
if keyed.len() > segment_size {
|
||||
if term_req.req.order.order == Order::Desc {
|
||||
keyed.select_nth_unstable_by(segment_size, |a, b| {
|
||||
b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
} else {
|
||||
keyed.select_nth_unstable_by(segment_size, |a, b| {
|
||||
a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
}
|
||||
if term_req.req.order.order == Order::Desc {
|
||||
keyed.sort_unstable_by(|a, b| {
|
||||
b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
} else {
|
||||
keyed.sort_unstable_by(|a, b| {
|
||||
a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
}
|
||||
entries = keyed.into_iter().map(|(_, e)| e).collect();
|
||||
}
|
||||
OrderTarget::Count => {
|
||||
if entries.len() > segment_size {
|
||||
if term_req.req.order.order == Order::Desc {
|
||||
entries.select_nth_unstable_by_key(segment_size, |b| {
|
||||
std::cmp::Reverse(b.1.count)
|
||||
});
|
||||
} else {
|
||||
entries.select_nth_unstable_by_key(segment_size, |b| b.1.count);
|
||||
}
|
||||
if term_req.req.order.order == Order::Desc {
|
||||
entries.sort_unstable_by_key(|bucket| std::cmp::Reverse(bucket.1.count));
|
||||
} else {
|
||||
entries.sort_unstable_by_key(|bucket| bucket.1.count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let (term_doc_count_before_cutoff, sum_other_doc_count) =
|
||||
cut_off_buckets(&mut entries, segment_size);
|
||||
cut_off_buckets(&mut entries, term_req.req.segment_size as usize);
|
||||
|
||||
let mut dict: FxHashMap<IntermediateKey, IntermediateTermBucketEntry> = Default::default();
|
||||
dict.reserve(entries.len());
|
||||
|
||||
@@ -1,36 +1,78 @@
|
||||
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
|
||||
|
||||
/// Search the first index containing an element greater or equal to
|
||||
/// the target.
|
||||
/// Returns the index of the first element in `arr` that is greater than or
|
||||
/// equal to `target`.
|
||||
///
|
||||
/// The results should be equivalent to
|
||||
/// ```compile_fail
|
||||
/// block[..]
|
||||
// .iter()
|
||||
// .take_while(|&&val| val < target)
|
||||
// .count()
|
||||
/// This is equivalent to:
|
||||
///
|
||||
/// ```ignore
|
||||
/// arr.iter().take_while(|&&val| val < target).count()
|
||||
/// ```
|
||||
///
|
||||
/// the `start` argument is just used to hint that the response is
|
||||
/// greater than beyond `start`. The implementation may or may not use
|
||||
/// it for optimization.
|
||||
///
|
||||
/// # Assumption
|
||||
/// # Assumptions
|
||||
///
|
||||
/// - The block is sorted. Some elements may appear several times. This is the case at the
|
||||
/// end of the last block for instance.
|
||||
/// - The target is assumed smaller or equal to the last element of the block.
|
||||
pub fn branchless_binary_search(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
|
||||
let mut start = 0;
|
||||
let mut len = arr.len();
|
||||
for _ in 0..7 {
|
||||
len /= 2;
|
||||
let pivot = unsafe { *arr.get_unchecked(start + len - 1) };
|
||||
if pivot < target {
|
||||
start += len;
|
||||
/// - `arr` is sorted in nondecreasing order. Values may be repeated; the last block is often padded
|
||||
/// with duplicates of its final value.
|
||||
/// - `target` is less than or equal to the last element in `arr`, so the result is always a valid
|
||||
/// index into the block.
|
||||
///
|
||||
/// # `K`
|
||||
///
|
||||
/// `K` is the branching factor. Each reduction probes `K - 1` segment-end
|
||||
/// pivots, keeps the matching segment, and finally linearly scans the remaining
|
||||
/// range. `K` must be one of `2`, `4`, `8`, `16`, or `32`.
|
||||
///
|
||||
/// This is the "k-ary search on a sorted array" variant of Schlegel, Gemulla & Lehner,
|
||||
/// "k-Ary Search on Modern Processors", DaMoN 2009 (<https://dl.acm.org/doi/10.1145/1565694.1565705>),
|
||||
/// specialized to a lower-bound (no equality early-exit) with a linear scan over the final
|
||||
/// `< K` elements. We do not use their linearized-tree (`k-ary-lt`) layout, which would require
|
||||
/// reordering the block.
|
||||
///
|
||||
/// The core idea vs a traditional binary search is that we can check multiple numbers in parallel,
|
||||
/// which better utilizes the CPU's instruction-level parallelism.
|
||||
///
|
||||
/// `kary_search::<8>` reduces in three steps: 128 -> 16 -> 2, then a 2-element scan. It could be
|
||||
/// done in only two steps (128 -> 16, then scanning all 16 contiguous elements). For that
|
||||
/// we need popcount for that to be fast though (TODO).
|
||||
#[inline(always)]
|
||||
pub fn kary_search<const K: usize>(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
|
||||
const {
|
||||
assert!(
|
||||
matches!(K, 2 | 4 | 8 | 16 | 32),
|
||||
"K must be one of 2, 4, 8, 16, or 32"
|
||||
);
|
||||
};
|
||||
|
||||
let mut base = 0usize;
|
||||
let mut range = COMPRESSION_BLOCK_SIZE;
|
||||
|
||||
loop {
|
||||
let step = range / K;
|
||||
if step == 0 {
|
||||
break;
|
||||
}
|
||||
debug_assert_eq!(range % K, 0);
|
||||
// Count how many segment-end pivots are < target (branchless, unrolled).
|
||||
let mut count = 0usize;
|
||||
for i in 1..K {
|
||||
count += (arr[base + i * step - 1] < target) as usize;
|
||||
}
|
||||
base += count * step;
|
||||
range = step;
|
||||
}
|
||||
start
|
||||
|
||||
// Linear scan over the ≤K remaining elements.
|
||||
let mut count = 0usize;
|
||||
for i in 0..range {
|
||||
count += (arr[base + i] < target) as usize;
|
||||
}
|
||||
base + count
|
||||
}
|
||||
|
||||
/// entry point used by postings; implemented as an 8-ary branchless search.
|
||||
#[inline]
|
||||
pub fn search_block(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
|
||||
kary_search::<8>(arr, target)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -39,7 +81,7 @@ mod tests {
|
||||
|
||||
use proptest::prelude::*;
|
||||
|
||||
use super::branchless_binary_search;
|
||||
use super::{kary_search, search_block};
|
||||
use crate::docset::TERMINATED;
|
||||
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
|
||||
|
||||
@@ -57,7 +99,7 @@ mod tests {
|
||||
assert_eq!(block.len(), COMPRESSION_BLOCK_SIZE);
|
||||
let mut output_buffer = [TERMINATED; COMPRESSION_BLOCK_SIZE];
|
||||
output_buffer[..block.len()].copy_from_slice(block);
|
||||
assert_eq!(branchless_binary_search(&output_buffer, target), cursor);
|
||||
assert_eq!(search_block(&output_buffer, target), cursor);
|
||||
}
|
||||
|
||||
fn util_test_search_in_block_all(block: &[u32]) {
|
||||
@@ -80,6 +122,44 @@ mod tests {
|
||||
util_test_search_in_block_all(&v[..]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_search_in_branchless_binary_search_corner_cases() {
|
||||
let all_same = vec![7u32; COMPRESSION_BLOCK_SIZE];
|
||||
util_test_search_in_block_all(&all_same);
|
||||
|
||||
let repeated_across_pivots: Vec<u32> = (0..COMPRESSION_BLOCK_SIZE)
|
||||
.map(|i| (i / 17) as u32)
|
||||
.collect();
|
||||
util_test_search_in_block_all(&repeated_across_pivots);
|
||||
|
||||
let mut padded_last_block = vec![0u32; COMPRESSION_BLOCK_SIZE];
|
||||
for (i, value) in padded_last_block.iter_mut().enumerate() {
|
||||
*value = if i < COMPRESSION_BLOCK_SIZE / 2 {
|
||||
i as u32
|
||||
} else {
|
||||
TERMINATED
|
||||
};
|
||||
}
|
||||
util_test_search_in_block_all(&padded_last_block);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_kary_search_allowed_branching_factors() {
|
||||
let mut block = [TERMINATED; COMPRESSION_BLOCK_SIZE];
|
||||
for (idx, value) in block.iter_mut().enumerate() {
|
||||
*value = (idx / 3) as u32;
|
||||
}
|
||||
|
||||
for target in [0, 1, 17, block[COMPRESSION_BLOCK_SIZE - 1]] {
|
||||
let expected = search_in_block_trivial_but_slow(&block, target);
|
||||
assert_eq!(kary_search::<2>(&block, target), expected);
|
||||
assert_eq!(kary_search::<4>(&block, target), expected);
|
||||
assert_eq!(kary_search::<8>(&block, target), expected);
|
||||
assert_eq!(kary_search::<16>(&block, target), expected);
|
||||
assert_eq!(kary_search::<32>(&block, target), expected);
|
||||
}
|
||||
}
|
||||
|
||||
fn monotonous_block() -> impl Strategy<Value = Vec<u32>> {
|
||||
prop::collection::vec(0u32..5u32, COMPRESSION_BLOCK_SIZE).prop_map(|mut deltas| {
|
||||
let mut el = 0;
|
||||
|
||||
@@ -158,7 +158,7 @@ impl BlockDecoder {
|
||||
/// Uses the padded buffer to enable branchless search.
|
||||
#[inline]
|
||||
pub(crate) fn seek_within_block(&self, target: u32) -> usize {
|
||||
crate::postings::branchless_binary_search(&self.output, target)
|
||||
crate::postings::search_block(&self.output, target)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
mod block_search;
|
||||
|
||||
pub(crate) use self::block_search::branchless_binary_search;
|
||||
pub(crate) use self::block_search::search_block;
|
||||
|
||||
mod block_segment_postings;
|
||||
pub(crate) mod compression;
|
||||
|
||||
Reference in New Issue
Block a user