mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-29 05:40:42 +00:00
Compare commits
2 Commits
seek_dange
...
seek_dange
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ca0ed87d79 | ||
|
|
1e859fd78d |
@@ -2,7 +2,7 @@ use binggan::{black_box, BenchGroup, BenchRunner};
|
||||
use rand::prelude::*;
|
||||
use rand::rngs::StdRng;
|
||||
use rand::SeedableRng;
|
||||
use tantivy::collector::{Collector, Count, DocSetCollector, TopDocs};
|
||||
use tantivy::collector::{Collector, Count, TopDocs};
|
||||
use tantivy::query::{Query, QueryParser};
|
||||
use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
|
||||
use tantivy::{doc, Index, Order, ReloadPolicy, Searcher};
|
||||
@@ -110,43 +110,39 @@ fn main() {
|
||||
// Prepare corpora with varying scenarios
|
||||
let scenarios = vec![
|
||||
(
|
||||
"dense and 99% a".to_string(),
|
||||
10_000_000,
|
||||
0.99,
|
||||
"dense and 0.1% a".to_string(),
|
||||
5_000_000,
|
||||
0.001,
|
||||
"dense",
|
||||
0,
|
||||
9,
|
||||
),
|
||||
("dense and 1% a".to_string(), 5_000_000, 0.01, "dense", 0, 9),
|
||||
("dense and 10% a".to_string(), 5_000_000, 0.1, "dense", 0, 9),
|
||||
(
|
||||
"dense and 99% a".to_string(),
|
||||
10_000_000,
|
||||
0.99,
|
||||
"dense and 50% a".to_string(),
|
||||
5_000_000,
|
||||
0.5,
|
||||
"dense",
|
||||
990,
|
||||
999,
|
||||
0,
|
||||
500,
|
||||
),
|
||||
(
|
||||
"sparse and 99% a".to_string(),
|
||||
10_000_000,
|
||||
"sparse and 50% a".to_string(),
|
||||
5_000_000,
|
||||
0.99,
|
||||
"sparse",
|
||||
0,
|
||||
9,
|
||||
),
|
||||
(
|
||||
"sparse and 99% a".to_string(),
|
||||
10_000_000,
|
||||
0.99,
|
||||
"sparse",
|
||||
9_999_990,
|
||||
9_999_999,
|
||||
),
|
||||
];
|
||||
|
||||
let mut runner = BenchRunner::new();
|
||||
for (scenario_id, n, p_title_a, num_rand_distribution, range_low, range_high) in scenarios {
|
||||
for (scenario_id, num_docs, p_title_a, num_rand_distribution, range_low, range_high) in
|
||||
scenarios
|
||||
{
|
||||
// Build index for this scenario
|
||||
let bench_index = build_shared_indices(n, p_title_a, num_rand_distribution);
|
||||
let bench_index = build_shared_indices(num_docs, p_title_a, num_rand_distribution);
|
||||
|
||||
// Create benchmark group
|
||||
let mut group = runner.new_group();
|
||||
@@ -158,7 +154,7 @@ fn main() {
|
||||
let field_names = ["num_rand", "num_asc", "num_rand_fast", "num_asc_fast"];
|
||||
|
||||
// Define the three terms we want to test with
|
||||
let terms = ["a", "b", "z"];
|
||||
let terms = ["a"];
|
||||
|
||||
// Generate all combinations of terms and field names
|
||||
let mut queries = Vec::new();
|
||||
@@ -202,8 +198,8 @@ fn run_benchmark_tasks(
|
||||
bench_group,
|
||||
bench_index,
|
||||
query_str,
|
||||
DocSetCollector,
|
||||
"all results",
|
||||
(Count, TopDocs::with_limit(1000).order_by_score()),
|
||||
"all_results",
|
||||
);
|
||||
|
||||
// Test top 100 by the field (if it's a FAST field)
|
||||
@@ -269,6 +265,10 @@ impl<C: Collector> SearchTask<C> {
|
||||
.downcast_ref::<Vec<(Option<u64>, tantivy::DocAddress)>>()
|
||||
{
|
||||
top_docs.len()
|
||||
} else if let Some(top_docs_with_count) = (&result as &dyn std::any::Any)
|
||||
.downcast_ref::<(usize, Vec<(f32, tantivy::DocAddress)>)>()
|
||||
{
|
||||
top_docs_with_count.0
|
||||
} else if let Some(top_docs) =
|
||||
(&result as &dyn std::any::Any).downcast_ref::<Vec<(u64, tantivy::DocAddress)>>()
|
||||
{
|
||||
|
||||
@@ -275,7 +275,7 @@ impl SegmentCompositeCollector {
|
||||
dict.insert(
|
||||
key,
|
||||
IntermediateCompositeBucketEntry {
|
||||
doc_count: agg.count,
|
||||
doc_count: agg.count as u64,
|
||||
sub_aggregation: sub_aggregation_res,
|
||||
},
|
||||
);
|
||||
|
||||
@@ -957,7 +957,7 @@ fn into_intermediate_bucket_entry(
|
||||
)?;
|
||||
}
|
||||
Ok(IntermediateTermBucketEntry {
|
||||
doc_count: bucket.count,
|
||||
doc_count: bucket.count as u64,
|
||||
sub_aggregation: sub_aggregation_res,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -98,7 +98,7 @@ impl SegmentAggregationCollector for TermMissingAgg {
|
||||
|
||||
let missing_count = &self.missing_count_per_bucket[parent_bucket_id as usize];
|
||||
let mut missing_entry = IntermediateTermBucketEntry {
|
||||
doc_count: missing_count.missing_count,
|
||||
doc_count: missing_count.missing_count as u64,
|
||||
sub_aggregation: Default::default(),
|
||||
};
|
||||
if let Some(sub_agg) = &mut self.sub_agg {
|
||||
|
||||
@@ -930,7 +930,7 @@ impl IntermediateRangeBucketEntry {
|
||||
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct IntermediateTermBucketEntry {
|
||||
/// The number of documents in the bucket.
|
||||
pub doc_count: u32,
|
||||
pub doc_count: u64,
|
||||
/// The sub_aggregation in this bucket.
|
||||
pub sub_aggregation: IntermediateAggregationResults,
|
||||
}
|
||||
@@ -1240,6 +1240,24 @@ mod tests {
|
||||
assert_eq!(tree_left, tree_expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_term_bucket_doc_count_no_u32_overflow() {
|
||||
// Two segments each contributing (u32::MAX - 100) docs to the same term. Summing them
|
||||
// overflowed when doc_count was u32.
|
||||
let per_segment = u32::MAX as u64 - 100;
|
||||
let mut entry = IntermediateTermBucketEntry {
|
||||
doc_count: per_segment,
|
||||
sub_aggregation: Default::default(),
|
||||
};
|
||||
entry
|
||||
.merge_fruits(IntermediateTermBucketEntry {
|
||||
doc_count: per_segment,
|
||||
sub_aggregation: Default::default(),
|
||||
})
|
||||
.unwrap();
|
||||
assert_eq!(entry.doc_count, per_segment * 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_merge_fruits_tree_empty() {
|
||||
let mut tree_left = get_intermediate_tree_with_ranges(&[
|
||||
|
||||
@@ -1,68 +1,36 @@
|
||||
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
|
||||
|
||||
/// Returns the index of the first element in `arr` that is greater than or
|
||||
/// equal to `target`.
|
||||
/// Search the first index containing an element greater or equal to
|
||||
/// the target.
|
||||
///
|
||||
/// This is equivalent to:
|
||||
///
|
||||
/// ```ignore
|
||||
/// arr.iter().take_while(|&&val| val < target).count()
|
||||
/// The results should be equivalent to
|
||||
/// ```compile_fail
|
||||
/// block[..]
|
||||
// .iter()
|
||||
// .take_while(|&&val| val < target)
|
||||
// .count()
|
||||
/// ```
|
||||
///
|
||||
/// the `start` argument is just used to hint that the response is
|
||||
/// greater than beyond `start`. The implementation may or may not use
|
||||
/// it for optimization.
|
||||
///
|
||||
/// # Assumptions
|
||||
/// # Assumption
|
||||
///
|
||||
/// - `arr` is sorted in nondecreasing order. Values may be repeated; the last block is often padded
|
||||
/// with duplicates of its final value.
|
||||
/// - `target` is less than or equal to the last element in `arr`, so the result is always a valid
|
||||
/// index into the block.
|
||||
///
|
||||
/// # `K`
|
||||
///
|
||||
/// `K` is the branching factor. Each reduction probes `K - 1` segment-end
|
||||
/// pivots, keeps the matching segment, and finally linearly scans the remaining
|
||||
/// range. `K` must be one of `2`, `4`, `8`, `16`, `32`, or `64`.
|
||||
///
|
||||
/// The core idea vs a traditional binary search is that we can very cheaply scan blocks of
|
||||
/// numbers, since they are already in the CPU cache line.
|
||||
#[inline(always)]
|
||||
pub fn kary_search<const K: usize>(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
|
||||
const {
|
||||
assert!(
|
||||
matches!(K, 2 | 4 | 8 | 16 | 32 | 64),
|
||||
"K must be one of 2, 4, 8, 16, 32, or 64"
|
||||
);
|
||||
};
|
||||
|
||||
let mut base = 0usize;
|
||||
let mut range = COMPRESSION_BLOCK_SIZE;
|
||||
|
||||
loop {
|
||||
let step = range / K;
|
||||
if step == 0 {
|
||||
break;
|
||||
/// - The block is sorted. Some elements may appear several times. This is the case at the
|
||||
/// end of the last block for instance.
|
||||
/// - The target is assumed smaller or equal to the last element of the block.
|
||||
pub fn branchless_binary_search(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
|
||||
let mut start = 0;
|
||||
let mut len = arr.len();
|
||||
for _ in 0..7 {
|
||||
len /= 2;
|
||||
let pivot = unsafe { *arr.get_unchecked(start + len - 1) };
|
||||
if pivot < target {
|
||||
start += len;
|
||||
}
|
||||
debug_assert_eq!(range % K, 0);
|
||||
// Count how many segment-end pivots are < target (branchless, unrolled).
|
||||
let mut count = 0usize;
|
||||
for i in 1..K {
|
||||
count += (unsafe { *arr.get_unchecked(base + i * step - 1) } < target) as usize;
|
||||
}
|
||||
base += count * step;
|
||||
range = step;
|
||||
}
|
||||
|
||||
// Linear scan over the ≤K remaining elements.
|
||||
let mut count = 0usize;
|
||||
for i in 0..range {
|
||||
count += (unsafe { *arr.get_unchecked(base + i) } < target) as usize;
|
||||
}
|
||||
base + count
|
||||
}
|
||||
|
||||
/// entry point used by postings; implemented as an 8-ary branchless search.
|
||||
#[inline]
|
||||
pub fn search_block(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
|
||||
kary_search::<8>(arr, target)
|
||||
start
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -71,7 +39,7 @@ mod tests {
|
||||
|
||||
use proptest::prelude::*;
|
||||
|
||||
use super::{kary_search, search_block};
|
||||
use super::branchless_binary_search;
|
||||
use crate::docset::TERMINATED;
|
||||
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
|
||||
|
||||
@@ -89,7 +57,7 @@ mod tests {
|
||||
assert_eq!(block.len(), COMPRESSION_BLOCK_SIZE);
|
||||
let mut output_buffer = [TERMINATED; COMPRESSION_BLOCK_SIZE];
|
||||
output_buffer[..block.len()].copy_from_slice(block);
|
||||
assert_eq!(search_block(&output_buffer, target), cursor);
|
||||
assert_eq!(branchless_binary_search(&output_buffer, target), cursor);
|
||||
}
|
||||
|
||||
fn util_test_search_in_block_all(block: &[u32]) {
|
||||
@@ -112,45 +80,6 @@ mod tests {
|
||||
util_test_search_in_block_all(&v[..]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_search_in_branchless_binary_search_corner_cases() {
|
||||
let all_same = vec![7u32; COMPRESSION_BLOCK_SIZE];
|
||||
util_test_search_in_block_all(&all_same);
|
||||
|
||||
let repeated_across_pivots: Vec<u32> = (0..COMPRESSION_BLOCK_SIZE)
|
||||
.map(|i| (i / 17) as u32)
|
||||
.collect();
|
||||
util_test_search_in_block_all(&repeated_across_pivots);
|
||||
|
||||
let mut padded_last_block = vec![0u32; COMPRESSION_BLOCK_SIZE];
|
||||
for (i, value) in padded_last_block.iter_mut().enumerate() {
|
||||
*value = if i < COMPRESSION_BLOCK_SIZE / 2 {
|
||||
i as u32
|
||||
} else {
|
||||
TERMINATED
|
||||
};
|
||||
}
|
||||
util_test_search_in_block_all(&padded_last_block);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_kary_search_allowed_branching_factors() {
|
||||
let mut block = [TERMINATED; COMPRESSION_BLOCK_SIZE];
|
||||
for (idx, value) in block.iter_mut().enumerate() {
|
||||
*value = (idx / 3) as u32;
|
||||
}
|
||||
|
||||
for target in [0, 1, 17, block[COMPRESSION_BLOCK_SIZE - 1]] {
|
||||
let expected = search_in_block_trivial_but_slow(&block, target);
|
||||
assert_eq!(kary_search::<2>(&block, target), expected);
|
||||
assert_eq!(kary_search::<4>(&block, target), expected);
|
||||
assert_eq!(kary_search::<8>(&block, target), expected);
|
||||
assert_eq!(kary_search::<16>(&block, target), expected);
|
||||
assert_eq!(kary_search::<32>(&block, target), expected);
|
||||
assert_eq!(kary_search::<64>(&block, target), expected);
|
||||
}
|
||||
}
|
||||
|
||||
fn monotonous_block() -> impl Strategy<Value = Vec<u32>> {
|
||||
prop::collection::vec(0u32..5u32, COMPRESSION_BLOCK_SIZE).prop_map(|mut deltas| {
|
||||
let mut el = 0;
|
||||
|
||||
@@ -158,7 +158,7 @@ impl BlockDecoder {
|
||||
/// Uses the padded buffer to enable branchless search.
|
||||
#[inline]
|
||||
pub(crate) fn seek_within_block(&self, target: u32) -> usize {
|
||||
crate::postings::search_block(&self.output, target)
|
||||
crate::postings::branchless_binary_search(&self.output, target)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
mod block_search;
|
||||
|
||||
pub(crate) use self::block_search::search_block;
|
||||
pub(crate) use self::block_search::branchless_binary_search;
|
||||
|
||||
mod block_segment_postings;
|
||||
pub(crate) mod compression;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::fmt;
|
||||
|
||||
use crate::docset::COLLECT_BLOCK_BUFFER_LEN;
|
||||
use crate::docset::{SeekDangerResult, COLLECT_BLOCK_BUFFER_LEN};
|
||||
use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight};
|
||||
use crate::{DocId, DocSet, Score, SegmentReader, TantivyError, Term};
|
||||
|
||||
@@ -119,6 +119,10 @@ impl<TDocSet: DocSet> DocSet for ConstScorer<TDocSet> {
|
||||
self.docset.seek(target)
|
||||
}
|
||||
|
||||
fn seek_danger(&mut self, target: DocId) -> SeekDangerResult {
|
||||
self.docset.seek_danger(target)
|
||||
}
|
||||
|
||||
fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize {
|
||||
self.docset.fill_buffer(buffer)
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ use std::ops::RangeInclusive;
|
||||
|
||||
use columnar::Column;
|
||||
|
||||
use crate::docset::SeekDangerResult;
|
||||
use crate::{DocId, DocSet, TERMINATED};
|
||||
|
||||
/// Helper to have a cursor over a vec of docids
|
||||
@@ -184,6 +185,37 @@ impl<T: Send + Sync + PartialOrd + Copy + Debug + 'static> DocSet for RangeDocSe
|
||||
doc
|
||||
}
|
||||
|
||||
/// `seek_danger` only needs to answer whether `target` itself matches, so it does a cheap
|
||||
/// point lookup on the column instead of scanning forward to materialize the next match (the
|
||||
/// expensive part of a regular `seek`).
|
||||
fn seek_danger(&mut self, target: DocId) -> SeekDangerResult {
|
||||
// Covers `target == TERMINATED` and any target past the last doc: no match is possible.
|
||||
if target >= self.column.num_docs() {
|
||||
return SeekDangerResult::SeekLowerBound(TERMINATED);
|
||||
}
|
||||
|
||||
if self.is_last_seek_distance_large(target) {
|
||||
self.reset_fetch_range();
|
||||
}
|
||||
self.last_seek_pos_opt = Some(target);
|
||||
|
||||
let is_match = self
|
||||
.column
|
||||
.values_for_doc(target)
|
||||
.any(|value| self.value_range.contains(&value));
|
||||
if is_match {
|
||||
// Leave the docset in a valid state positioned on `target`, so `doc()` returns it and a
|
||||
// following `advance()` resumes the scan right after it.
|
||||
self.loaded_docs.get_cleared_data().push(target);
|
||||
self.next_fetch_start = target + 1;
|
||||
SeekDangerResult::Found
|
||||
} else {
|
||||
// `target` is not in the docset. The next match is strictly greater than `target`, so
|
||||
// `target + 1` is a valid lower bound. We may leave the docset in an invalid state.
|
||||
SeekDangerResult::SeekLowerBound(target + 1)
|
||||
}
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> u32 {
|
||||
// TODO: Implement a better size hint
|
||||
self.column.num_docs() / 10
|
||||
@@ -209,12 +241,148 @@ impl<T: Send + Sync + PartialOrd + Copy + Debug + 'static> DocSet for RangeDocSe
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::ops::Bound;
|
||||
use std::ops::{Bound, RangeInclusive};
|
||||
|
||||
use columnar::Column;
|
||||
|
||||
use super::RangeDocSet;
|
||||
use crate::collector::Count;
|
||||
use crate::directory::RamDirectory;
|
||||
use crate::docset::{SeekDangerResult, TERMINATED};
|
||||
use crate::query::RangeQuery;
|
||||
use crate::{schema, IndexBuilder, TantivyDocument, Term};
|
||||
use crate::{schema, DocSet, Index, IndexBuilder, TantivyDocument, Term};
|
||||
|
||||
/// Builds a single-segment index where doc `i` carries `values_for_doc(i)` in a u64 fast
|
||||
/// field, then returns its column so we can drive a `RangeDocSet` directly.
|
||||
fn build_u64_column(
|
||||
num_docs: usize,
|
||||
values_for_doc: impl Fn(usize) -> Vec<u64>,
|
||||
) -> Column<u64> {
|
||||
let mut schema_builder = schema::SchemaBuilder::new();
|
||||
let value_field = schema_builder.add_u64_field("value", schema::FAST);
|
||||
let index = Index::create_in_ram(schema_builder.build());
|
||||
{
|
||||
let mut writer = index.writer_for_tests().unwrap();
|
||||
for i in 0..num_docs {
|
||||
let mut doc = TantivyDocument::new();
|
||||
for v in values_for_doc(i) {
|
||||
doc.add_u64(value_field, v);
|
||||
}
|
||||
writer.add_document(doc).unwrap();
|
||||
}
|
||||
writer.commit().unwrap();
|
||||
}
|
||||
let searcher = index.reader().unwrap().searcher();
|
||||
assert_eq!(searcher.segment_readers().len(), 1);
|
||||
searcher
|
||||
.segment_reader(0)
|
||||
.fast_fields()
|
||||
.u64("value")
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn range_docset(
|
||||
value_range: RangeInclusive<u64>,
|
||||
num_docs: usize,
|
||||
values_for_doc: impl Fn(usize) -> Vec<u64>,
|
||||
) -> RangeDocSet<u64> {
|
||||
RangeDocSet::new(value_range, build_u64_column(num_docs, values_for_doc))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn seek_danger_found_leaves_valid_state() {
|
||||
// Even docs match the range, odd docs do not.
|
||||
let mut docset = range_docset(0..=0, 100, |i| vec![(i % 2) as u64]);
|
||||
|
||||
// Matching target: `Found`, and the docset is positioned exactly on it.
|
||||
assert_eq!(docset.seek_danger(10), SeekDangerResult::Found);
|
||||
assert_eq!(docset.doc(), 10);
|
||||
// A following advance resumes the scan right after the found doc.
|
||||
assert_eq!(docset.advance(), 12);
|
||||
assert_eq!(docset.doc(), 12);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn seek_danger_miss_returns_lower_bound() {
|
||||
let mut docset = range_docset(0..=0, 100, |i| vec![(i % 2) as u64]);
|
||||
|
||||
// Odd target does not match: lower bound is strictly greater than the target and never
|
||||
// skips past the next real match (here doc 12, the first even doc after 11).
|
||||
match docset.seek_danger(11) {
|
||||
SeekDangerResult::SeekLowerBound(lower_bound) => {
|
||||
assert!(lower_bound > 11);
|
||||
assert!(lower_bound <= 12);
|
||||
}
|
||||
SeekDangerResult::Found => panic!("11 should not match"),
|
||||
}
|
||||
// After a miss we may be in an invalid state; another seek_danger recovers it.
|
||||
assert_eq!(docset.seek_danger(12), SeekDangerResult::Found);
|
||||
assert_eq!(docset.doc(), 12);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn seek_danger_terminated_and_out_of_bounds() {
|
||||
let mut docset = range_docset(0..=0, 10, |i| vec![(i % 2) as u64]);
|
||||
assert_eq!(
|
||||
docset.seek_danger(TERMINATED),
|
||||
SeekDangerResult::SeekLowerBound(TERMINATED)
|
||||
);
|
||||
// A target past the last doc has no possible match either.
|
||||
assert_eq!(
|
||||
docset.seek_danger(10),
|
||||
SeekDangerResult::SeekLowerBound(TERMINATED)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn seek_danger_multivalued() {
|
||||
// Doc `i` holds values [i, i+1]; the range {5} matches docs 4 and 5.
|
||||
let mut docset = range_docset(5..=5, 20, |i| vec![i as u64, i as u64 + 1]);
|
||||
|
||||
assert_eq!(docset.seek_danger(4), SeekDangerResult::Found);
|
||||
assert_eq!(docset.doc(), 4);
|
||||
assert_eq!(docset.advance(), 5);
|
||||
// No further match after doc 5.
|
||||
assert_eq!(docset.advance(), TERMINATED);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn seek_danger_matches_seek() {
|
||||
// Cross-check seek_danger against the true next match for every target, on a column with a
|
||||
// few sparse matches.
|
||||
let matches = [3u32, 7, 50, 51, 99];
|
||||
let num_docs = 100;
|
||||
let values_for_doc = |i: usize| {
|
||||
vec![if matches.contains(&(i as u32)) {
|
||||
1u64
|
||||
} else {
|
||||
0u64
|
||||
}]
|
||||
};
|
||||
|
||||
for target in 0..num_docs as u32 {
|
||||
// The first matching doc greater than or equal to `target`, i.e. what `seek` returns.
|
||||
let expected = matches
|
||||
.iter()
|
||||
.copied()
|
||||
.find(|&m| m >= target)
|
||||
.unwrap_or(TERMINATED);
|
||||
|
||||
let mut danger = range_docset(1..=1, num_docs, values_for_doc);
|
||||
match danger.seek_danger(target) {
|
||||
SeekDangerResult::Found => {
|
||||
assert_eq!(expected, target, "target {target} reported Found");
|
||||
assert_eq!(danger.doc(), target);
|
||||
}
|
||||
SeekDangerResult::SeekLowerBound(lower_bound) => {
|
||||
assert_ne!(expected, target, "target {target} should have been Found");
|
||||
assert!(lower_bound > target);
|
||||
// The lower bound must never skip past the true next match.
|
||||
assert!(lower_bound <= expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn range_query_fast_optional_field_minimum() {
|
||||
|
||||
Reference in New Issue
Block a user