Speed up range-query intersections via seek_danger on RangeDocSet (up to ~50x faster)

A regular seek on RangeDocSet is costly: on a miss it fetches blocks and scans the column forward to materialize the next matching doc. As a non-leading docset in an intersection, that work is wasted — the driver only asks "does this candidate match?". seek_danger answers that with a cheap point lookup via Column::values_for_doc, returning a lower bound on a miss and leaving forward progress to the caller.

Also forward seek_danger through ConstScorer so the wrapped docset's implementation is used.

Benchmarks (bool_queries_with_range, _all_results / DocSetCollector):

```
dense and 0.1% a
a_AND_num_rand:[0_TO_9]_all_results Avg: 0.0827ms (-4.60%) Median: 0.0825ms (-4.82%) [0.0809ms .. 0.0891ms] Output: 43
a_AND_num_asc:[0_TO_9]_all_results Avg: 0.1937ms (-3.70%) Median: 0.1930ms (-3.59%) [0.1806ms .. 0.2044ms] Output: 100
a_AND_num_rand_fast:[0_TO_9]_all_results Avg: 0.0367ms (-92.67%) Median: 0.0365ms (-92.65%) [0.0340ms .. 0.0398ms] Output: 43
a_AND_num_asc_fast:[0_TO_9]_all_results Avg: 0.1052ms (-98.05%) Median: 0.1050ms (-97.98%) [0.1009ms .. 0.1117ms] Output: 100
num_rand_fast:[0_TO_9]_AND_num_asc_fast:[0_TO_9]_all_results Avg: 2.7147ms (-51.42%) Median: 2.7075ms (-49.58%) [2.6806ms .. 2.7799ms] Output: 968

dense and 1% a
a_AND_num_rand:[0_TO_9]_all_results Avg: 0.4373ms (-9.71%) Median: 0.4357ms (-10.12%) [0.4117ms .. 0.4711ms] Output: 463
a_AND_num_asc:[0_TO_9]_all_results Avg: 0.2342ms (-2.50%) Median: 0.2338ms (-2.56%) [0.2247ms .. 0.2452ms] Output: 1_054
a_AND_num_rand_fast:[0_TO_9]_all_results Avg: 0.3956ms (-82.86%) Median: 0.3943ms (-82.90%) [0.3815ms .. 0.4119ms] Output: 463
a_AND_num_asc_fast:[0_TO_9]_all_results Avg: 0.4896ms (-91.16%) Median: 0.4862ms (-90.81%) [0.4797ms .. 0.5084ms] Output: 1_054
num_rand_fast:[0_TO_9]_AND_num_asc_fast:[0_TO_9]_all_results Avg: 2.7108ms (-50.81%) Median: 2.6925ms (-49.51%) [2.6688ms .. 2.7868ms] Output: 968

dense and 10% a
a_AND_num_rand:[0_TO_9]_all_results Avg: 0.9869ms (-3.71%) Median: 0.9833ms (-3.83%) [0.9518ms .. 1.1218ms] Output: 4_914
a_AND_num_asc:[0_TO_9]_all_results Avg: 0.6352ms (-3.74%) Median: 0.6363ms (-3.32%) [0.6158ms .. 0.6488ms] Output: 10_152
a_AND_num_rand_fast:[0_TO_9]_all_results Avg: 3.1264ms (+0.39%) Median: 3.1466ms (+1.34%) [3.0261ms .. 3.2051ms] Output: 4_914
a_AND_num_asc_fast:[0_TO_9]_all_results Avg: 4.1547ms (-31.12%) Median: 4.0933ms (-28.55%) [3.7648ms .. 4.7600ms] Output: 10_152
num_rand_fast:[0_TO_9]_AND_num_asc_fast:[0_TO_9]_all_results Avg: 2.6973ms (-52.30%) Median: 2.6901ms (-49.86%) [2.6689ms .. 2.7677ms] Output: 968
```

Gains are largest when the range query is the non-leading docset of a low-cardinality intersection.
Fix term aggregation doc_count overflow when merged counts exceed u32::MAX
2026-06-29 05:40:42 +00:00 · 2026-06-22 11:47:28 +01:00 · 2026-06-18 17:07:43 +08:00
10 changed files with 250 additions and 131 deletions
--- a/benches/bool_queries_with_range.rs
+++ b/benches/bool_queries_with_range.rs
@@ -2,7 +2,7 @@ use binggan::{black_box, BenchGroup, BenchRunner};
 use rand::prelude::*;
 use rand::rngs::StdRng;
 use rand::SeedableRng;
-use tantivy::collector::{Collector, Count, DocSetCollector, TopDocs};
+use tantivy::collector::{Collector, Count, TopDocs};
 use tantivy::query::{Query, QueryParser};
 use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
 use tantivy::{doc, Index, Order, ReloadPolicy, Searcher};
@@ -110,43 +110,39 @@ fn main() {
    // Prepare corpora with varying scenarios
    let scenarios = vec![
        (
-            "dense and 99% a".to_string(),
-            10_000_000,
-            0.99,
+            "dense and 0.1% a".to_string(),
+            5_000_000,
+            0.001,
            "dense",
            0,
            9,
        ),
+        ("dense and 1% a".to_string(), 5_000_000, 0.01, "dense", 0, 9),
+        ("dense and 10% a".to_string(), 5_000_000, 0.1, "dense", 0, 9),
        (
-            "dense and 99% a".to_string(),
-            10_000_000,
-            0.99,
+            "dense and 50% a".to_string(),
+            5_000_000,
+            0.5,
            "dense",
-            990,
-            999,
+            0,
+            500,
        ),
        (
-            "sparse and 99% a".to_string(),
-            10_000_000,
+            "sparse and 50% a".to_string(),
+            5_000_000,
            0.99,
            "sparse",
            0,
            9,
        ),
-        (
-            "sparse and 99% a".to_string(),
-            10_000_000,
-            0.99,
-            "sparse",
-            9_999_990,
-            9_999_999,
-        ),
    ];

    let mut runner = BenchRunner::new();
-    for (scenario_id, n, p_title_a, num_rand_distribution, range_low, range_high) in scenarios {
+    for (scenario_id, num_docs, p_title_a, num_rand_distribution, range_low, range_high) in
+        scenarios
+    {
        // Build index for this scenario
-        let bench_index = build_shared_indices(n, p_title_a, num_rand_distribution);
+        let bench_index = build_shared_indices(num_docs, p_title_a, num_rand_distribution);

        // Create benchmark group
        let mut group = runner.new_group();
@@ -158,7 +154,7 @@ fn main() {
        let field_names = ["num_rand", "num_asc", "num_rand_fast", "num_asc_fast"];

        // Define the three terms we want to test with
-        let terms = ["a", "b", "z"];
+        let terms = ["a"];

        // Generate all combinations of terms and field names
        let mut queries = Vec::new();
@@ -202,8 +198,8 @@ fn run_benchmark_tasks(
        bench_group,
        bench_index,
        query_str,
-        DocSetCollector,
-        "all results",
+        (Count, TopDocs::with_limit(1000).order_by_score()),
+        "all_results",
    );

    // Test top 100 by the field (if it's a FAST field)
@@ -269,6 +265,10 @@ impl<C: Collector> SearchTask<C> {
            .downcast_ref::<Vec<(Option<u64>, tantivy::DocAddress)>>()
        {
            top_docs.len()
+        } else if let Some(top_docs_with_count) = (&result as &dyn std::any::Any)
+            .downcast_ref::<(usize, Vec<(f32, tantivy::DocAddress)>)>()
+        {
+            top_docs_with_count.0
        } else if let Some(top_docs) =
            (&result as &dyn std::any::Any).downcast_ref::<Vec<(u64, tantivy::DocAddress)>>()
        {
--- a/src/aggregation/bucket/composite/collector.rs
+++ b/src/aggregation/bucket/composite/collector.rs
@@ -275,7 +275,7 @@ impl SegmentCompositeCollector {
            dict.insert(
                key,
                IntermediateCompositeBucketEntry {
-                    doc_count: agg.count,
+                    doc_count: agg.count as u64,
                    sub_aggregation: sub_aggregation_res,
                },
            );
--- a/src/aggregation/bucket/term_agg/mod.rs
+++ b/src/aggregation/bucket/term_agg/mod.rs
@@ -957,7 +957,7 @@ fn into_intermediate_bucket_entry(
        )?;
    }
    Ok(IntermediateTermBucketEntry {
-        doc_count: bucket.count,
+        doc_count: bucket.count as u64,
        sub_aggregation: sub_aggregation_res,
    })
 }
--- a/src/aggregation/bucket/term_missing_agg.rs
+++ b/src/aggregation/bucket/term_missing_agg.rs
@@ -98,7 +98,7 @@ impl SegmentAggregationCollector for TermMissingAgg {

        let missing_count = &self.missing_count_per_bucket[parent_bucket_id as usize];
        let mut missing_entry = IntermediateTermBucketEntry {
-            doc_count: missing_count.missing_count,
+            doc_count: missing_count.missing_count as u64,
            sub_aggregation: Default::default(),
        };
        if let Some(sub_agg) = &mut self.sub_agg {
--- a/src/aggregation/intermediate_agg_result.rs
+++ b/src/aggregation/intermediate_agg_result.rs
@@ -930,7 +930,7 @@ impl IntermediateRangeBucketEntry {
 #[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
 pub struct IntermediateTermBucketEntry {
    /// The number of documents in the bucket.
-    pub doc_count: u32,
+    pub doc_count: u64,
    /// The sub_aggregation in this bucket.
    pub sub_aggregation: IntermediateAggregationResults,
 }
@@ -1240,6 +1240,24 @@ mod tests {
        assert_eq!(tree_left, tree_expected);
    }

+    #[test]
+    fn test_term_bucket_doc_count_no_u32_overflow() {
+        // Two segments each contributing (u32::MAX - 100) docs to the same term. Summing them
+        // overflowed when doc_count was u32.
+        let per_segment = u32::MAX as u64 - 100;
+        let mut entry = IntermediateTermBucketEntry {
+            doc_count: per_segment,
+            sub_aggregation: Default::default(),
+        };
+        entry
+            .merge_fruits(IntermediateTermBucketEntry {
+                doc_count: per_segment,
+                sub_aggregation: Default::default(),
+            })
+            .unwrap();
+        assert_eq!(entry.doc_count, per_segment * 2);
+    }
+
    #[test]
    fn test_merge_fruits_tree_empty() {
        let mut tree_left = get_intermediate_tree_with_ranges(&[
--- a/src/postings/block_search.rs
+++ b/src/postings/block_search.rs
@@ -1,68 +1,36 @@
 use crate::postings::compression::COMPRESSION_BLOCK_SIZE;

-/// Returns the index of the first element in `arr` that is greater than or
-/// equal to `target`.
+/// Search the first index containing an element greater or equal to
+/// the target.
 ///
-/// This is equivalent to:
-///
-/// ```ignore
-/// arr.iter().take_while(|&&val| val < target).count()
+/// The result should be equivalent to
+/// ```compile_fail
+/// block[..]
+///       .iter()
+///       .take_while(|&&val| val < target)
+///       .count()
 /// ```
+///
+/// NOTE(review): the signature below takes no `start` argument; the search
+/// always covers the whole block in 7 halving steps, which presumably
+/// matches a `COMPRESSION_BLOCK_SIZE` of 128 — confirm.
 ///
-/// # Assumptions
+/// # Assumptions
 ///
-/// - `arr` is sorted in nondecreasing order. Values may be repeated; the last block is often padded
-///   with duplicates of its final value.
-/// - `target` is less than or equal to the last element in `arr`, so the result is always a valid
-///   index into the block.
-///
-/// # `K`
-///
-/// `K` is the branching factor. Each reduction probes `K - 1` segment-end
-/// pivots, keeps the matching segment, and finally linearly scans the remaining
-/// range. `K` must be one of `2`, `4`, `8`, `16`, `32`, or `64`.
-///
-/// The core idea vs a traditional binary search is that we can very cheaply scan blocks of
-/// numbers, since they are already in the CPU cache line.
-#[inline(always)]
-pub fn kary_search<const K: usize>(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
-    const {
-        assert!(
-            matches!(K, 2 | 4 | 8 | 16 | 32 | 64),
-            "K must be one of 2, 4, 8, 16, 32, or 64"
-        );
-    };
-
-    let mut base = 0usize;
-    let mut range = COMPRESSION_BLOCK_SIZE;
-
-    loop {
-        let step = range / K;
-        if step == 0 {
-            break;
+/// - The block is sorted. Some elements may appear several times. This is the case at the
+///   end of the last block for instance.
+/// - The target is assumed smaller or equal to the last element of the block.
+pub fn branchless_binary_search(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
+    let mut start = 0;
+    let mut len = arr.len();
+    for _ in 0..7 {
+        len /= 2;
+        let pivot = unsafe { *arr.get_unchecked(start + len - 1) };
+        if pivot < target {
+            start += len;
        }
-        debug_assert_eq!(range % K, 0);
-        // Count how many segment-end pivots are < target (branchless, unrolled).
-        let mut count = 0usize;
-        for i in 1..K {
-            count += (unsafe { *arr.get_unchecked(base + i * step - 1) } < target) as usize;
-        }
-        base += count * step;
-        range = step;
    }
-
-    // Linear scan over the ≤K remaining elements.
-    let mut count = 0usize;
-    for i in 0..range {
-        count += (unsafe { *arr.get_unchecked(base + i) } < target) as usize;
-    }
-    base + count
-}
-
-/// entry point used by postings; implemented as an 8-ary branchless search.
-#[inline]
-pub fn search_block(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
-    kary_search::<8>(arr, target)
+    start
 }

 #[cfg(test)]
@@ -71,7 +39,7 @@ mod tests {

    use proptest::prelude::*;

-    use super::{kary_search, search_block};
+    use super::branchless_binary_search;
    use crate::docset::TERMINATED;
    use crate::postings::compression::COMPRESSION_BLOCK_SIZE;

@@ -89,7 +57,7 @@ mod tests {
        assert_eq!(block.len(), COMPRESSION_BLOCK_SIZE);
        let mut output_buffer = [TERMINATED; COMPRESSION_BLOCK_SIZE];
        output_buffer[..block.len()].copy_from_slice(block);
-        assert_eq!(search_block(&output_buffer, target), cursor);
+        assert_eq!(branchless_binary_search(&output_buffer, target), cursor);
    }

    fn util_test_search_in_block_all(block: &[u32]) {
@@ -112,45 +80,6 @@ mod tests {
        util_test_search_in_block_all(&v[..]);
    }

-    #[test]
-    fn test_search_in_branchless_binary_search_corner_cases() {
-        let all_same = vec![7u32; COMPRESSION_BLOCK_SIZE];
-        util_test_search_in_block_all(&all_same);
-
-        let repeated_across_pivots: Vec<u32> = (0..COMPRESSION_BLOCK_SIZE)
-            .map(|i| (i / 17) as u32)
-            .collect();
-        util_test_search_in_block_all(&repeated_across_pivots);
-
-        let mut padded_last_block = vec![0u32; COMPRESSION_BLOCK_SIZE];
-        for (i, value) in padded_last_block.iter_mut().enumerate() {
-            *value = if i < COMPRESSION_BLOCK_SIZE / 2 {
-                i as u32
-            } else {
-                TERMINATED
-            };
-        }
-        util_test_search_in_block_all(&padded_last_block);
-    }
-
-    #[test]
-    fn test_kary_search_allowed_branching_factors() {
-        let mut block = [TERMINATED; COMPRESSION_BLOCK_SIZE];
-        for (idx, value) in block.iter_mut().enumerate() {
-            *value = (idx / 3) as u32;
-        }
-
-        for target in [0, 1, 17, block[COMPRESSION_BLOCK_SIZE - 1]] {
-            let expected = search_in_block_trivial_but_slow(&block, target);
-            assert_eq!(kary_search::<2>(&block, target), expected);
-            assert_eq!(kary_search::<4>(&block, target), expected);
-            assert_eq!(kary_search::<8>(&block, target), expected);
-            assert_eq!(kary_search::<16>(&block, target), expected);
-            assert_eq!(kary_search::<32>(&block, target), expected);
-            assert_eq!(kary_search::<64>(&block, target), expected);
-        }
-    }
-
    fn monotonous_block() -> impl Strategy<Value = Vec<u32>> {
        prop::collection::vec(0u32..5u32, COMPRESSION_BLOCK_SIZE).prop_map(|mut deltas| {
            let mut el = 0;
--- a/src/postings/compression/mod.rs
+++ b/src/postings/compression/mod.rs
@@ -158,7 +158,7 @@ impl BlockDecoder {
    /// Uses the padded buffer to enable branchless search.
    #[inline]
    pub(crate) fn seek_within_block(&self, target: u32) -> usize {
-        crate::postings::search_block(&self.output, target)
+        crate::postings::branchless_binary_search(&self.output, target)
    }

    #[inline]
--- a/src/postings/mod.rs
+++ b/src/postings/mod.rs
@@ -2,7 +2,7 @@

 mod block_search;

-pub(crate) use self::block_search::search_block;
+pub(crate) use self::block_search::branchless_binary_search;

 mod block_segment_postings;
 pub(crate) mod compression;
--- a/src/query/const_score_query.rs
+++ b/src/query/const_score_query.rs
@@ -1,6 +1,6 @@
 use std::fmt;

-use crate::docset::COLLECT_BLOCK_BUFFER_LEN;
+use crate::docset::{SeekDangerResult, COLLECT_BLOCK_BUFFER_LEN};
 use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight};
 use crate::{DocId, DocSet, Score, SegmentReader, TantivyError, Term};

@@ -119,6 +119,10 @@ impl<TDocSet: DocSet> DocSet for ConstScorer<TDocSet> {
        self.docset.seek(target)
    }

+    fn seek_danger(&mut self, target: DocId) -> SeekDangerResult {
+        self.docset.seek_danger(target)
+    }
+
    fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize {
        self.docset.fill_buffer(buffer)
    }
--- a/src/query/range_query/fast_field_range_doc_set.rs
+++ b/src/query/range_query/fast_field_range_doc_set.rs
@@ -3,6 +3,7 @@ use std::ops::RangeInclusive;

 use columnar::Column;

+use crate::docset::SeekDangerResult;
 use crate::{DocId, DocSet, TERMINATED};

 /// Helper to have a cursor over a vec of docids
@@ -184,6 +185,37 @@ impl<T: Send + Sync + PartialOrd + Copy + Debug + 'static> DocSet for RangeDocSe
        doc
    }

+    /// `seek_danger` only needs to answer whether `target` itself matches, so it does a cheap
+    /// point lookup on the column instead of scanning forward to materialize the next match (the
+    /// expensive part of a regular `seek`).
+    fn seek_danger(&mut self, target: DocId) -> SeekDangerResult {
+        // Covers `target == TERMINATED` and any target past the last doc: no match is possible.
+        if target >= self.column.num_docs() {
+            return SeekDangerResult::SeekLowerBound(TERMINATED);
+        }
+
+        if self.is_last_seek_distance_large(target) {
+            self.reset_fetch_range();
+        }
+        self.last_seek_pos_opt = Some(target);
+
+        let is_match = self
+            .column
+            .values_for_doc(target)
+            .any(|value| self.value_range.contains(&value));
+        if is_match {
+            // Leave the docset in a valid state positioned on `target`, so `doc()` returns it and a
+            // following `advance()` resumes the scan right after it.
+            self.loaded_docs.get_cleared_data().push(target);
+            self.next_fetch_start = target + 1;
+            SeekDangerResult::Found
+        } else {
+            // `target` is not in the docset. The next match is strictly greater than `target`, so
+            // `target + 1` is a valid lower bound. We may leave the docset in an invalid state.
+            SeekDangerResult::SeekLowerBound(target + 1)
+        }
+    }
+
    fn size_hint(&self) -> u32 {
        // TODO: Implement a better size hint
        self.column.num_docs() / 10
@@ -209,12 +241,148 @@ impl<T: Send + Sync + PartialOrd + Copy + Debug + 'static> DocSet for RangeDocSe

 #[cfg(test)]
 mod tests {
-    use std::ops::Bound;
+    use std::ops::{Bound, RangeInclusive};

+    use columnar::Column;
+
+    use super::RangeDocSet;
    use crate::collector::Count;
    use crate::directory::RamDirectory;
+    use crate::docset::{SeekDangerResult, TERMINATED};
    use crate::query::RangeQuery;
-    use crate::{schema, IndexBuilder, TantivyDocument, Term};
+    use crate::{schema, DocSet, Index, IndexBuilder, TantivyDocument, Term};
+
+    /// Builds a single-segment index where doc `i` carries `values_for_doc(i)` in a u64 fast
+    /// field, then returns its column so we can drive a `RangeDocSet` directly.
+    fn build_u64_column(
+        num_docs: usize,
+        values_for_doc: impl Fn(usize) -> Vec<u64>,
+    ) -> Column<u64> {
+        let mut schema_builder = schema::SchemaBuilder::new();
+        let value_field = schema_builder.add_u64_field("value", schema::FAST);
+        let index = Index::create_in_ram(schema_builder.build());
+        {
+            let mut writer = index.writer_for_tests().unwrap();
+            for i in 0..num_docs {
+                let mut doc = TantivyDocument::new();
+                for v in values_for_doc(i) {
+                    doc.add_u64(value_field, v);
+                }
+                writer.add_document(doc).unwrap();
+            }
+            writer.commit().unwrap();
+        }
+        let searcher = index.reader().unwrap().searcher();
+        assert_eq!(searcher.segment_readers().len(), 1);
+        searcher
+            .segment_reader(0)
+            .fast_fields()
+            .u64("value")
+            .unwrap()
+    }
+
+    fn range_docset(
+        value_range: RangeInclusive<u64>,
+        num_docs: usize,
+        values_for_doc: impl Fn(usize) -> Vec<u64>,
+    ) -> RangeDocSet<u64> {
+        RangeDocSet::new(value_range, build_u64_column(num_docs, values_for_doc))
+    }
+
+    #[test]
+    fn seek_danger_found_leaves_valid_state() {
+        // Even docs match the range, odd docs do not.
+        let mut docset = range_docset(0..=0, 100, |i| vec![(i % 2) as u64]);
+
+        // Matching target: `Found`, and the docset is positioned exactly on it.
+        assert_eq!(docset.seek_danger(10), SeekDangerResult::Found);
+        assert_eq!(docset.doc(), 10);
+        // A following advance resumes the scan right after the found doc.
+        assert_eq!(docset.advance(), 12);
+        assert_eq!(docset.doc(), 12);
+    }
+
+    #[test]
+    fn seek_danger_miss_returns_lower_bound() {
+        let mut docset = range_docset(0..=0, 100, |i| vec![(i % 2) as u64]);
+
+        // Odd target does not match: lower bound is strictly greater than the target and never
+        // skips past the next real match (here doc 12, the first even doc after 11).
+        match docset.seek_danger(11) {
+            SeekDangerResult::SeekLowerBound(lower_bound) => {
+                assert!(lower_bound > 11);
+                assert!(lower_bound <= 12);
+            }
+            SeekDangerResult::Found => panic!("11 should not match"),
+        }
+        // After a miss we may be in an invalid state; another seek_danger recovers it.
+        assert_eq!(docset.seek_danger(12), SeekDangerResult::Found);
+        assert_eq!(docset.doc(), 12);
+    }
+
+    #[test]
+    fn seek_danger_terminated_and_out_of_bounds() {
+        let mut docset = range_docset(0..=0, 10, |i| vec![(i % 2) as u64]);
+        assert_eq!(
+            docset.seek_danger(TERMINATED),
+            SeekDangerResult::SeekLowerBound(TERMINATED)
+        );
+        // A target past the last doc has no possible match either.
+        assert_eq!(
+            docset.seek_danger(10),
+            SeekDangerResult::SeekLowerBound(TERMINATED)
+        );
+    }
+
+    #[test]
+    fn seek_danger_multivalued() {
+        // Doc `i` holds values [i, i+1]; the range {5} matches docs 4 and 5.
+        let mut docset = range_docset(5..=5, 20, |i| vec![i as u64, i as u64 + 1]);
+
+        assert_eq!(docset.seek_danger(4), SeekDangerResult::Found);
+        assert_eq!(docset.doc(), 4);
+        assert_eq!(docset.advance(), 5);
+        // No further match after doc 5.
+        assert_eq!(docset.advance(), TERMINATED);
+    }
+
+    #[test]
+    fn seek_danger_matches_seek() {
+        // Cross-check seek_danger against the true next match for every target, on a column with a
+        // few sparse matches.
+        let matches = [3u32, 7, 50, 51, 99];
+        let num_docs = 100;
+        let values_for_doc = |i: usize| {
+            vec![if matches.contains(&(i as u32)) {
+                1u64
+            } else {
+                0u64
+            }]
+        };
+
+        for target in 0..num_docs as u32 {
+            // The first matching doc greater than or equal to `target`, i.e. what `seek` returns.
+            let expected = matches
+                .iter()
+                .copied()
+                .find(|&m| m >= target)
+                .unwrap_or(TERMINATED);
+
+            let mut danger = range_docset(1..=1, num_docs, values_for_doc);
+            match danger.seek_danger(target) {
+                SeekDangerResult::Found => {
+                    assert_eq!(expected, target, "target {target} reported Found");
+                    assert_eq!(danger.doc(), target);
+                }
+                SeekDangerResult::SeekLowerBound(lower_bound) => {
+                    assert_ne!(expected, target, "target {target} should have been Found");
+                    assert!(lower_bound > target);
+                    // The lower bound must never skip past the true next match.
+                    assert!(lower_bound <= expected);
+                }
+            }
+        }
+    }

    #[test]
    fn range_query_fast_optional_field_minimum() {