diff --git a/common/src/bitset.rs b/common/src/bitset.rs index cf719e53a..d5a9a13c3 100644 --- a/common/src/bitset.rs +++ b/common/src/bitset.rs @@ -47,6 +47,9 @@ impl TinySet { TinySet(val) } + /// An empty `TinySet` constant. + pub const EMPTY: TinySet = TinySet(0u64); + /// Returns an empty `TinySet`. #[inline] pub fn empty() -> TinySet { diff --git a/src/collector/count_collector.rs b/src/collector/count_collector.rs index dcd102249..041c2957f 100644 --- a/src/collector/count_collector.rs +++ b/src/collector/count_collector.rs @@ -1,5 +1,6 @@ use super::Collector; use crate::collector::SegmentCollector; +use crate::query::Weight; use crate::{DocId, Score, SegmentOrdinal, SegmentReader}; /// `CountCollector` collector only counts how many @@ -55,6 +56,15 @@ impl Collector for Count { fn merge_fruits(&self, segment_counts: Vec) -> crate::Result { Ok(segment_counts.into_iter().sum()) } + + fn collect_segment( + &self, + weight: &dyn Weight, + _segment_ord: u32, + reader: &SegmentReader, + ) -> crate::Result { + Ok(weight.count(reader)? as usize) + } } #[derive(Default)] diff --git a/src/docset.rs b/src/docset.rs index 8e72281d2..c02bbbfc3 100644 --- a/src/docset.rs +++ b/src/docset.rs @@ -1,5 +1,7 @@ use std::borrow::{Borrow, BorrowMut}; +use common::TinySet; + use crate::fastfield::AliveBitSet; use crate::DocId; @@ -14,6 +16,12 @@ pub const TERMINATED: DocId = i32::MAX as u32; /// exactly this size as long as we can fill the buffer. pub const COLLECT_BLOCK_BUFFER_LEN: usize = 64; +/// Number of `TinySet` (64-bit) buckets in a block used by [`DocSet::fill_bitset_block`]. +pub const BLOCK_NUM_TINYBITSETS: usize = 16; + +/// Number of doc IDs covered by one block: `BLOCK_NUM_TINYBITSETS * 64 = 1024`. +pub const BLOCK_WINDOW: u32 = BLOCK_NUM_TINYBITSETS as u32 * 64; + /// Represents an iterable set of sorted doc ids. pub trait DocSet: Send { /// Goes to the next element. @@ -160,6 +168,31 @@ pub trait DocSet: Send { self.size_hint() as u64 } + /// Fills a bitmask representing which documents in `[min_doc, min_doc + BLOCK_WINDOW)` are + /// present in this docset. + /// + /// The window is divided into `BLOCK_NUM_TINYBITSETS` buckets of 64 docs each. + /// Returns the next doc `>= min_doc + BLOCK_WINDOW`, or `TERMINATED` if exhausted. + fn fill_bitset_block( + &mut self, + min_doc: DocId, + mask: &mut [TinySet; BLOCK_NUM_TINYBITSETS], + ) -> DocId { + self.seek(min_doc); + let horizon = min_doc + BLOCK_WINDOW; + loop { + let doc = self.doc(); + if doc >= horizon { + return doc; + } + let delta = doc - min_doc; + mask[(delta / 64) as usize].insert_mut(delta % 64); + if self.advance() == TERMINATED { + return TERMINATED; + } + } + } + /// Returns the number documents matching. /// Calling this method consumes the `DocSet`. fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 { @@ -214,6 +247,18 @@ impl DocSet for &mut dyn DocSet { (**self).seek_danger(target) } + fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize { + (**self).fill_buffer(buffer) + } + + fn fill_bitset_block( + &mut self, + min_doc: DocId, + mask: &mut [TinySet; BLOCK_NUM_TINYBITSETS], + ) -> DocId { + (**self).fill_bitset_block(min_doc, mask) + } + fn doc(&self) -> u32 { (**self).doc() } @@ -256,6 +301,15 @@ impl DocSet for Box { unboxed.fill_buffer(buffer) } + fn fill_bitset_block( + &mut self, + min_doc: DocId, + mask: &mut [TinySet; BLOCK_NUM_TINYBITSETS], + ) -> DocId { + let unboxed: &mut TDocSet = self.borrow_mut(); + unboxed.fill_bitset_block(min_doc, mask) + } + fn doc(&self) -> DocId { let unboxed: &TDocSet = self.borrow(); unboxed.doc() diff --git a/src/query/intersection.rs b/src/query/intersection.rs index 64fcf78dd..7d90d09fc 100644 --- a/src/query/intersection.rs +++ b/src/query/intersection.rs @@ -1,5 +1,7 @@ +use common::TinySet; + use super::size_hint::estimate_intersection; -use crate::docset::{DocSet, SeekDangerResult, TERMINATED}; +use crate::docset::{DocSet, SeekDangerResult, BLOCK_NUM_TINYBITSETS, TERMINATED}; use crate::query::term_query::TermScorer; use crate::query::{EmptyScorer, Scorer}; use crate::{DocId, Score}; @@ -17,7 +19,7 @@ use crate::{DocId, Score}; /// `size_hint` of the intersection. pub fn intersect_scorers( mut scorers: Vec>, - num_docs_segment: u32, + segment_num_docs: u32, ) -> Box { if scorers.is_empty() { return Box::new(EmptyScorer); @@ -42,14 +44,14 @@ pub fn intersect_scorers( left: *(left.downcast::().map_err(|_| ()).unwrap()), right: *(right.downcast::().map_err(|_| ()).unwrap()), others: scorers, - num_docs: num_docs_segment, + segment_num_docs, }); } Box::new(Intersection { left, right, others: scorers, - num_docs: num_docs_segment, + segment_num_docs, }) } @@ -58,7 +60,7 @@ pub struct Intersection> left: TDocSet, right: TDocSet, others: Vec, - num_docs: u32, + segment_num_docs: u32, } fn go_to_first_doc(docsets: &mut [TDocSet]) -> DocId { @@ -78,7 +80,10 @@ fn go_to_first_doc(docsets: &mut [TDocSet]) -> DocId { impl Intersection { /// num_docs is the number of documents in the segment. - pub(crate) fn new(mut docsets: Vec, num_docs: u32) -> Intersection { + pub(crate) fn new( + mut docsets: Vec, + segment_num_docs: u32, + ) -> Intersection { let num_docsets = docsets.len(); assert!(num_docsets >= 2); docsets.sort_by_key(|docset| docset.cost()); @@ -97,7 +102,7 @@ impl Intersection { left, right, others: docsets, - num_docs, + segment_num_docs, } } } @@ -214,7 +219,7 @@ impl DocSet for Intersection DocSet for Intersection u32 { + const DENSITY_THRESHOLD_INVERSE: u32 = 32; + if self + .left + .size_hint() + .saturating_mul(DENSITY_THRESHOLD_INVERSE) + < self.segment_num_docs + { + // Sparse path: if the lead iterator covers less than ~3% of docs, + // the block approach wastes time on mostly-empty blocks. + self.count_including_deleted_sparse() + } else { + // Dense approach. We push documents into a block bitset to then + // perform count using popcount. + self.count_including_deleted_dense() + } + } +} + +const EMPTY_BLOCK: [TinySet; BLOCK_NUM_TINYBITSETS] = [TinySet::EMPTY; BLOCK_NUM_TINYBITSETS]; + +/// ANDs `other` into `mask` in-place. Returns `true` if the result is all zeros. +#[inline] +fn and_blocks_and_return_is_empty( + mask: &mut [TinySet; BLOCK_NUM_TINYBITSETS], + update: &[TinySet; BLOCK_NUM_TINYBITSETS], +) -> bool { + let mut all_empty = true; + for (mask_tinyset, update_tinyset) in mask.iter_mut().zip(update.iter()) { + *mask_tinyset = mask_tinyset.intersect(*update_tinyset); + all_empty &= mask_tinyset.is_empty(); + } + all_empty +} + +impl Intersection { + fn count_including_deleted_sparse(&mut self) -> u32 { + let mut count = 0u32; + let mut doc = self.doc(); + while doc != TERMINATED { + count += 1; + doc = self.advance(); + } + count + } + + /// Dense block-wise bitmask intersection count. + /// + /// Fills a 1024-doc window from each iterator, ANDs the bitmasks together, + /// and popcounts the result. `fill_bitset_block` handles seeking tails forward + /// when they lag behind the current block. + fn count_including_deleted_dense(&mut self) -> u32 { + let mut count = 0u32; + let mut next_base = self.left.doc(); + + while next_base < TERMINATED { + let base = next_base; + + // Fill lead bitmask. + let mut mask = EMPTY_BLOCK; + next_base = next_base.max(self.left.fill_bitset_block(base, &mut mask)); + + let mut tail_mask = EMPTY_BLOCK; + next_base = next_base.max(self.right.fill_bitset_block(base, &mut tail_mask)); + + if and_blocks_and_return_is_empty(&mut mask, &tail_mask) { + continue; + } + // AND with each additional tail. + for other in &mut self.others { + let mut other_mask = EMPTY_BLOCK; + next_base = next_base.max(other.fill_bitset_block(base, &mut other_mask)); + if and_blocks_and_return_is_empty(&mut mask, &other_mask) { + continue; + } + } + + for tinyset in &mask { + count += tinyset.len(); + } + } + + count + } } impl Scorer for Intersection @@ -421,6 +511,82 @@ mod tests { } } + proptest! { + #[test] + fn prop_test_count_including_deleted_matches_default( + a in sorted_deduped_vec(1200, 400), + b in sorted_deduped_vec(1200, 400), + c in sorted_deduped_vec(1200, 400), + num_docs in 1200u32..2000u32, + ) { + // Compute expected count via set intersection. + let expected: u32 = a.iter() + .filter(|doc| b.contains(doc) && c.contains(doc)) + .count() as u32; + + // Test count_including_deleted (dense path). + let make_intersection = || { + Intersection::new( + vec![ + VecDocSet::from(a.clone()), + VecDocSet::from(b.clone()), + VecDocSet::from(c.clone()), + ], + num_docs, + ) + }; + + let mut intersection = make_intersection(); + let count = intersection.count_including_deleted(); + prop_assert_eq!(count, expected, + "count_including_deleted mismatch: a={:?}, b={:?}, c={:?}", a, b, c); + } + } + + #[test] + fn test_count_including_deleted_two_way() { + let left = VecDocSet::from(vec![1, 3, 9]); + let right = VecDocSet::from(vec![3, 4, 9, 18]); + let mut intersection = Intersection::new(vec![left, right], 100); + assert_eq!(intersection.count_including_deleted(), 2); + } + + #[test] + fn test_count_including_deleted_empty() { + let a = VecDocSet::from(vec![1, 3]); + let b = VecDocSet::from(vec![1, 4]); + let c = VecDocSet::from(vec![3, 9]); + let mut intersection = Intersection::new(vec![a, b, c], 100); + assert_eq!(intersection.count_including_deleted(), 0); + } + + /// Test with enough documents to exercise the dense path (>= num_docs/32). + #[test] + fn test_count_including_deleted_dense_path() { + // Create dense docsets: many docs relative to segment size. + let docs_a: Vec = (0..2000).step_by(2).collect(); // even numbers 0..2000 + let docs_b: Vec = (0..2000).step_by(3).collect(); // multiples of 3 + let expected = docs_a.iter().filter(|d| *d % 3 == 0).count() as u32; + + let a = VecDocSet::from(docs_a); + let b = VecDocSet::from(docs_b); + let mut intersection = Intersection::new(vec![a, b], 2000); + assert_eq!(intersection.count_including_deleted(), expected); + } + + /// Test that spans multiple blocks (>1024 docs). + #[test] + fn test_count_including_deleted_multi_block() { + let docs_a: Vec = (0..5000).collect(); + let docs_b: Vec = (0..5000).step_by(7).collect(); + let expected = docs_b.len() as u32; // all of b is in a + + let a = VecDocSet::from(docs_a); + let b = VecDocSet::from(docs_b); + let mut intersection = Intersection::new(vec![a, b], 5000); + assert_eq!(intersection.count_including_deleted(), expected); + } + #[test] fn test_bug_2811_intersection_candidate_should_increase() { let mut schema_builder = Schema::builder(); diff --git a/src/query/term_query/term_scorer.rs b/src/query/term_query/term_scorer.rs index a75648348..81932dede 100644 --- a/src/query/term_query/term_scorer.rs +++ b/src/query/term_query/term_scorer.rs @@ -117,6 +117,12 @@ impl DocSet for TermScorer { fn size_hint(&self) -> u32 { self.postings.size_hint() } + + // TODO + // It is probably possible to optimize fill_bitset_block for TermScorer, + // working directly with the blocks, enabling vectorization. + // I did not manage to get a performance improvement on Mac ARM, + // and do not have access to x86 to investigate. } impl Scorer for TermScorer {