From ca87fcd454e57a748ce6201f2853e9183a9bebae Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Mon, 1 Dec 2025 03:26:29 -0800 Subject: [PATCH] Implement `collect_block` for `Collector`s which wrap other `Collector`s (#2727) * Implement `collect_block` for tuple Collectors, and for MultiCollector. * Two more. --- src/collector/filter_collector_wrapper.rs | 32 +++++++++++++++++++++++ src/collector/mod.rs | 24 +++++++++++++++++ src/collector/multi_collector.rs | 6 +++++ 3 files changed, 62 insertions(+) diff --git a/src/collector/filter_collector_wrapper.rs b/src/collector/filter_collector_wrapper.rs index 167ec980b..4e09b027c 100644 --- a/src/collector/filter_collector_wrapper.rs +++ b/src/collector/filter_collector_wrapper.rs @@ -120,6 +120,7 @@ where segment_collector, predicate: self.predicate.clone(), t_predicate_value: PhantomData, + filtered_docs: Vec::with_capacity(crate::COLLECT_BLOCK_BUFFER_LEN), }) } @@ -140,6 +141,7 @@ pub struct FilterSegmentCollector, + filtered_docs: Vec, } impl @@ -176,6 +178,20 @@ where } } + fn collect_block(&mut self, docs: &[DocId]) { + self.filtered_docs.clear(); + for &doc in docs { + // TODO: `accept_document` could be further optimized to do batch lookups of column + // values for single-valued columns. + if self.accept_document(doc) { + self.filtered_docs.push(doc); + } + } + if !self.filtered_docs.is_empty() { + self.segment_collector.collect_block(&self.filtered_docs); + } + } + fn harvest(self) -> TSegmentCollector::Fruit { self.segment_collector.harvest() } @@ -274,6 +290,7 @@ where segment_collector, predicate: self.predicate.clone(), buffer: Vec::new(), + filtered_docs: Vec::with_capacity(crate::COLLECT_BLOCK_BUFFER_LEN), }) } @@ -296,6 +313,7 @@ where TPredicate: 'static segment_collector: TSegmentCollector, predicate: TPredicate, buffer: Vec, + filtered_docs: Vec, } impl BytesFilterSegmentCollector @@ -334,6 +352,20 @@ where } } + fn collect_block(&mut self, docs: &[DocId]) { + self.filtered_docs.clear(); + for &doc in docs { + // TODO: `accept_document` could be further optimized to do batch lookups of column + // values for single-valued columns. + if self.accept_document(doc) { + self.filtered_docs.push(doc); + } + } + if !self.filtered_docs.is_empty() { + self.segment_collector.collect_block(&self.filtered_docs); + } + } + fn harvest(self) -> TSegmentCollector::Fruit { self.segment_collector.harvest() } diff --git a/src/collector/mod.rs b/src/collector/mod.rs index a31754316..6c509fcfb 100644 --- a/src/collector/mod.rs +++ b/src/collector/mod.rs @@ -214,6 +214,12 @@ impl SegmentCollector for Option Self::Fruit { self.map(|segment_collector| segment_collector.harvest()) } @@ -342,6 +348,11 @@ where self.1.collect(doc, score); } + fn collect_block(&mut self, docs: &[DocId]) { + self.0.collect_block(docs); + self.1.collect_block(docs); + } + fn harvest(self) -> ::Fruit { (self.0.harvest(), self.1.harvest()) } @@ -407,6 +418,12 @@ where self.2.collect(doc, score); } + fn collect_block(&mut self, docs: &[DocId]) { + self.0.collect_block(docs); + self.1.collect_block(docs); + self.2.collect_block(docs); + } + fn harvest(self) -> ::Fruit { (self.0.harvest(), self.1.harvest(), self.2.harvest()) } @@ -482,6 +499,13 @@ where self.3.collect(doc, score); } + fn collect_block(&mut self, docs: &[DocId]) { + self.0.collect_block(docs); + self.1.collect_block(docs); + self.2.collect_block(docs); + self.3.collect_block(docs); + } + fn harvest(self) -> ::Fruit { ( self.0.harvest(), diff --git a/src/collector/multi_collector.rs b/src/collector/multi_collector.rs index 8077577d2..7d2196e02 100644 --- a/src/collector/multi_collector.rs +++ b/src/collector/multi_collector.rs @@ -250,6 +250,12 @@ impl SegmentCollector for MultiCollectorChild { } } + fn collect_block(&mut self, docs: &[DocId]) { + for child in &mut self.children { + child.collect_block(docs); + } + } + fn harvest(self) -> MultiFruit { MultiFruit { sub_fruits: self