Implement collect_block for Collectors which wrap other Collectors (#2727)

* Implement `collect_block` for tuple Collectors, and for MultiCollector.

* Two more.
This commit is contained in:
Stu Hood
2025-12-01 03:26:29 -08:00
committed by GitHub
parent 08a92675dc
commit ca87fcd454
3 changed files with 62 additions and 0 deletions

View File

@@ -120,6 +120,7 @@ where
segment_collector,
predicate: self.predicate.clone(),
t_predicate_value: PhantomData,
filtered_docs: Vec::with_capacity(crate::COLLECT_BLOCK_BUFFER_LEN),
})
}
@@ -140,6 +141,7 @@ pub struct FilterSegmentCollector<TSegmentCollector, TPredicate, TPredicateValue
segment_collector: TSegmentCollector,
predicate: TPredicate,
t_predicate_value: PhantomData<TPredicateValue>,
filtered_docs: Vec<DocId>,
}
impl<TSegmentCollector, TPredicate, TPredicateValue>
@@ -176,6 +178,20 @@ where
}
}
fn collect_block(&mut self, docs: &[DocId]) {
self.filtered_docs.clear();
for &doc in docs {
// TODO: `accept_document` could be further optimized to do batch lookups of column
// values for single-valued columns.
if self.accept_document(doc) {
self.filtered_docs.push(doc);
}
}
if !self.filtered_docs.is_empty() {
self.segment_collector.collect_block(&self.filtered_docs);
}
}
fn harvest(self) -> TSegmentCollector::Fruit {
self.segment_collector.harvest()
}
@@ -274,6 +290,7 @@ where
segment_collector,
predicate: self.predicate.clone(),
buffer: Vec::new(),
filtered_docs: Vec::with_capacity(crate::COLLECT_BLOCK_BUFFER_LEN),
})
}
@@ -296,6 +313,7 @@ where TPredicate: 'static
segment_collector: TSegmentCollector,
predicate: TPredicate,
buffer: Vec<u8>,
filtered_docs: Vec<DocId>,
}
impl<TSegmentCollector, TPredicate> BytesFilterSegmentCollector<TSegmentCollector, TPredicate>
@@ -334,6 +352,20 @@ where
}
}
fn collect_block(&mut self, docs: &[DocId]) {
self.filtered_docs.clear();
for &doc in docs {
// TODO: `accept_document` could be further optimized to do batch lookups of column
// values for single-valued columns.
if self.accept_document(doc) {
self.filtered_docs.push(doc);
}
}
if !self.filtered_docs.is_empty() {
self.segment_collector.collect_block(&self.filtered_docs);
}
}
fn harvest(self) -> TSegmentCollector::Fruit {
self.segment_collector.harvest()
}

View File

@@ -214,6 +214,12 @@ impl<TSegmentCollector: SegmentCollector> SegmentCollector for Option<TSegmentCo
}
}
fn collect_block(&mut self, docs: &[DocId]) {
if let Some(segment_collector) = self {
segment_collector.collect_block(docs);
}
}
fn harvest(self) -> Self::Fruit {
self.map(|segment_collector| segment_collector.harvest())
}
@@ -342,6 +348,11 @@ where
self.1.collect(doc, score);
}
fn collect_block(&mut self, docs: &[DocId]) {
self.0.collect_block(docs);
self.1.collect_block(docs);
}
fn harvest(self) -> <Self as SegmentCollector>::Fruit {
(self.0.harvest(), self.1.harvest())
}
@@ -407,6 +418,12 @@ where
self.2.collect(doc, score);
}
fn collect_block(&mut self, docs: &[DocId]) {
self.0.collect_block(docs);
self.1.collect_block(docs);
self.2.collect_block(docs);
}
fn harvest(self) -> <Self as SegmentCollector>::Fruit {
(self.0.harvest(), self.1.harvest(), self.2.harvest())
}
@@ -482,6 +499,13 @@ where
self.3.collect(doc, score);
}
fn collect_block(&mut self, docs: &[DocId]) {
self.0.collect_block(docs);
self.1.collect_block(docs);
self.2.collect_block(docs);
self.3.collect_block(docs);
}
fn harvest(self) -> <Self as SegmentCollector>::Fruit {
(
self.0.harvest(),

View File

@@ -250,6 +250,12 @@ impl SegmentCollector for MultiCollectorChild {
}
}
fn collect_block(&mut self, docs: &[DocId]) {
for child in &mut self.children {
child.collect_block(docs);
}
}
fn harvest(self) -> MultiFruit {
MultiFruit {
sub_fruits: self