Fix compound filters, and remove redundant implementation in Chain implementation

This commit is contained in:
Stu Hood
2025-12-28 10:18:52 -07:00
parent b2e980b450
commit 00110312c9
2 changed files with 72 additions and 144 deletions

View File

@@ -4,7 +4,6 @@ use columnar::ValueRange;
use crate::collector::sort_key::{Comparator, NaturalComparator};
use crate::collector::sort_key_top_collector::TopBySortKeySegmentCollector;
use crate::collector::top_score_collector::push_assuming_capacity;
use crate::collector::{
default_collect_segment_impl, ComparableDoc, SegmentCollector as _, TopNComputer,
};
@@ -98,27 +97,6 @@ pub trait SegmentSortKeyComputer: 'static {
self.segment_comparator().compare(left, right)
}
/// Similar to `accept_sort_key_lazy`, but pushes results directly into the given buffer. Does
/// not support scoring.
///
/// The buffer must have at least enough capacity for `docs` matches, or this method will
/// panic.
fn accept_sort_key_block_lazy(
&mut self,
docs: &[DocId],
threshold: &Self::SegmentSortKey,
output: &mut Vec<ComparableDoc<Self::SegmentSortKey, DocId>>,
) {
let comparator = self.segment_comparator();
for &doc in docs {
let sort_key = self.segment_sort_key(doc, 0.0);
let cmp = comparator.compare(&sort_key, threshold);
if cmp != Ordering::Less {
push_assuming_capacity(ComparableDoc { sort_key, doc }, output);
}
}
}
/// Convert a segment level sort key into the global sort key.
fn convert_segment_sort_key(&self, sort_key: Self::SegmentSortKey) -> Self::SortKey;
}
@@ -323,12 +301,78 @@ where
fn segment_sort_keys(
&mut self,
_input_docs: &[DocId],
_output: &mut Vec<ComparableDoc<Self::SegmentSortKey, DocId>>,
_buffer: &mut Self::Buffer,
_filter: ValueRange<Self::SegmentSortKey>,
input_docs: &[DocId],
output: &mut Vec<ComparableDoc<Self::SegmentSortKey, DocId>>,
buffer: &mut Self::Buffer,
filter: ValueRange<Self::SegmentSortKey>,
) {
unimplemented!("The head and the tail are accessed independently.");
let (head_filter, threshold) = match filter {
ValueRange::GreaterThan((head_threshold, tail_threshold), _)
| ValueRange::LessThan((head_threshold, tail_threshold), _) => {
let head_cmp = self.head.segment_comparator();
let head_filter = head_cmp.threshold_to_valuerange(head_threshold.clone());
(head_filter, Some((head_threshold, tail_threshold)))
}
_ => (ValueRange::All, None),
};
buffer.head_output.clear();
self.head.segment_sort_keys(
input_docs,
&mut buffer.head_output,
&mut buffer.head,
head_filter,
);
if buffer.head_output.is_empty() {
return;
}
buffer.tail_output.clear();
buffer.tail_input_docs.clear();
for cd in &buffer.head_output {
buffer.tail_input_docs.push(cd.doc);
}
self.tail.segment_sort_keys(
&buffer.tail_input_docs,
&mut buffer.tail_output,
&mut buffer.tail,
ValueRange::All,
);
let head_cmp = self.head.segment_comparator();
let tail_cmp = self.tail.segment_comparator();
for (head_doc, tail_doc) in buffer
.head_output
.drain(..)
.zip(buffer.tail_output.drain(..))
{
debug_assert_eq!(head_doc.doc, tail_doc.doc);
let doc = head_doc.doc;
let head_key = head_doc.sort_key;
let tail_key = tail_doc.sort_key;
let accept = if let Some((head_threshold, tail_threshold)) = &threshold {
let head_ord = head_cmp.compare(&head_key, head_threshold);
let ord = if head_ord == Ordering::Equal {
tail_cmp.compare(&tail_key, tail_threshold)
} else {
head_ord
};
ord == Ordering::Greater
} else {
true
};
if accept {
output.push(ComparableDoc {
sort_key: (head_key, tail_key),
doc,
});
}
}
}
#[inline(always)]
@@ -351,118 +395,6 @@ where
top_n_computer.append_doc(doc, sort_key);
}
fn compute_sort_keys_and_collect<C: Comparator<Self::SegmentSortKey>>(
&mut self,
docs: &[DocId],
top_n_computer: &mut TopNComputer<Self::SegmentSortKey, DocId, C, Self::Buffer>,
) {
// The capacity of a TopNComputer is larger than 2*n + COLLECT_BLOCK_BUFFER_LEN, so we
// should always be able to `reserve` space for the entire block.
top_n_computer.reserve(docs.len());
let mut scratch = std::mem::take(&mut top_n_computer.scratch);
if let Some(threshold) = &top_n_computer.threshold {
let (head_threshold, tail_threshold) = threshold.clone();
let head_cmp = self.head.segment_comparator();
let tail_cmp = self.tail.segment_comparator();
let head_filter = head_cmp.threshold_to_valuerange(head_threshold.clone());
scratch.head_output.clear();
self.head.segment_sort_keys(
docs,
&mut scratch.head_output,
&mut scratch.head,
head_filter,
);
if !scratch.head_output.is_empty() {
scratch.tail_output.clear();
scratch.tail_input_docs.clear();
for cd in &scratch.head_output {
scratch.tail_input_docs.push(cd.doc);
}
self.tail.segment_sort_keys(
&scratch.tail_input_docs,
&mut scratch.tail_output,
&mut scratch.tail,
ValueRange::All,
);
for (head_doc, tail_doc) in scratch
.head_output
.drain(..)
.zip(scratch.tail_output.drain(..))
{
debug_assert_eq!(head_doc.doc, tail_doc.doc);
let doc = head_doc.doc;
let head_key = head_doc.sort_key;
let tail_key = tail_doc.sort_key;
let head_ord = head_cmp.compare(&head_key, &head_threshold);
let ord = if head_ord == Ordering::Equal {
tail_cmp.compare(&tail_key, &tail_threshold)
} else {
head_ord
};
if ord == Ordering::Greater {
push_assuming_capacity(
ComparableDoc {
sort_key: (head_key, tail_key),
doc,
},
top_n_computer.buffer(),
);
}
}
}
} else {
// Eagerly push, without a threshold to compare to.
scratch.head_output.clear();
self.head.segment_sort_keys(
docs,
&mut scratch.head_output,
&mut scratch.head,
ValueRange::All,
);
scratch.tail_output.clear();
scratch.tail_input_docs.clear();
for cd in &scratch.head_output {
scratch.tail_input_docs.push(cd.doc);
}
self.tail.segment_sort_keys(
&scratch.tail_input_docs,
&mut scratch.tail_output,
&mut scratch.tail,
ValueRange::All,
);
for (head_doc, tail_doc) in scratch
.head_output
.drain(..)
.zip(scratch.tail_output.drain(..))
{
debug_assert_eq!(head_doc.doc, tail_doc.doc);
let doc = head_doc.doc;
let head_key = head_doc.sort_key;
let tail_key = tail_doc.sort_key;
// We validated at the top of the method that we have capacity.
push_assuming_capacity(
ComparableDoc {
sort_key: (head_key, tail_key),
doc,
},
top_n_computer.buffer(),
);
}
}
top_n_computer.scratch = scratch;
}
#[inline(always)]
fn segment_sort_key(&mut self, doc: DocId, score: Score) -> Self::SegmentSortKey {
let head_sort_key = self.head.segment_sort_key(doc, score);

View File

@@ -527,7 +527,7 @@ pub struct TopNComputer<Score, D, C, Buffer = ()> {
pub(crate) threshold: Option<Score>,
comparator: C,
#[serde(skip)]
pub scratch: Buffer,
scratch: Buffer,
}
// Intermediate struct for TopNComputer for deserialization, to keep vec capacity
@@ -665,10 +665,6 @@ where
}
}
pub(crate) fn buffer(&mut self) -> &mut Vec<ComparableDoc<TSortKey, D>> {
&mut self.buffer
}
pub(crate) fn buffer_and_scratch(
&mut self,
) -> (&mut Vec<ComparableDoc<TSortKey, D>>, &mut Buffer) {