Compare commits

...

2 Commits

Author SHA1 Message Date
Pascal Seitz
5d53b11a2c change debug_assert to assert in term length
change `debug_assert` to `assert` since the bug with the truncated Terms only occurs in prod
2024-04-04 22:54:28 +08:00
PSeitz
4e79e11007 add collect_block to BoxableSegmentCollector (#2331) 2024-03-21 09:10:25 +01:00
2 changed files with 21 additions and 1 deletions

View File

@@ -52,10 +52,16 @@ impl<TCollector: Collector> Collector for CollectorWrapper<TCollector> {
impl SegmentCollector for Box<dyn BoxableSegmentCollector> {
type Fruit = Box<dyn Fruit>;
#[inline]
fn collect(&mut self, doc: u32, score: Score) {
self.as_mut().collect(doc, score);
}
#[inline]
fn collect_block(&mut self, docs: &[DocId]) {
self.as_mut().collect_block(docs);
}
fn harvest(self) -> Box<dyn Fruit> {
BoxableSegmentCollector::harvest_from_box(self)
}
@@ -63,6 +69,11 @@ impl SegmentCollector for Box<dyn BoxableSegmentCollector> {
pub trait BoxableSegmentCollector {
fn collect(&mut self, doc: u32, score: Score);
fn collect_block(&mut self, docs: &[DocId]) {
for &doc in docs {
self.collect(doc, 0.0);
}
}
fn harvest_from_box(self: Box<Self>) -> Box<dyn Fruit>;
}
@@ -71,9 +82,14 @@ pub struct SegmentCollectorWrapper<TSegmentCollector: SegmentCollector>(TSegment
impl<TSegmentCollector: SegmentCollector> BoxableSegmentCollector
for SegmentCollectorWrapper<TSegmentCollector>
{
#[inline]
fn collect(&mut self, doc: u32, score: Score) {
self.0.collect(doc, score);
}
#[inline]
fn collect_block(&mut self, docs: &[DocId]) {
self.0.collect_block(docs);
}
fn harvest_from_box(self: Box<Self>) -> Box<dyn Fruit> {
Box::new(self.0.harvest())

View File

@@ -204,7 +204,11 @@ impl<Rec: Recorder> SpecializedPostingsWriter<Rec> {
impl<Rec: Recorder> PostingsWriter for SpecializedPostingsWriter<Rec> {
#[inline]
fn subscribe(&mut self, doc: DocId, position: u32, term: &Term, ctx: &mut IndexingContext) {
debug_assert!(term.serialized_term().len() >= 4);
assert!(
term.serialized_term().len() >= 4,
"Term too short expect >=4 but got {:?}",
term.serialized_term()
);
self.total_num_tokens += 1;
let (term_index, arena) = (&mut ctx.term_index, &mut ctx.arena);
term_index.mutate_or_create(term.serialized_term(), |opt_recorder: Option<Rec>| {