TermScorer does not handle deletes

This commit is contained in:
Paul Masurel
2018-03-27 17:35:20 +09:00
parent 98cf4ba63a
commit ffa03bad71
17 changed files with 135 additions and 259 deletions

View File

@@ -12,7 +12,7 @@ use query::RequiredOptionalScorer;
use query::score_combiner::{DoNothingCombiner, ScoreCombiner, SumWithCoordsCombiner};
use Result;
use query::intersect_scorers;
use query::term_query::{TermScorerWithDeletes, TermScorerNoDeletes};
use query::term_query::TermScorer;
fn scorer_union<TScoreCombiner>(scorers: Vec<Box<Scorer>>) -> Box<Scorer>
@@ -27,32 +27,18 @@ where
{
let is_all_term_queries = scorers.iter().all(|scorer| {
let scorer_ref: &Scorer = scorer.borrow();
Downcast::<TermScorerWithDeletes>::is_type(scorer_ref)
Downcast::<TermScorer>::is_type(scorer_ref)
});
if is_all_term_queries {
let scorers: Vec<TermScorerWithDeletes> = scorers
let scorers: Vec<TermScorer> = scorers
.into_iter()
.map(|scorer| *Downcast::<TermScorerWithDeletes>::downcast(scorer).unwrap())
.map(|scorer| *Downcast::<TermScorer>::downcast(scorer).unwrap())
.collect();
let scorer: Box<Scorer> = box Union::<TermScorerWithDeletes, TScoreCombiner>::from(scorers);
let scorer: Box<Scorer> = box Union::<TermScorer, TScoreCombiner>::from(scorers);
return scorer;
}
}
{
let is_all_term_queries = scorers.iter().all(|scorer| {
let scorer_ref: &Scorer = scorer.borrow();
Downcast::<TermScorerNoDeletes>::is_type(scorer_ref)
});
if is_all_term_queries {
let scorers: Vec<TermScorerNoDeletes> = scorers
.into_iter()
.map(|scorer| *Downcast::<TermScorerNoDeletes>::downcast(scorer).unwrap())
.collect();
let scorer: Box<Scorer> = box Union::<TermScorerNoDeletes, TScoreCombiner>::from(scorers);
return scorer;
}
}
let scorer: Box<Scorer> = box Union::<_, TScoreCombiner>::from(scorers);
return scorer;

View File

@@ -19,7 +19,7 @@ mod tests {
use query::QueryParser;
use query::RequiredOptionalScorer;
use query::score_combiner::SumWithCoordsCombiner;
use query::term_query::TermScorerNoDeletes;
use query::term_query::TermScorer;
fn aux_test_helper() -> (Index, Field) {
let mut schema_builder = SchemaBuilder::default();
@@ -71,7 +71,7 @@ mod tests {
let searcher = index.searcher();
let weight = query.weight(&*searcher, true).unwrap();
let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
assert!(Downcast::<TermScorerNoDeletes>::is_type(&*scorer));
assert!(Downcast::<TermScorer>::is_type(&*scorer));
}
#[test]
@@ -83,7 +83,7 @@ mod tests {
let query = query_parser.parse_query("+a +b +c").unwrap();
let weight = query.weight(&*searcher, true).unwrap();
let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
assert!(Downcast::<Intersection<TermScorerNoDeletes>>::is_type(&*scorer));
assert!(Downcast::<Intersection<TermScorer>>::is_type(&*scorer));
}
{
let query = query_parser.parse_query("+a +(b c)").unwrap();
@@ -111,7 +111,7 @@ mod tests {
let weight = query.weight(&*searcher, false).unwrap();
let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
println!("{:?}", scorer.type_name());
assert!(Downcast::<TermScorerNoDeletes>::is_type(&*scorer));
assert!(Downcast::<TermScorer>::is_type(&*scorer));
}
}

View File

@@ -5,7 +5,7 @@ use DocId;
use downcast::Downcast;
use std::borrow::Borrow;
use Score;
use query::term_query::{TermScorerNoDeletes, TermScorerWithDeletes};
use query::term_query::TermScorer;
/// Returns the intersection scorer.
///
@@ -28,10 +28,10 @@ pub fn intersect_scorers(mut scorers: Vec<Box<Scorer>>) -> Box<Scorer> {
{
if [&left, &right].into_iter().all(|scorer| {
let scorer_ref: &Scorer = (*scorer).borrow();
Downcast::<TermScorerWithDeletes>::is_type(scorer_ref)
Downcast::<TermScorer>::is_type(scorer_ref)
}) {
let left = *Downcast::<TermScorerWithDeletes>::downcast(left).unwrap();
let right = *Downcast::<TermScorerWithDeletes>::downcast(right).unwrap();
let left = *Downcast::<TermScorer>::downcast(left).unwrap();
let right = *Downcast::<TermScorer>::downcast(right).unwrap();
return box Intersection {
left,
right,
@@ -40,29 +40,11 @@ pub fn intersect_scorers(mut scorers: Vec<Box<Scorer>>) -> Box<Scorer> {
}
}
}
{
if [&left, &right].into_iter()
.all(|scorer| {
let scorer_ref: &Scorer = (*scorer).borrow();
Downcast::<TermScorerNoDeletes>::is_type(scorer_ref)
}) {
let left = *Downcast::<TermScorerNoDeletes>::downcast(left).unwrap();
let right = *Downcast::<TermScorerNoDeletes>::downcast(right).unwrap();
return box Intersection {
left,
right,
others: scorers,
num_docsets
}
}
}
{
return box Intersection {
left,
right,
others: scorers,
num_docsets
}
return box Intersection {
left,
right,
others: scorers,
num_docsets
}
}
_ => { unreachable!(); }

View File

@@ -79,11 +79,9 @@ pub trait Query: fmt::Debug {
let _ = segment_search_timer.open("set_segment");
collector.set_segment(segment_ord as SegmentLocalId, segment_reader)?;
}
let _collection_timer = segment_search_timer.open("collection");
let mut scorer = weight.scorer(segment_reader)?;
{
let _collection_timer = segment_search_timer.open("collection");
scorer.collect(collector);
}
scorer.collect(collector, segment_reader.delete_bitset());
}
}
Ok(timer_tree)

View File

@@ -5,6 +5,7 @@ use docset::{DocSet, SkipResult};
use common::BitSet;
use std::ops::DerefMut;
use downcast;
use fastfield::DeleteBitSet;
/// Scored set of documents matching a query within a specific segment.
///
@@ -17,13 +18,23 @@ pub trait Scorer: downcast::Any + DocSet + 'static {
/// Consumes the complete `DocSet` and
/// push the scored documents to the collector.
fn collect(&mut self, collector: &mut Collector) {
while self.advance() {
collector.collect(self.doc(), self.score());
fn collect(&mut self, collector: &mut Collector, delete_bitset_opt: Option<&DeleteBitSet>) {
    // The `Option` is resolved once, before iterating, so each arm runs its
    // own tight loop over the doc set.
    match delete_bitset_opt {
        Some(delete_bitset) => {
            while self.advance() {
                let doc = self.doc();
                // Documents flagged in the delete bitset never reach the collector.
                if delete_bitset.is_deleted(doc) {
                    continue;
                }
                collector.collect(doc, self.score());
            }
        }
        None => {
            // No delete bitset was supplied: forward every document.
            while self.advance() {
                let doc = self.doc();
                let score = self.score();
                collector.collect(doc, score);
            }
        }
    }
}
}
#[allow(missing_docs)]
mod downcast_impl {
downcast!(super::Scorer);
@@ -34,9 +45,9 @@ impl Scorer for Box<Scorer> {
self.deref_mut().score()
}
fn collect(&mut self, collector: &mut Collector) {
fn collect(&mut self, collector: &mut Collector, delete_bitset: Option<&DeleteBitSet>) {
let scorer = self.deref_mut();
scorer.collect(collector);
scorer.collect(collector, delete_bitset);
}
}
@@ -50,6 +61,7 @@ impl DocSet for EmptyScorer {
false
}
fn doc(&self) -> DocId {
panic!(
"You may not call .doc() on a scorer \

View File

@@ -6,16 +6,6 @@ pub use self::term_query::TermQuery;
pub use self::term_weight::TermWeight;
pub use self::term_scorer::TermScorer;
use postings::SegmentPostings;
use postings::NoDelete;
use fastfield::DeleteBitSet;
pub(crate) type TermScorerWithDeletes = TermScorer<SegmentPostings<DeleteBitSet>>;
pub(crate) type TermScorerNoDeletes = TermScorer<SegmentPostings<NoDelete>>;
#[cfg(test)]
mod tests {

View File

@@ -6,18 +6,19 @@ use query::Scorer;
use postings::Postings;
use fieldnorm::FieldNormReader;
use query::bm25::BM25Weight;
use postings::SegmentPostings;
pub struct TermScorer<TPostings: Postings> {
postings: TPostings,
/// Scorer for a single term, backed by one `SegmentPostings` list.
///
/// Iteration is delegated to the postings; scoring looks up the fieldnorm id
/// of the current document.
pub struct TermScorer {
// Postings list that drives `advance()`.
postings: SegmentPostings,
// Queried with the current doc to obtain its fieldnorm id when scoring.
fieldnorm_reader: FieldNormReader,
// BM25 weight for this term; presumably combined with the fieldnorm id
// (and term frequency) in `score()` — TODO confirm, body not fully visible.
similarity_weight: BM25Weight,
}
impl<TPostings: Postings> TermScorer<TPostings> {
pub fn new(postings: TPostings,
impl TermScorer {
pub fn new(postings: SegmentPostings,
fieldnorm_reader: FieldNormReader,
similarity_weight: BM25Weight) -> TermScorer<TPostings> {
similarity_weight: BM25Weight) -> TermScorer {
TermScorer {
postings,
fieldnorm_reader,
@@ -26,7 +27,7 @@ impl<TPostings: Postings> TermScorer<TPostings> {
}
}
impl<TPostings: Postings> DocSet for TermScorer<TPostings> {
impl DocSet for TermScorer {
fn advance(&mut self) -> bool {
self.postings.advance()
}
@@ -44,7 +45,7 @@ impl<TPostings: Postings> DocSet for TermScorer<TPostings> {
}
}
impl<TPostings: Postings> Scorer for TermScorer<TPostings> {
impl Scorer for TermScorer {
fn score(&mut self) -> Score {
let doc = self.doc();
let fieldnorm_id = self.fieldnorm_reader.fieldnorm_id(doc);

View File

@@ -6,8 +6,6 @@ use docset::DocSet;
use postings::SegmentPostings;
use schema::IndexRecordOption;
use super::term_scorer::TermScorer;
use fastfield::DeleteBitSet;
use postings::NoDelete;
use Result;
use query::bm25::BM25Weight;
@@ -24,33 +22,18 @@ impl Weight for TermWeight {
let inverted_index = reader.inverted_index(field);
let fieldnorm_reader = reader.get_fieldnorms_reader(field);
let similarity_weight = self.similarity_weight.clone();
if reader.has_deletes() {
let postings_opt: Option<SegmentPostings<DeleteBitSet>> =
let postings_opt: Option<SegmentPostings> =
inverted_index.read_postings(&self.term, self.index_record_option);
if let Some(segment_postings) = postings_opt {
Ok(box TermScorer::new(segment_postings,
fieldnorm_reader,
similarity_weight))
} else {
Ok(box TermScorer::new(
SegmentPostings::<NoDelete>::empty(),
fieldnorm_reader,
similarity_weight))
}
} else {
let postings_opt: Option<SegmentPostings<NoDelete>> =
inverted_index.read_postings_no_deletes(&self.term, self.index_record_option);
if let Some(segment_postings) = postings_opt {
Ok(box TermScorer::new(segment_postings,
fieldnorm_reader,
similarity_weight))
} else {
Ok(box TermScorer::new(
SegmentPostings::<NoDelete>::empty(),
SegmentPostings::empty(),
fieldnorm_reader,
similarity_weight))
}
}
}
fn count(&self, reader: &SegmentReader) -> Result<u32> {