TermScorer does not handle deletes

This commit is contained in:
Paul Masurel
2018-03-27 17:35:20 +09:00
parent 98cf4ba63a
commit ffa03bad71
17 changed files with 135 additions and 259 deletions

View File

@@ -4,11 +4,9 @@ use postings::{BlockSegmentPostings, SegmentPostings};
use postings::TermInfo;
use schema::IndexRecordOption;
use schema::Term;
use fastfield::DeleteBitSet;
use compression::CompressedIntStream;
use postings::FreqReadingOption;
use common::BinarySerializable;
use postings::{DeleteSet, NoDelete};
use schema::FieldType;
/// The inverted index reader is in charge of accessing
@@ -28,7 +26,6 @@ pub struct InvertedIndexReader {
termdict: TermDictionaryImpl,
postings_source: ReadOnlySource,
positions_source: ReadOnlySource,
delete_bitset_opt: Option<DeleteBitSet>,
record_option: IndexRecordOption,
total_num_tokens: u64
}
@@ -38,7 +35,6 @@ impl InvertedIndexReader {
termdict: TermDictionaryImpl,
postings_source: ReadOnlySource,
positions_source: ReadOnlySource,
delete_bitset_opt: Option<DeleteBitSet>,
record_option: IndexRecordOption,
) -> InvertedIndexReader {
let total_num_tokens_data = postings_source.slice(0, 8);
@@ -48,7 +44,6 @@ impl InvertedIndexReader {
termdict,
postings_source: postings_source.slice_from(8),
positions_source,
delete_bitset_opt,
record_option,
total_num_tokens
}
@@ -64,7 +59,6 @@ impl InvertedIndexReader {
termdict: TermDictionaryImpl::empty(field_type),
postings_source: ReadOnlySource::empty(),
positions_source: ReadOnlySource::empty(),
delete_bitset_opt: None,
record_option,
total_num_tokens: 0u64
}
@@ -129,15 +123,12 @@ impl InvertedIndexReader {
/// This method is for an advanced usage only.
///
/// Most user should prefer using `read_postings` instead.
pub fn read_postings_from_terminfo<TDeleteSet: DeleteSet>(
pub fn read_postings_from_terminfo(
&self,
term_info: &TermInfo,
option: IndexRecordOption,
) -> SegmentPostings<TDeleteSet> {
) -> SegmentPostings {
let block_postings = self.read_block_postings_from_terminfo(term_info, option);
let delete_set = TDeleteSet::from(self.delete_bitset_opt.iter()
.cloned()
.next());
let position_stream = {
if option.has_positions() {
let position_offset = term_info.positions_offset;
@@ -149,7 +140,7 @@ impl InvertedIndexReader {
None
}
};
SegmentPostings::from_block_postings(block_postings, delete_set, position_stream)
SegmentPostings::from_block_postings(block_postings, position_stream)
}
/// Returns the total number of tokens recorded for all documents
@@ -170,12 +161,12 @@ impl InvertedIndexReader {
/// For instance, requesting `IndexRecordOption::Freq` for a
/// `TextIndexingOptions` that does not index position will return a `SegmentPostings`
/// with `DocId`s and frequencies.
pub fn read_postings(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings<DeleteBitSet>> {
pub fn read_postings(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings> {
let term_info = get!(self.get_term_info(term));
Some(self.read_postings_from_terminfo(&term_info, option))
}
pub(crate) fn read_postings_no_deletes(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings<NoDelete>> {
pub(crate) fn read_postings_no_deletes(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings> {
let term_info = get!(self.get_term_info(term));
Some(self.read_postings_from_terminfo(&term_info, option))
}

View File

@@ -26,7 +26,6 @@ use termdict::TermDictionary;
use fastfield::{FastValue, MultiValueIntFastFieldReader};
use schema::Cardinality;
use fieldnorm::FieldNormReader;
use postings::DeleteSet;
/// Entry point to access all of the datastructures of the `Segment`
///
@@ -285,7 +284,6 @@ impl SegmentReader {
TermDictionaryImpl::from_source(termdict_source),
postings_source,
positions_source,
self.delete_bitset_opt.clone(),
record_option,
));

View File

@@ -5,7 +5,6 @@ use std::io;
use directory::ReadOnlySource;
use DocId;
use common::HasLen;
use postings::DeleteSet;
/// Write a delete `BitSet`
///
@@ -52,19 +51,7 @@ impl DeleteBitSet {
}
}
}
impl DeleteSet for DeleteBitSet {
fn empty() -> DeleteBitSet {
DeleteBitSet {
data: ReadOnlySource::empty(),
len: 0,
}
}
fn is_deleted(&self, doc: DocId) -> bool {
pub fn is_deleted(&self, doc: DocId) -> bool {
if self.len == 0 {
false
} else {
@@ -78,16 +65,6 @@ impl DeleteSet for DeleteBitSet {
}
impl From<Option<DeleteBitSet>> for DeleteBitSet {
fn from(delete_bitset_opt: Option<DeleteBitSet>) -> Self {
if let Some(delete_bitset) = delete_bitset_opt {
delete_bitset
} else {
DeleteBitSet::empty()
}
}
}
impl HasLen for DeleteBitSet {
fn len(&self) -> usize {
self.len

View File

@@ -19,7 +19,6 @@ use termdict::TermStreamer;
use fieldnorm::FieldNormsSerializer;
use fieldnorm::FieldNormsWriter;
use fieldnorm::FieldNormReader;
use postings::DeleteSet;
use postings::Postings;
@@ -296,12 +295,13 @@ impl IndexMerger {
let segment_reader = &self.readers[heap_item.segment_ord];
let inverted_index = segment_reader.inverted_index(indexed_field);
let mut segment_postings = inverted_index
.read_postings_from_terminfo::<DeleteBitSet>(term_info, segment_postings_option);
if segment_postings.advance() {
Some((segment_ord, segment_postings))
} else {
None
.read_postings_from_terminfo(term_info, segment_postings_option);
while segment_postings.advance() {
if !segment_reader.is_deleted(segment_postings.doc()) {
return Some((segment_ord, segment_postings));
}
}
None
})
.collect();
@@ -309,7 +309,6 @@ impl IndexMerger {
// of all of the segments containing the given term.
//
// These segments are non-empty and advance has already been called.
if !segment_postings.is_empty() {
// If not, the `term` will be entirely removed.

View File

@@ -1,7 +1,7 @@
pub mod index_writer;
pub mod segment_serializer;
pub mod merger;
mod merge_policy;
pub mod merge_policy;
mod log_merge_policy;
mod segment_register;
mod segment_writer;

View File

@@ -292,8 +292,7 @@ mod tests {
use Postings;
use rand::{Rng, SeedableRng, XorShiftRng};
use rand::distributions::{IndependentSample, Range};
pub fn assert_nearly_equals(expected: f32, val: f32) {
assert!(nearly_equals(val, expected), "Got {}, expected {}.", val, expected);
}
@@ -460,6 +459,16 @@ mod tests {
}
}
fn advance_undeleted(docset: &mut DocSet, reader: &SegmentReader) -> bool {
while docset.advance() {
if !reader.is_deleted(docset.doc()) {
return true;
}
}
false
}
#[test]
fn test_delete_postings1() {
let mut schema_builder = SchemaBuilder::default();
@@ -525,19 +534,19 @@ mod tests {
let mut postings = inverted_index
.read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
.unwrap();
assert!(postings.advance());
assert!(advance_undeleted(&mut postings, reader));
assert_eq!(postings.doc(), 5);
assert!(!postings.advance());
assert!(!advance_undeleted(&mut postings, reader));
}
{
let mut postings = inverted_index
.read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions)
.unwrap();
assert!(postings.advance());
assert!(advance_undeleted(&mut postings, reader));
assert_eq!(postings.doc(), 3);
assert!(postings.advance());
assert!(advance_undeleted(&mut postings, reader));
assert_eq!(postings.doc(), 4);
assert!(!postings.advance());
assert!(!advance_undeleted(&mut postings, reader));
}
}
{
@@ -569,19 +578,19 @@ mod tests {
let mut postings = inverted_index
.read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
.unwrap();
assert!(postings.advance());
assert!(advance_undeleted(&mut postings, reader));
assert_eq!(postings.doc(), 5);
assert!(!postings.advance());
assert!(!advance_undeleted(&mut postings, reader));
}
{
let mut postings = inverted_index
.read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions)
.unwrap();
assert!(postings.advance());
assert!(advance_undeleted(&mut postings, reader));
assert_eq!(postings.doc(), 3);
assert!(postings.advance());
assert!(advance_undeleted(&mut postings, reader));
assert_eq!(postings.doc(), 4);
assert!(!postings.advance());
assert!(!advance_undeleted(&mut postings, reader));
}
}
{
@@ -612,25 +621,25 @@ mod tests {
let mut postings = inverted_index
.read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
.unwrap();
assert!(!postings.advance());
assert!(!advance_undeleted(&mut postings, reader));
}
{
let mut postings = inverted_index
.read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions)
.unwrap();
assert!(postings.advance());
assert!(advance_undeleted(&mut postings, reader));
assert_eq!(postings.doc(), 3);
assert!(postings.advance());
assert!(advance_undeleted(&mut postings, reader));
assert_eq!(postings.doc(), 4);
assert!(!postings.advance());
assert!(!advance_undeleted(&mut postings, reader));
}
{
let mut postings = inverted_index
.read_postings(&term_c, IndexRecordOption::WithFreqsAndPositions)
.unwrap();
assert!(postings.advance());
assert!(advance_undeleted(&mut postings, reader));
assert_eq!(postings.doc(), 4);
assert!(!postings.advance());
assert!(!advance_undeleted(&mut postings, reader));
}
}
}

View File

@@ -1,27 +0,0 @@
use fastfield::DeleteBitSet;
use DocId;
pub trait DeleteSet: 'static + From<Option<DeleteBitSet>> {
fn is_deleted(&self, doc: DocId) -> bool;
fn empty() -> Self;
}
#[derive(Default)]
pub struct NoDelete;
impl DeleteSet for NoDelete {
#[inline(always)]
fn is_deleted(&self, _doc: DocId) -> bool {
false
}
fn empty() -> Self {
NoDelete
}
}
impl From<Option<DeleteBitSet>> for NoDelete {
fn from(delete_bitset_opt: Option<DeleteBitSet>) -> Self {
assert!(delete_bitset_opt.is_none(), "NoDelete should not be used if there are some deleted documents.");
NoDelete
}
}

View File

@@ -13,13 +13,11 @@ mod serializer;
mod postings_writer;
mod term_info;
mod segment_postings;
mod delete_set;
use self::recorder::{NothingRecorder, Recorder, TFAndPositionRecorder, TermFrequencyRecorder};
pub use self::serializer::{FieldSerializer, InvertedIndexSerializer};
pub(crate) use self::postings_writer::MultiFieldPostingsWriter;
pub use self::delete_set::{DeleteSet, NoDelete};
pub use self::term_info::TermInfo;
pub use self::postings::Postings;
@@ -402,11 +400,9 @@ pub mod tests {
{
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
index_writer.delete_term(term_0);
assert!(index_writer.commit().is_ok());
}
index.load_searchers().unwrap();
let searcher = index.searcher();
let segment_reader = searcher.segment_reader(0);
@@ -418,8 +414,9 @@ pub mod tests {
.unwrap();
if i % 2 == 0 {
assert_eq!(segment_postings.skip_next(i), SkipResult::OverStep);
assert_eq!(segment_postings.doc(), i + 1);
assert_eq!(segment_postings.skip_next(i), SkipResult::Reached);
assert_eq!(segment_postings.doc(), i);
assert!(segment_reader.is_deleted(i));
} else {
assert_eq!(segment_postings.skip_next(i), SkipResult::Reached);
assert_eq!(segment_postings.doc(), i);
@@ -453,7 +450,7 @@ pub mod tests {
// delete everything else
{
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
index_writer.delete_term(term_1);
index_writer.delete_term(term_1);
assert!(index_writer.commit().is_ok());
}
@@ -469,7 +466,9 @@ pub mod tests {
.read_postings(&term_2, IndexRecordOption::Basic)
.unwrap();
assert_eq!(segment_postings.skip_next(0), SkipResult::End);
assert_eq!(segment_postings.skip_next(0), SkipResult::Reached);
assert_eq!(segment_postings.doc(), 0);
assert!(segment_reader.is_deleted(0));
let mut segment_postings = segment_reader
.inverted_index(term_2.field())

View File

@@ -7,7 +7,6 @@ use postings::Postings;
use docset::{DocSet, SkipResult};
use fst::Streamer;
use compression::compressed_block_size;
use postings::{NoDelete, DeleteSet};
use directory::{ReadOnlySource, SourceRead};
use postings::FreqReadingOption;
use postings::serializer::PostingsSerializer;
@@ -53,20 +52,18 @@ impl PositionComputer {
///
/// As we iterate through the `SegmentPostings`, the frequencies are optionally decoded.
/// Positions on the other hand, are optionally entirely decoded upfront.
pub struct SegmentPostings<TDeleteSet: DeleteSet> {
pub struct SegmentPostings {
block_cursor: BlockSegmentPostings,
cur: usize,
delete_bitset: TDeleteSet,
position_computer: Option<PositionComputer>,
}
impl SegmentPostings<NoDelete> {
impl SegmentPostings {
/// Returns an empty segment postings object
pub fn empty() -> Self {
let empty_block_cursor = BlockSegmentPostings::empty();
SegmentPostings {
block_cursor: empty_block_cursor,
delete_bitset: NoDelete,
cur: COMPRESSION_BLOCK_SIZE,
position_computer: None,
}
@@ -80,7 +77,7 @@ impl SegmentPostings<NoDelete> {
/// It serializes the doc ids using tantivy's codec
/// and returns a `SegmentPostings` object that embeds a
/// buffer with the serialized data.
pub fn create_from_docs(docs: &[u32]) -> SegmentPostings<NoDelete> {
pub fn create_from_docs(docs: &[u32]) -> SegmentPostings {
let mut counting_writer = CountingWriter::wrap(Vec::new());
{
let mut postings_serializer = PostingsSerializer::new(&mut counting_writer, false);
@@ -96,13 +93,11 @@ impl SegmentPostings<NoDelete> {
SourceRead::from(data),
FreqReadingOption::NoFreq,
);
SegmentPostings::from_block_postings(block_segment_postings, NoDelete, None)
SegmentPostings::from_block_postings(block_segment_postings, None)
}
}
impl<TDeleteSet: DeleteSet> SegmentPostings<TDeleteSet> {
impl SegmentPostings {
/// Reads a Segment postings from an &[u8]
///
@@ -112,13 +107,11 @@ impl<TDeleteSet: DeleteSet> SegmentPostings<TDeleteSet> {
/// frequencies and/or positions
pub fn from_block_postings(
segment_block_postings: BlockSegmentPostings,
delete_bitset: TDeleteSet,
positions_stream_opt: Option<CompressedIntStream>,
) -> SegmentPostings<TDeleteSet> {
) -> SegmentPostings {
SegmentPostings {
block_cursor: segment_block_postings,
cur: COMPRESSION_BLOCK_SIZE, // cursor within the block
delete_bitset,
position_computer: positions_stream_opt.map(PositionComputer::new),
}
}
@@ -142,9 +135,9 @@ fn exponential_search(target: u32, mut start: usize, arr: &[u32]) -> (usize, usi
}
}
impl<TDeleteSet: DeleteSet> DocSet for SegmentPostings<TDeleteSet> {
impl DocSet for SegmentPostings {
fn skip_next(&mut self, target: DocId) -> SkipResult {
if !self.advance() {
if !self.advance() {
return SkipResult::End;
}
if self.doc() == target {
@@ -188,42 +181,33 @@ impl<TDeleteSet: DeleteSet> DocSet for SegmentPostings<TDeleteSet> {
break;
}
}
{
// we're in the right block now, start with an exponential search
let block_docs = self.block_cursor.docs();
let (mut start, end) = exponential_search(target, self.cur, block_docs);
// we're in the right block now, start with an exponential search
let block_docs = self.block_cursor.docs();
start += block_docs[start..end]
.binary_search(&target)
.unwrap_or_else(|e| e);
let (mut start, end) = exponential_search(target, self.cur, block_docs);
// `doc` is now the first element >= `target`
let doc = block_docs[start];
debug_assert!(doc >= target);
start += block_docs[start..end]
.binary_search(&target)
.unwrap_or_else(|e| e);
if self.position_computer.is_some() {
let freqs_skipped = &self.block_cursor.freqs()[self.cur..start];
let sum_freqs: u32 = freqs_skipped.iter().sum();
self.position_computer.as_mut()
.unwrap()
.add_skip(sum_freqs as usize);
}
// `doc` is now the first element >= `target`
let doc = block_docs[start];
debug_assert!(doc >= target);
self.cur = start;
if !self.delete_bitset.is_deleted(doc) {
if doc == target {
return SkipResult::Reached;
} else {
return SkipResult::OverStep;
}
}
if self.position_computer.is_some() {
let freqs_skipped = &self.block_cursor.freqs()[self.cur..start];
let sum_freqs: u32 = freqs_skipped.iter().sum();
self.position_computer.as_mut()
.unwrap()
.add_skip(sum_freqs as usize);
}
if self.advance() {
SkipResult::OverStep
self.cur = start;
if doc == target {
return SkipResult::Reached;
} else {
SkipResult::End
return SkipResult::OverStep;
}
}
@@ -232,25 +216,19 @@ impl<TDeleteSet: DeleteSet> DocSet for SegmentPostings<TDeleteSet> {
// next needs to be called a first time to point to the correct element.
#[inline]
fn advance(&mut self) -> bool {
loop {
{
if self.position_computer.is_some() {
let term_freq = self.term_freq() as usize;
self.position_computer.as_mut().unwrap().add_skip(term_freq);
}
}
self.cur += 1;
if self.cur >= self.block_cursor.block_len() {
self.cur = 0;
if !self.block_cursor.advance() {
self.cur = COMPRESSION_BLOCK_SIZE;
return false;
}
}
if !self.delete_bitset.is_deleted(self.doc()) {
return true;
if self.position_computer.is_some() {
let term_freq = self.term_freq() as usize;
self.position_computer.as_mut().unwrap().add_skip(term_freq);
}
self.cur += 1;
if self.cur >= self.block_cursor.block_len() {
self.cur = 0;
if !self.block_cursor.advance() {
self.cur = COMPRESSION_BLOCK_SIZE;
return false;
}
}
true
}
fn size_hint(&self) -> u32 {
@@ -285,13 +263,13 @@ impl<TDeleteSet: DeleteSet> DocSet for SegmentPostings<TDeleteSet> {
}
impl<TDeleteSet: DeleteSet> HasLen for SegmentPostings<TDeleteSet> {
impl HasLen for SegmentPostings {
fn len(&self) -> usize {
self.block_cursor.doc_freq()
}
}
impl<TDeleteSet: DeleteSet> Postings for SegmentPostings<TDeleteSet> {
impl Postings for SegmentPostings {
fn term_freq(&self) -> u32 {
self.block_cursor.freq(self.cur)
}

View File

@@ -12,7 +12,7 @@ use query::RequiredOptionalScorer;
use query::score_combiner::{DoNothingCombiner, ScoreCombiner, SumWithCoordsCombiner};
use Result;
use query::intersect_scorers;
use query::term_query::{TermScorerWithDeletes, TermScorerNoDeletes};
use query::term_query::TermScorer;
fn scorer_union<TScoreCombiner>(scorers: Vec<Box<Scorer>>) -> Box<Scorer>
@@ -27,32 +27,18 @@ where
{
let is_all_term_queries = scorers.iter().all(|scorer| {
let scorer_ref: &Scorer = scorer.borrow();
Downcast::<TermScorerWithDeletes>::is_type(scorer_ref)
Downcast::<TermScorer>::is_type(scorer_ref)
});
if is_all_term_queries {
let scorers: Vec<TermScorerWithDeletes> = scorers
let scorers: Vec<TermScorer> = scorers
.into_iter()
.map(|scorer| *Downcast::<TermScorerWithDeletes>::downcast(scorer).unwrap())
.map(|scorer| *Downcast::<TermScorer>::downcast(scorer).unwrap())
.collect();
let scorer: Box<Scorer> = box Union::<TermScorerWithDeletes, TScoreCombiner>::from(scorers);
let scorer: Box<Scorer> = box Union::<TermScorer, TScoreCombiner>::from(scorers);
return scorer;
}
}
{
let is_all_term_queries = scorers.iter().all(|scorer| {
let scorer_ref: &Scorer = scorer.borrow();
Downcast::<TermScorerNoDeletes>::is_type(scorer_ref)
});
if is_all_term_queries {
let scorers: Vec<TermScorerNoDeletes> = scorers
.into_iter()
.map(|scorer| *Downcast::<TermScorerNoDeletes>::downcast(scorer).unwrap())
.collect();
let scorer: Box<Scorer> = box Union::<TermScorerNoDeletes, TScoreCombiner>::from(scorers);
return scorer;
}
}
let scorer: Box<Scorer> = box Union::<_, TScoreCombiner>::from(scorers);
return scorer;

View File

@@ -19,7 +19,7 @@ mod tests {
use query::QueryParser;
use query::RequiredOptionalScorer;
use query::score_combiner::SumWithCoordsCombiner;
use query::term_query::TermScorerNoDeletes;
use query::term_query::TermScorer;
fn aux_test_helper() -> (Index, Field) {
let mut schema_builder = SchemaBuilder::default();
@@ -71,7 +71,7 @@ mod tests {
let searcher = index.searcher();
let weight = query.weight(&*searcher, true).unwrap();
let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
assert!(Downcast::<TermScorerNoDeletes>::is_type(&*scorer));
assert!(Downcast::<TermScorer>::is_type(&*scorer));
}
#[test]
@@ -83,7 +83,7 @@ mod tests {
let query = query_parser.parse_query("+a +b +c").unwrap();
let weight = query.weight(&*searcher, true).unwrap();
let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
assert!(Downcast::<Intersection<TermScorerNoDeletes>>::is_type(&*scorer));
assert!(Downcast::<Intersection<TermScorer>>::is_type(&*scorer));
}
{
let query = query_parser.parse_query("+a +(b c)").unwrap();
@@ -111,7 +111,7 @@ mod tests {
let weight = query.weight(&*searcher, false).unwrap();
let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
println!("{:?}", scorer.type_name());
assert!(Downcast::<TermScorerNoDeletes>::is_type(&*scorer));
assert!(Downcast::<TermScorer>::is_type(&*scorer));
}
}

View File

@@ -5,7 +5,7 @@ use DocId;
use downcast::Downcast;
use std::borrow::Borrow;
use Score;
use query::term_query::{TermScorerNoDeletes, TermScorerWithDeletes};
use query::term_query::TermScorer;
/// Returns the intersection scorer.
///
@@ -28,10 +28,10 @@ pub fn intersect_scorers(mut scorers: Vec<Box<Scorer>>) -> Box<Scorer> {
{
if [&left, &right].into_iter().all(|scorer| {
let scorer_ref: &Scorer = (*scorer).borrow();
Downcast::<TermScorerWithDeletes>::is_type(scorer_ref)
Downcast::<TermScorer>::is_type(scorer_ref)
}) {
let left = *Downcast::<TermScorerWithDeletes>::downcast(left).unwrap();
let right = *Downcast::<TermScorerWithDeletes>::downcast(right).unwrap();
let left = *Downcast::<TermScorer>::downcast(left).unwrap();
let right = *Downcast::<TermScorer>::downcast(right).unwrap();
return box Intersection {
left,
right,
@@ -40,29 +40,11 @@ pub fn intersect_scorers(mut scorers: Vec<Box<Scorer>>) -> Box<Scorer> {
}
}
}
{
if [&left, &right].into_iter()
.all(|scorer| {
let scorer_ref: &Scorer = (*scorer).borrow();
Downcast::<TermScorerNoDeletes>::is_type(scorer_ref)
}) {
let left = *Downcast::<TermScorerNoDeletes>::downcast(left).unwrap();
let right = *Downcast::<TermScorerNoDeletes>::downcast(right).unwrap();
return box Intersection {
left,
right,
others: scorers,
num_docsets
}
}
}
{
return box Intersection {
left,
right,
others: scorers,
num_docsets
}
return box Intersection {
left,
right,
others: scorers,
num_docsets
}
}
_ => { unreachable!(); }

View File

@@ -79,11 +79,9 @@ pub trait Query: fmt::Debug {
let _ = segment_search_timer.open("set_segment");
collector.set_segment(segment_ord as SegmentLocalId, segment_reader)?;
}
let _collection_timer = segment_search_timer.open("collection");
let mut scorer = weight.scorer(segment_reader)?;
{
let _collection_timer = segment_search_timer.open("collection");
scorer.collect(collector);
}
scorer.collect(collector, segment_reader.delete_bitset());
}
}
Ok(timer_tree)

View File

@@ -5,6 +5,7 @@ use docset::{DocSet, SkipResult};
use common::BitSet;
use std::ops::DerefMut;
use downcast;
use fastfield::DeleteBitSet;
/// Scored set of documents matching a query within a specific segment.
///
@@ -17,13 +18,23 @@ pub trait Scorer: downcast::Any + DocSet + 'static {
/// Consumes the complete `DocSet` and
/// push the scored documents to the collector.
fn collect(&mut self, collector: &mut Collector) {
while self.advance() {
collector.collect(self.doc(), self.score());
fn collect(&mut self, collector: &mut Collector, delete_bitset_opt: Option<&DeleteBitSet>) {
if let Some(delete_bitset) = delete_bitset_opt {
while self.advance() {
let doc = self.doc();
if !delete_bitset.is_deleted(doc) {
collector.collect(doc, self.score());
}
}
} else {
while self.advance() {
collector.collect(self.doc(), self.score());
}
}
}
}
#[allow(missing_docs)]
mod downcast_impl {
downcast!(super::Scorer);
@@ -34,9 +45,9 @@ impl Scorer for Box<Scorer> {
self.deref_mut().score()
}
fn collect(&mut self, collector: &mut Collector) {
fn collect(&mut self, collector: &mut Collector, delete_bitset: Option<&DeleteBitSet>) {
let scorer = self.deref_mut();
scorer.collect(collector);
scorer.collect(collector, delete_bitset);
}
}
@@ -50,6 +61,7 @@ impl DocSet for EmptyScorer {
false
}
fn doc(&self) -> DocId {
panic!(
"You may not call .doc() on a scorer \

View File

@@ -6,16 +6,6 @@ pub use self::term_query::TermQuery;
pub use self::term_weight::TermWeight;
pub use self::term_scorer::TermScorer;
use postings::SegmentPostings;
use postings::NoDelete;
use fastfield::DeleteBitSet;
pub(crate) type TermScorerWithDeletes = TermScorer<SegmentPostings<DeleteBitSet>>;
pub(crate) type TermScorerNoDeletes = TermScorer<SegmentPostings<NoDelete>>;
#[cfg(test)]
mod tests {

View File

@@ -6,18 +6,19 @@ use query::Scorer;
use postings::Postings;
use fieldnorm::FieldNormReader;
use query::bm25::BM25Weight;
use postings::SegmentPostings;
pub struct TermScorer<TPostings: Postings> {
postings: TPostings,
pub struct TermScorer {
postings: SegmentPostings,
fieldnorm_reader: FieldNormReader,
similarity_weight: BM25Weight,
}
impl<TPostings: Postings> TermScorer<TPostings> {
pub fn new(postings: TPostings,
impl TermScorer {
pub fn new(postings: SegmentPostings,
fieldnorm_reader: FieldNormReader,
similarity_weight: BM25Weight) -> TermScorer<TPostings> {
similarity_weight: BM25Weight) -> TermScorer {
TermScorer {
postings,
fieldnorm_reader,
@@ -26,7 +27,7 @@ impl<TPostings: Postings> TermScorer<TPostings> {
}
}
impl<TPostings: Postings> DocSet for TermScorer<TPostings> {
impl DocSet for TermScorer {
fn advance(&mut self) -> bool {
self.postings.advance()
}
@@ -44,7 +45,7 @@ impl<TPostings: Postings> DocSet for TermScorer<TPostings> {
}
}
impl<TPostings: Postings> Scorer for TermScorer<TPostings> {
impl Scorer for TermScorer {
fn score(&mut self) -> Score {
let doc = self.doc();
let fieldnorm_id = self.fieldnorm_reader.fieldnorm_id(doc);

View File

@@ -6,8 +6,6 @@ use docset::DocSet;
use postings::SegmentPostings;
use schema::IndexRecordOption;
use super::term_scorer::TermScorer;
use fastfield::DeleteBitSet;
use postings::NoDelete;
use Result;
use query::bm25::BM25Weight;
@@ -24,33 +22,18 @@ impl Weight for TermWeight {
let inverted_index = reader.inverted_index(field);
let fieldnorm_reader = reader.get_fieldnorms_reader(field);
let similarity_weight = self.similarity_weight.clone();
if reader.has_deletes() {
let postings_opt: Option<SegmentPostings<DeleteBitSet>> =
let postings_opt: Option<SegmentPostings> =
inverted_index.read_postings(&self.term, self.index_record_option);
if let Some(segment_postings) = postings_opt {
Ok(box TermScorer::new(segment_postings,
fieldnorm_reader,
similarity_weight))
} else {
Ok(box TermScorer::new(
SegmentPostings::<NoDelete>::empty(),
fieldnorm_reader,
similarity_weight))
}
} else {
let postings_opt: Option<SegmentPostings<NoDelete>> =
inverted_index.read_postings_no_deletes(&self.term, self.index_record_option);
if let Some(segment_postings) = postings_opt {
Ok(box TermScorer::new(segment_postings,
fieldnorm_reader,
similarity_weight))
} else {
Ok(box TermScorer::new(
SegmentPostings::<NoDelete>::empty(),
SegmentPostings::empty(),
fieldnorm_reader,
similarity_weight))
}
}
}
fn count(&self, reader: &SegmentReader) -> Result<u32> {