diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs
index 4a81b6293..e72e6d19d 100644
--- a/src/core/inverted_index_reader.rs
+++ b/src/core/inverted_index_reader.rs
@@ -4,11 +4,9 @@ use postings::{BlockSegmentPostings, SegmentPostings};
 use postings::TermInfo;
 use schema::IndexRecordOption;
 use schema::Term;
-use fastfield::DeleteBitSet;
 use compression::CompressedIntStream;
 use postings::FreqReadingOption;
 use common::BinarySerializable;
-use postings::{DeleteSet, NoDelete};
 use schema::FieldType;
 
 /// The inverted index reader is in charge of accessing
@@ -28,7 +26,6 @@ pub struct InvertedIndexReader {
     termdict: TermDictionaryImpl,
     postings_source: ReadOnlySource,
     positions_source: ReadOnlySource,
-    delete_bitset_opt: Option<DeleteBitSet>,
     record_option: IndexRecordOption,
     total_num_tokens: u64
 }
@@ -38,7 +35,6 @@ impl InvertedIndexReader {
         termdict: TermDictionaryImpl,
         postings_source: ReadOnlySource,
         positions_source: ReadOnlySource,
-        delete_bitset_opt: Option<DeleteBitSet>,
         record_option: IndexRecordOption,
     ) -> InvertedIndexReader {
         let total_num_tokens_data = postings_source.slice(0, 8);
@@ -48,7 +44,6 @@ impl InvertedIndexReader {
             termdict,
             postings_source: postings_source.slice_from(8),
             positions_source,
-            delete_bitset_opt,
             record_option,
             total_num_tokens
         }
@@ -64,7 +59,6 @@ impl InvertedIndexReader {
             termdict: TermDictionaryImpl::empty(field_type),
             postings_source: ReadOnlySource::empty(),
             positions_source: ReadOnlySource::empty(),
-            delete_bitset_opt: None,
             record_option,
             total_num_tokens: 0u64
         }
@@ -129,15 +123,12 @@ impl InvertedIndexReader {
     /// This method is for an advanced usage only.
     ///
     /// Most user should prefer using `read_postings` instead.
-    pub fn read_postings_from_terminfo<TDeleteSet: DeleteSet>(
+    pub fn read_postings_from_terminfo(
         &self,
         term_info: &TermInfo,
         option: IndexRecordOption,
-    ) -> SegmentPostings<TDeleteSet> {
+    ) -> SegmentPostings {
         let block_postings = self.read_block_postings_from_terminfo(term_info, option);
-        let delete_set = TDeleteSet::from(self.delete_bitset_opt.iter()
-            .cloned()
-            .next());
         let position_stream = {
             if option.has_positions() {
                 let position_offset = term_info.positions_offset;
@@ -149,7 +140,7 @@ impl InvertedIndexReader {
                 None
             }
         };
-        SegmentPostings::from_block_postings(block_postings, delete_set, position_stream)
+        SegmentPostings::from_block_postings(block_postings, position_stream)
     }
 
     /// Returns the total number of tokens recorded for all documents
@@ -170,12 +161,12 @@ impl InvertedIndexReader {
     /// For instance, requesting `IndexRecordOption::Freq` for a
     /// `TextIndexingOptions` that does not index position will return a `SegmentPostings`
     /// with `DocId`s and frequencies.
-    pub fn read_postings(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings<DeleteBitSet>> {
+    pub fn read_postings(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings> {
         let term_info = get!(self.get_term_info(term));
         Some(self.read_postings_from_terminfo(&term_info, option))
     }
 
-    pub(crate) fn read_postings_no_deletes(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings<NoDelete>> {
+    pub(crate) fn read_postings_no_deletes(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings> {
         let term_info = get!(self.get_term_info(term));
         Some(self.read_postings_from_terminfo(&term_info, option))
     }
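With the `TDeleteSet` type parameter gone, `read_postings` hands back a plain `SegmentPostings` that no longer filters deleted documents itself. A minimal sketch of the new calling convention — the `segment_reader`, `field`, and term text are hypothetical stand-ins:

```rust
// Sketch only: assumes an open SegmentReader and an indexed text field.
let inverted_index = segment_reader.inverted_index(field);
let term = Term::from_field_text(field, "hello");
if let Some(mut postings) = inverted_index.read_postings(&term, IndexRecordOption::Basic) {
    while postings.advance() {
        // Deletes are no longer filtered inside the postings cursor,
        // so the caller consults the segment's delete information itself.
        if !segment_reader.is_deleted(postings.doc()) {
            println!("live doc: {}", postings.doc());
        }
    }
}
```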
diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs
index d6a3fd2d0..5d80ec3ca 100644
--- a/src/core/segment_reader.rs
+++ b/src/core/segment_reader.rs
@@ -26,7 +26,6 @@ use termdict::TermDictionary;
 use fastfield::{FastValue, MultiValueIntFastFieldReader};
 use schema::Cardinality;
 use fieldnorm::FieldNormReader;
-use postings::DeleteSet;
 
 /// Entry point to access all of the datastructures of the `Segment`
 ///
@@ -285,7 +284,6 @@ impl SegmentReader {
             TermDictionaryImpl::from_source(termdict_source),
             postings_source,
             positions_source,
-            self.delete_bitset_opt.clone(),
             record_option,
         ));
diff --git a/src/fastfield/delete.rs b/src/fastfield/delete.rs
index 3134c3fc1..cc541cc18 100644
--- a/src/fastfield/delete.rs
+++ b/src/fastfield/delete.rs
@@ -5,7 +5,6 @@ use std::io;
 use directory::ReadOnlySource;
 use DocId;
 use common::HasLen;
-use postings::DeleteSet;
 
 /// Write a delete `BitSet`
 ///
@@ -52,19 +51,7 @@ impl DeleteBitSet {
         }
     }
-}
-
-
-impl DeleteSet for DeleteBitSet {
-
-    fn empty() -> DeleteBitSet {
-        DeleteBitSet {
-            data: ReadOnlySource::empty(),
-            len: 0,
-        }
-    }
-
-    fn is_deleted(&self, doc: DocId) -> bool {
+    pub fn is_deleted(&self, doc: DocId) -> bool {
         if self.len == 0 {
             false
         } else {
@@ -78,16 +65,6 @@ impl DeleteBitSet {
     }
 }
 
-impl From<Option<DeleteBitSet>> for DeleteBitSet {
-    fn from(delete_bitset_opt: Option<DeleteBitSet>) -> Self {
-        if let Some(delete_bitset) = delete_bitset_opt {
-            delete_bitset
-        } else {
-            DeleteBitSet::empty()
-        }
-    }
-}
-
 impl HasLen for DeleteBitSet {
     fn len(&self) -> usize {
         self.len
diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs
index 00beb1633..c9eb84976 100644
--- a/src/indexer/merger.rs
+++ b/src/indexer/merger.rs
@@ -19,7 +19,6 @@ use termdict::TermStreamer;
 use fieldnorm::FieldNormsSerializer;
 use fieldnorm::FieldNormsWriter;
 use fieldnorm::FieldNormReader;
-use postings::DeleteSet;
 use postings::Postings;
 
 
@@ -296,12 +295,13 @@ impl IndexMerger {
                     let segment_reader = &self.readers[heap_item.segment_ord];
                     let inverted_index = segment_reader.inverted_index(indexed_field);
                     let mut segment_postings = inverted_index
-                        .read_postings_from_terminfo::<DeleteBitSet>(term_info, segment_postings_option);
-                    if segment_postings.advance() {
-                        Some((segment_ord, segment_postings))
-                    } else {
-                        None
+                        .read_postings_from_terminfo(term_info, segment_postings_option);
+                    while segment_postings.advance() {
+                        if !segment_reader.is_deleted(segment_postings.doc()) {
+                            return Some((segment_ord, segment_postings));
+                        }
                     }
+                    None
                 })
                 .collect();
 
@@ -309,7 +309,6 @@ impl IndexMerger {
             // of all of the segments containing the given term.
             //
             // These segments are non-empty and advance has already been called.
-
             if !segment_postings.is_empty() {
                 // If not, the `term` will be entirely removed.
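The merger now owns the delete check that the postings cursor used to perform. The loop above boils down to a small reusable pattern; here it is as a standalone sketch (`first_live_doc` is a hypothetical helper, not part of the patch):

```rust
// Hypothetical helper mirroring the merger's new loop: advance a postings
// cursor until it points at a document that is still alive in its segment.
fn first_live_doc(postings: &mut SegmentPostings, reader: &SegmentReader) -> Option<DocId> {
    while postings.advance() {
        let doc = postings.doc();
        if !reader.is_deleted(doc) {
            return Some(doc);
        }
    }
    None
}
```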
diff --git a/src/indexer/mod.rs b/src/indexer/mod.rs
index e10c01151..e33f23cc2 100644
--- a/src/indexer/mod.rs
+++ b/src/indexer/mod.rs
@@ -1,7 +1,7 @@
 pub mod index_writer;
 pub mod segment_serializer;
 pub mod merger;
-mod merge_policy;
+pub mod merge_policy;
 mod log_merge_policy;
 mod segment_register;
 mod segment_writer;
diff --git a/src/lib.rs b/src/lib.rs
index e2469b24c..6af44d613 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -292,8 +292,7 @@ mod tests {
     use Postings;
     use rand::{Rng, SeedableRng, XorShiftRng};
     use rand::distributions::{IndependentSample, Range};
-
-
+
     pub fn assert_nearly_equals(expected: f32, val: f32) {
         assert!(nearly_equals(val, expected), "Got {}, expected {}.", val, expected);
     }
@@ -460,6 +459,16 @@ mod tests {
         }
     }
 
+
+    fn advance_undeleted(docset: &mut DocSet, reader: &SegmentReader) -> bool {
+        while docset.advance() {
+            if !reader.is_deleted(docset.doc()) {
+                return true;
+            }
+        }
+        false
+    }
+
     #[test]
     fn test_delete_postings1() {
         let mut schema_builder = SchemaBuilder::default();
@@ -525,19 +534,19 @@ mod tests {
                 let mut postings = inverted_index
                     .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
                     .unwrap();
-                assert!(postings.advance());
+                assert!(advance_undeleted(&mut postings, reader));
                 assert_eq!(postings.doc(), 5);
-                assert!(!postings.advance());
+                assert!(!advance_undeleted(&mut postings, reader));
             }
             {
                 let mut postings = inverted_index
                     .read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions)
                     .unwrap();
-                assert!(postings.advance());
+                assert!(advance_undeleted(&mut postings, reader));
                 assert_eq!(postings.doc(), 3);
-                assert!(postings.advance());
+                assert!(advance_undeleted(&mut postings, reader));
                 assert_eq!(postings.doc(), 4);
-                assert!(!postings.advance());
+                assert!(!advance_undeleted(&mut postings, reader));
             }
         }
         {
@@ -569,19 +578,19 @@ mod tests {
                 let mut postings = inverted_index
                     .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
                     .unwrap();
-                assert!(postings.advance());
+                assert!(advance_undeleted(&mut postings, reader));
                 assert_eq!(postings.doc(), 5);
-                assert!(!postings.advance());
+                assert!(!advance_undeleted(&mut postings, reader));
             }
             {
                 let mut postings = inverted_index
                     .read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions)
                     .unwrap();
-                assert!(postings.advance());
+                assert!(advance_undeleted(&mut postings, reader));
                 assert_eq!(postings.doc(), 3);
-                assert!(postings.advance());
+                assert!(advance_undeleted(&mut postings, reader));
                 assert_eq!(postings.doc(), 4);
-                assert!(!postings.advance());
+                assert!(!advance_undeleted(&mut postings, reader));
             }
         }
         {
@@ -612,25 +621,25 @@ mod tests {
                 let mut postings = inverted_index
                     .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
                     .unwrap();
-                assert!(!postings.advance());
+                assert!(!advance_undeleted(&mut postings, reader));
             }
             {
                 let mut postings = inverted_index
                     .read_postings(&term_b, IndexRecordOption::WithFreqsAndPositions)
                     .unwrap();
-                assert!(postings.advance());
+                assert!(advance_undeleted(&mut postings, reader));
                 assert_eq!(postings.doc(), 3);
-                assert!(postings.advance());
+                assert!(advance_undeleted(&mut postings, reader));
                 assert_eq!(postings.doc(), 4);
-                assert!(!postings.advance());
+                assert!(!advance_undeleted(&mut postings, reader));
             }
             {
                 let mut postings = inverted_index
                     .read_postings(&term_c, IndexRecordOption::WithFreqsAndPositions)
                     .unwrap();
-                assert!(postings.advance());
+                assert!(advance_undeleted(&mut postings, reader));
                 assert_eq!(postings.doc(), 4);
-                assert!(!postings.advance());
+                assert!(!advance_undeleted(&mut postings, reader));
             }
         }
     }
diff --git a/src/postings/delete_set.rs b/src/postings/delete_set.rs
deleted file mode 100644
index 068863e68..000000000
--- a/src/postings/delete_set.rs
+++ /dev/null
@@ -1,27 +0,0 @@
-use fastfield::DeleteBitSet;
-use DocId;
-
-pub trait DeleteSet: 'static + From<Option<DeleteBitSet>> {
-    fn is_deleted(&self, doc: DocId) -> bool;
-    fn empty() -> Self;
-}
-
-
-#[derive(Default)]
-pub struct NoDelete;
-
-impl DeleteSet for NoDelete {
-    #[inline(always)]
-    fn is_deleted(&self, _doc: DocId) -> bool {
-        false
-    }
-    fn empty() -> Self {
-        NoDelete
-    }
-}
-
-impl From<Option<DeleteBitSet>> for NoDelete {
-    fn from(delete_bitset_opt: Option<DeleteBitSet>) -> Self {
-        assert!(delete_bitset_opt.is_none(), "NoDelete should not be used if there are some deleted documents.");
-        NoDelete
-    }
-}
\ No newline at end of file
diff --git a/src/postings/mod.rs b/src/postings/mod.rs
index 75c2e4c96..69db5904e 100644
--- a/src/postings/mod.rs
+++ b/src/postings/mod.rs
@@ -13,13 +13,11 @@ mod serializer;
 mod postings_writer;
 mod term_info;
 mod segment_postings;
-mod delete_set;
 
 use self::recorder::{NothingRecorder, Recorder, TFAndPositionRecorder, TermFrequencyRecorder};
 
 pub use self::serializer::{FieldSerializer, InvertedIndexSerializer};
 pub(crate) use self::postings_writer::MultiFieldPostingsWriter;
-pub use self::delete_set::{DeleteSet, NoDelete};
 pub use self::term_info::TermInfo;
 pub use self::postings::Postings;
 
@@ -402,11 +400,9 @@ pub mod tests {
         {
             let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
             index_writer.delete_term(term_0);
-
             assert!(index_writer.commit().is_ok());
         }
         index.load_searchers().unwrap();
-
         let searcher = index.searcher();
         let segment_reader = searcher.segment_reader(0);
 
@@ -418,8 +414,9 @@ pub mod tests {
                 .unwrap();
 
             if i % 2 == 0 {
-                assert_eq!(segment_postings.skip_next(i), SkipResult::OverStep);
-                assert_eq!(segment_postings.doc(), i + 1);
+                assert_eq!(segment_postings.skip_next(i), SkipResult::Reached);
+                assert_eq!(segment_postings.doc(), i);
+                assert!(segment_reader.is_deleted(i));
             } else {
                 assert_eq!(segment_postings.skip_next(i), SkipResult::Reached);
                 assert_eq!(segment_postings.doc(), i);
@@ -453,7 +450,7 @@ pub mod tests {
         // delete everything else
         {
             let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
-            index_writer.delete_term(term_1); 
+            index_writer.delete_term(term_1);
             assert!(index_writer.commit().is_ok());
         }
 
@@ -469,7 +466,9 @@ pub mod tests {
             .read_postings(&term_2, IndexRecordOption::Basic)
             .unwrap();
 
-        assert_eq!(segment_postings.skip_next(0), SkipResult::End);
+        assert_eq!(segment_postings.skip_next(0), SkipResult::Reached);
+        assert_eq!(segment_postings.doc(), 0);
+        assert!(segment_reader.is_deleted(0));
 
         let mut segment_postings = segment_reader
             .inverted_index(term_2.field())
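These test updates pin down the behavioral change: `skip_next` may now return `SkipResult::Reached` on a document that has been deleted. A caller that wants "the first live document >= target" has to layer the check on top, roughly like this (`skip_to_live` is a hypothetical sketch, not part of the patch):

```rust
// Hypothetical sketch: skip to `target`, then walk forward past deleted docs.
fn skip_to_live(postings: &mut SegmentPostings,
                reader: &SegmentReader,
                target: DocId) -> SkipResult {
    match postings.skip_next(target) {
        SkipResult::End => SkipResult::End,
        result => {
            if !reader.is_deleted(postings.doc()) {
                return result;
            }
            while postings.advance() {
                if !reader.is_deleted(postings.doc()) {
                    // Any live doc found here is past `target`.
                    return SkipResult::OverStep;
                }
            }
            SkipResult::End
        }
    }
}
```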
diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs
index 17f107ed8..637420756 100644
--- a/src/postings/segment_postings.rs
+++ b/src/postings/segment_postings.rs
@@ -7,7 +7,6 @@ use postings::Postings;
 use docset::{DocSet, SkipResult};
 use fst::Streamer;
 use compression::compressed_block_size;
-use postings::{NoDelete, DeleteSet};
 use directory::{ReadOnlySource, SourceRead};
 use postings::FreqReadingOption;
 use postings::serializer::PostingsSerializer;
@@ -53,20 +52,18 @@ impl PositionComputer {
 ///
 /// As we iterate through the `SegmentPostings`, the frequencies are optionally decoded.
 /// Positions on the other hand, are optionally entirely decoded upfront.
-pub struct SegmentPostings<TDeleteSet: DeleteSet> {
+pub struct SegmentPostings {
     block_cursor: BlockSegmentPostings,
     cur: usize,
-    delete_bitset: TDeleteSet,
    position_computer: Option<PositionComputer>,
 }
 
-impl SegmentPostings<NoDelete> {
+impl SegmentPostings {
     /// Returns an empty segment postings object
     pub fn empty() -> Self {
         let empty_block_cursor = BlockSegmentPostings::empty();
         SegmentPostings {
             block_cursor: empty_block_cursor,
-            delete_bitset: NoDelete,
             cur: COMPRESSION_BLOCK_SIZE,
             position_computer: None,
         }
@@ -80,7 +77,7 @@ impl SegmentPostings {
     /// It serializes the doc ids using tantivy's codec
     /// and returns a `SegmentPostings` object that embeds a
     /// buffer with the serialized data.
-    pub fn create_from_docs(docs: &[u32]) -> SegmentPostings<NoDelete> {
+    pub fn create_from_docs(docs: &[u32]) -> SegmentPostings {
         let mut counting_writer = CountingWriter::wrap(Vec::new());
         {
             let mut postings_serializer = PostingsSerializer::new(&mut counting_writer, false);
@@ -96,13 +93,11 @@ impl SegmentPostings {
             SourceRead::from(data),
             FreqReadingOption::NoFreq,
         );
-        SegmentPostings::from_block_postings(block_segment_postings, NoDelete, None)
+        SegmentPostings::from_block_postings(block_segment_postings, None)
     }
 }
 
-impl<TDeleteSet: DeleteSet> SegmentPostings<TDeleteSet> {
-
-
+impl SegmentPostings {
     /// Reads a Segment postings from an &[u8]
     ///
     /// * `len` - number of document in the posting lists.
     /// * `data` - data array. The complete data is not necessarily used.
     /// * `freq_handler` - the freq handler is in charge of decoding
     ///   frequencies and/or positions
     pub fn from_block_postings(
         segment_block_postings: BlockSegmentPostings,
-        delete_bitset: TDeleteSet,
         positions_stream_opt: Option<CompressedIntStream>,
-    ) -> SegmentPostings<TDeleteSet> {
+    ) -> SegmentPostings {
         SegmentPostings {
             block_cursor: segment_block_postings,
             cur: COMPRESSION_BLOCK_SIZE, // cursor within the block
-            delete_bitset,
             position_computer: positions_stream_opt.map(PositionComputer::new),
         }
     }
@@ -142,9 +135,9 @@ fn exponential_search(target: u32, mut start: usize, arr: &[u32]) -> (usize, usize) {
     }
 }
 
-impl<TDeleteSet: DeleteSet> DocSet for SegmentPostings<TDeleteSet> {
+impl DocSet for SegmentPostings {
     fn skip_next(&mut self, target: DocId) -> SkipResult {
-        if !self.adv ance() {
+        if !self.advance() {
             return SkipResult::End;
         }
         if self.doc() == target {
@@ -188,42 +181,33 @@ impl DocSet for SegmentPostings {
                 break;
             }
         }
-        {
-            // we're in the right block now, start with an exponential search
-            let block_docs = self.block_cursor.docs();
-            let (mut start, end) = exponential_search(target, self.cur, block_docs);
+        // we're in the right block now, start with an exponential search
+        let block_docs = self.block_cursor.docs();
 
-            start += block_docs[start..end]
-                .binary_search(&target)
-                .unwrap_or_else(|e| e);
+        let (mut start, end) = exponential_search(target, self.cur, block_docs);
 
-            // `doc` is now the first element >= `target`
-            let doc = block_docs[start];
-            debug_assert!(doc >= target);
+        start += block_docs[start..end]
+            .binary_search(&target)
+            .unwrap_or_else(|e| e);
 
-            if self.position_computer.is_some() {
-                let freqs_skipped = &self.block_cursor.freqs()[self.cur..start];
-                let sum_freqs: u32 = freqs_skipped.iter().sum();
-                self.position_computer.as_mut()
-                    .unwrap()
-                    .add_skip(sum_freqs as usize);
-            }
+        // `doc` is now the first element >= `target`
+        let doc = block_docs[start];
+        debug_assert!(doc >= target);
 
-            self.cur = start;
-
-            if !self.delete_bitset.is_deleted(doc) {
-                if doc == target {
-                    return SkipResult::Reached;
-                } else {
-                    return SkipResult::OverStep;
-                }
-            }
+        if self.position_computer.is_some() {
+            let freqs_skipped = &self.block_cursor.freqs()[self.cur..start];
+            let sum_freqs: u32 = freqs_skipped.iter().sum();
+            self.position_computer.as_mut()
+                .unwrap()
+                .add_skip(sum_freqs as usize);
         }
-        if self.advance() {
-            SkipResult::OverStep
+
+        self.cur = start;
+        if doc == target {
+            return SkipResult::Reached;
         } else {
-            SkipResult::End
+            return SkipResult::OverStep;
         }
     }
 
@@ -232,25 +216,19 @@ impl DocSet for SegmentPostings {
     // next needs to be called a first time to point to the correct element.
     #[inline]
     fn advance(&mut self) -> bool {
-        loop {
-            {
-                if self.position_computer.is_some() {
-                    let term_freq = self.term_freq() as usize;
-                    self.position_computer.as_mut().unwrap().add_skip(term_freq);
-                }
-            }
-            self.cur += 1;
-            if self.cur >= self.block_cursor.block_len() {
-                self.cur = 0;
-                if !self.block_cursor.advance() {
-                    self.cur = COMPRESSION_BLOCK_SIZE;
-                    return false;
-                }
-            }
-            if !self.delete_bitset.is_deleted(self.doc()) {
-                return true;
+        if self.position_computer.is_some() {
+            let term_freq = self.term_freq() as usize;
+            self.position_computer.as_mut().unwrap().add_skip(term_freq);
+        }
+        self.cur += 1;
+        if self.cur >= self.block_cursor.block_len() {
+            self.cur = 0;
+            if !self.block_cursor.advance() {
+                self.cur = COMPRESSION_BLOCK_SIZE;
+                return false;
             }
         }
+        true
     }
 
     fn size_hint(&self) -> u32 {
@@ -285,13 +263,13 @@ impl DocSet for SegmentPostings {
     }
 }
 
-impl<TDeleteSet: DeleteSet> HasLen for SegmentPostings<TDeleteSet> {
+impl HasLen for SegmentPostings {
     fn len(&self) -> usize {
         self.block_cursor.doc_freq()
     }
 }
 
-impl<TDeleteSet: DeleteSet> Postings for SegmentPostings<TDeleteSet> {
+impl Postings for SegmentPostings {
     fn term_freq(&self) -> u32 {
         self.block_cursor.freq(self.cur)
     }
diff --git a/src/query/boolean_query/boolean_weight.rs b/src/query/boolean_query/boolean_weight.rs
index f5b2511a2..7ba3bfc9d 100644
--- a/src/query/boolean_query/boolean_weight.rs
+++ b/src/query/boolean_query/boolean_weight.rs
@@ -12,7 +12,7 @@ use query::RequiredOptionalScorer;
 use query::score_combiner::{DoNothingCombiner, ScoreCombiner, SumWithCoordsCombiner};
 use Result;
 use query::intersect_scorers;
-use query::term_query::{TermScorerWithDeletes, TermScorerNoDeletes};
+use query::term_query::TermScorer;
 
 fn scorer_union<TScoreCombiner>(scorers: Vec<Box<Scorer>>) -> Box<Scorer>
@@ -27,32 +27,18 @@ where
 {
     {
         let is_all_term_queries = scorers.iter().all(|scorer| {
             let scorer_ref: &Scorer = scorer.borrow();
-            Downcast::<TermScorerWithDeletes>::is_type(scorer_ref)
+            Downcast::<TermScorer>::is_type(scorer_ref)
         });
         if is_all_term_queries {
-            let scorers: Vec<TermScorerWithDeletes> = scorers
+            let scorers: Vec<TermScorer> = scorers
                 .into_iter()
-                .map(|scorer| *Downcast::<TermScorerWithDeletes>::downcast(scorer).unwrap())
+                .map(|scorer| *Downcast::<TermScorer>::downcast(scorer).unwrap())
                 .collect();
-            let scorer: Box<Scorer> = box Union::<TermScorerWithDeletes, TScoreCombiner>::from(scorers);
+            let scorer: Box<Scorer> = box Union::<TermScorer, TScoreCombiner>::from(scorers);
             return scorer;
         }
     }
-    {
-        let is_all_term_queries = scorers.iter().all(|scorer| {
-            let scorer_ref: &Scorer = scorer.borrow();
-            Downcast::<TermScorerNoDeletes>::is_type(scorer_ref)
-        });
-        if is_all_term_queries {
-            let scorers: Vec<TermScorerNoDeletes> = scorers
-                .into_iter()
-                .map(|scorer| *Downcast::<TermScorerNoDeletes>::downcast(scorer).unwrap())
-                .collect();
-            let scorer: Box<Scorer> = box Union::<TermScorerNoDeletes, TScoreCombiner>::from(scorers);
-            return scorer;
-        }
-    }
     let scorer: Box<Scorer> = box Union::<_, TScoreCombiner>::from(scorers);
     return scorer;
diff --git a/src/query/boolean_query/mod.rs b/src/query/boolean_query/mod.rs
index 141dc4fcb..6df8b8cec 100644
--- a/src/query/boolean_query/mod.rs
+++ b/src/query/boolean_query/mod.rs
@@ -19,7 +19,7 @@ mod tests {
     use query::QueryParser;
     use query::RequiredOptionalScorer;
     use query::score_combiner::SumWithCoordsCombiner;
-    use query::term_query::TermScorerNoDeletes;
+    use query::term_query::TermScorer;
 
     fn aux_test_helper() -> (Index, Field) {
         let mut schema_builder = SchemaBuilder::default();
@@ -71,7 +71,7 @@ mod tests {
         let searcher = index.searcher();
         let weight = query.weight(&*searcher, true).unwrap();
         let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
-        assert!(Downcast::<TermScorerNoDeletes>::is_type(&*scorer));
+        assert!(Downcast::<TermScorer>::is_type(&*scorer));
     }
 
     #[test]
@@ -83,7 +83,7 @@ mod tests {
         let query = query_parser.parse_query("+a +b +c").unwrap();
         let weight = query.weight(&*searcher, true).unwrap();
         let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
-        assert!(Downcast::<Intersection<TermScorerNoDeletes>>::is_type(&*scorer));
+        assert!(Downcast::<Intersection<TermScorer>>::is_type(&*scorer));
     }
     {
         let query = query_parser.parse_query("+a +(b c)").unwrap();
@@ -111,7 +111,7 @@ mod tests {
         let weight = query.weight(&*searcher, false).unwrap();
         let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
         println!("{:?}", scorer.type_name());
-        assert!(Downcast::<TermScorerNoDeletes>::is_type(&*scorer));
+        assert!(Downcast::<TermScorer>::is_type(&*scorer));
     }
 }
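The `skip_next` rewrite keeps the two-phase lookup: `exponential_search` narrows a window by doubling jumps, then a binary search inside that window finds the first element `>= target`. The hunks above only show the function's signature; a typical galloping search of this shape looks like the following (an illustration, not the file's actual body):

```rust
// Illustrative galloping search: returns a window (begin, end) of `arr` that
// contains the first element >= target, starting the scan at index `begin`.
fn exponential_search(target: u32, mut begin: usize, arr: &[u32]) -> (usize, usize) {
    let mut jump = 1;
    loop {
        let probe = begin + jump;
        if probe >= arr.len() {
            return (begin, arr.len());
        }
        if arr[probe] >= target {
            return (begin, probe + 1);
        }
        begin = probe;
        jump *= 2;
    }
}
```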
diff --git a/src/query/intersection.rs b/src/query/intersection.rs
index 35a5ab33e..874e7229c 100644
--- a/src/query/intersection.rs
+++ b/src/query/intersection.rs
@@ -5,7 +5,7 @@ use DocId;
 use downcast::Downcast;
 use std::borrow::Borrow;
 use Score;
-use query::term_query::{TermScorerNoDeletes, TermScorerWithDeletes};
+use query::term_query::TermScorer;
 
 /// Returns the intersection scorer.
 ///
@@ -28,10 +28,10 @@ pub fn intersect_scorers(mut scorers: Vec<Box<Scorer>>) -> Box<Scorer> {
             {
                 if [&left, &right].into_iter().all(|scorer| {
                     let scorer_ref: &Scorer = (*scorer).borrow();
-                    Downcast::<TermScorerWithDeletes>::is_type(scorer_ref)
+                    Downcast::<TermScorer>::is_type(scorer_ref)
                 }) {
-                    let left = *Downcast::<TermScorerWithDeletes>::downcast(left).unwrap();
-                    let right = *Downcast::<TermScorerWithDeletes>::downcast(right).unwrap();
+                    let left = *Downcast::<TermScorer>::downcast(left).unwrap();
+                    let right = *Downcast::<TermScorer>::downcast(right).unwrap();
                     return box Intersection {
                         left,
                         right,
@@ -40,29 +40,11 @@ pub fn intersect_scorers(mut scorers: Vec<Box<Scorer>>) -> Box<Scorer> {
                     }
                 }
             }
-            {
-                if [&left, &right].into_iter()
-                    .all(|scorer| {
-                        let scorer_ref: &Scorer = (*scorer).borrow();
-                        Downcast::<TermScorerNoDeletes>::is_type(scorer_ref)
-                    }) {
-                    let left = *Downcast::<TermScorerNoDeletes>::downcast(left).unwrap();
-                    let right = *Downcast::<TermScorerNoDeletes>::downcast(right).unwrap();
-                    return box Intersection {
-                        left,
-                        right,
-                        others: scorers,
-                        num_docsets
-                    }
-                }
-            }
-            {
-                return box Intersection {
-                    left,
-                    right,
-                    others: scorers,
-                    num_docsets
-                }
+            return box Intersection {
+                left,
+                right,
+                others: scorers,
+                num_docsets
             }
         }
         _ => { unreachable!(); }
diff --git a/src/query/query.rs b/src/query/query.rs
index 9f873b87b..fff292702 100644
--- a/src/query/query.rs
+++ b/src/query/query.rs
@@ -79,11 +79,9 @@ pub trait Query: fmt::Debug {
                 let _ = segment_search_timer.open("set_segment");
                 collector.set_segment(segment_ord as SegmentLocalId, segment_reader)?;
             }
+            let _collection_timer = segment_search_timer.open("collection");
             let mut scorer = weight.scorer(segment_reader)?;
-            {
-                let _collection_timer = segment_search_timer.open("collection");
-                scorer.collect(collector);
-            }
+            scorer.collect(collector, segment_reader.delete_bitset());
         }
     }
     Ok(timer_tree)
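With this change the per-segment search loop hands the delete bitset to the scorer once, as an `Option`, so segments without deletes pay no per-document check at all. Driving it looks roughly like this sketch (assumes a `weight`, a `segment_reader`, and a `collector: &mut Collector` already in scope):

```rust
// Sketch of the new collection entry point: the scorer receives the segment's
// delete bitset (None when the segment has no deletes) and skips dead docs.
let mut scorer = weight.scorer(segment_reader)?;
scorer.collect(collector, segment_reader.delete_bitset());
```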
diff --git a/src/query/scorer.rs b/src/query/scorer.rs
index 7deadb0e5..3f9de7ab5 100644
--- a/src/query/scorer.rs
+++ b/src/query/scorer.rs
@@ -5,6 +5,7 @@ use docset::{DocSet, SkipResult};
 use common::BitSet;
 use std::ops::DerefMut;
 use downcast;
+use fastfield::DeleteBitSet;
 
 /// Scored set of documents matching a query within a specific segment.
 ///
@@ -17,13 +18,23 @@ pub trait Scorer: downcast::Any + DocSet + 'static {
 
     /// Consumes the complete `DocSet` and
     /// push the scored documents to the collector.
-    fn collect(&mut self, collector: &mut Collector) {
-        while self.advance() {
-            collector.collect(self.doc(), self.score());
+    fn collect(&mut self, collector: &mut Collector, delete_bitset_opt: Option<&DeleteBitSet>) {
+        if let Some(delete_bitset) = delete_bitset_opt {
+            while self.advance() {
+                let doc = self.doc();
+                if !delete_bitset.is_deleted(doc) {
+                    collector.collect(doc, self.score());
+                }
+            }
+        } else {
+            while self.advance() {
+                collector.collect(self.doc(), self.score());
+            }
         }
     }
 }
 
+
 #[allow(missing_docs)]
 mod downcast_impl {
     downcast!(super::Scorer);
@@ -34,9 +45,9 @@ impl Scorer for Box<Scorer> {
         self.deref_mut().score()
     }
 
-    fn collect(&mut self, collector: &mut Collector) {
+    fn collect(&mut self, collector: &mut Collector, delete_bitset: Option<&DeleteBitSet>) {
         let scorer = self.deref_mut();
-        scorer.collect(collector);
+        scorer.collect(collector, delete_bitset);
     }
 }
 
@@ -50,6 +61,7 @@ impl DocSet for EmptyScorer {
         false
     }
 
+
     fn doc(&self) -> DocId {
         panic!(
             "You may not call .doc() on a scorer \
diff --git a/src/query/term_query/mod.rs b/src/query/term_query/mod.rs
index 861b275c6..455ec3ca5 100644
--- a/src/query/term_query/mod.rs
+++ b/src/query/term_query/mod.rs
@@ -6,16 +6,6 @@ pub use self::term_query::TermQuery;
 pub use self::term_weight::TermWeight;
 pub use self::term_scorer::TermScorer;
 
-use postings::SegmentPostings;
-use postings::NoDelete;
-use fastfield::DeleteBitSet;
-
-pub(crate) type TermScorerWithDeletes = TermScorer<SegmentPostings<DeleteBitSet>>;
-pub(crate) type TermScorerNoDeletes = TermScorer<SegmentPostings<NoDelete>>;
-
-
-
-
 #[cfg(test)]
 mod tests {
diff --git a/src/query/term_query/term_scorer.rs b/src/query/term_query/term_scorer.rs
index 3d9473f70..12c73dacf 100644
--- a/src/query/term_query/term_scorer.rs
+++ b/src/query/term_query/term_scorer.rs
@@ -6,18 +6,19 @@ use query::Scorer;
 use postings::Postings;
 use fieldnorm::FieldNormReader;
 use query::bm25::BM25Weight;
+use postings::SegmentPostings;
 
-pub struct TermScorer<TPostings: Postings> {
-    postings: TPostings,
+pub struct TermScorer {
+    postings: SegmentPostings,
     fieldnorm_reader: FieldNormReader,
     similarity_weight: BM25Weight,
 }
 
-impl<TPostings: Postings> TermScorer<TPostings> {
-    pub fn new(postings: TPostings,
+impl TermScorer {
+    pub fn new(postings: SegmentPostings,
                fieldnorm_reader: FieldNormReader,
-               similarity_weight: BM25Weight) -> TermScorer<TPostings> {
+               similarity_weight: BM25Weight) -> TermScorer {
         TermScorer {
             postings,
             fieldnorm_reader,
@@ -26,7 +27,7 @@ impl TermScorer {
     }
 }
 
-impl<TPostings: Postings> DocSet for TermScorer<TPostings> {
+impl DocSet for TermScorer {
     fn advance(&mut self) -> bool {
         self.postings.advance()
     }
@@ -44,7 +45,7 @@ impl DocSet for TermScorer {
     }
 }
 
-impl<TPostings: Postings> Scorer for TermScorer<TPostings> {
+impl Scorer for TermScorer {
     fn score(&mut self) -> Score {
         let doc = self.doc();
         let fieldnorm_id = self.fieldnorm_reader.fieldnorm_id(doc);
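Since `collect` keeps a default body on the trait, a scorer only has to provide the `DocSet` methods plus `score`; the delete-aware collection loop comes for free. A minimal hypothetical scorer along these lines (the name and the constant score are illustrative, not from the patch):

```rust
// Hypothetical constant-score scorer: implementing DocSet and score() is
// enough; the trait's default collect() handles the delete-bitset branching.
struct ConstScorer<TDocSet: DocSet + 'static> {
    docset: TDocSet,
}

impl<TDocSet: DocSet + 'static> DocSet for ConstScorer<TDocSet> {
    fn advance(&mut self) -> bool {
        self.docset.advance()
    }
    fn doc(&self) -> DocId {
        self.docset.doc()
    }
    fn size_hint(&self) -> u32 {
        self.docset.size_hint()
    }
}

impl<TDocSet: DocSet + 'static> Scorer for ConstScorer<TDocSet> {
    fn score(&mut self) -> Score {
        1f32
    }
}
```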
diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs
index ee8f75d1e..8f7827bb9 100644
--- a/src/query/term_query/term_weight.rs
+++ b/src/query/term_query/term_weight.rs
@@ -6,8 +6,6 @@ use docset::DocSet;
 use postings::SegmentPostings;
 use schema::IndexRecordOption;
 use super::term_scorer::TermScorer;
-use fastfield::DeleteBitSet;
-use postings::NoDelete;
 use Result;
 use query::bm25::BM25Weight;
 
@@ -24,33 +22,18 @@ impl Weight for TermWeight {
         let inverted_index = reader.inverted_index(field);
         let fieldnorm_reader = reader.get_fieldnorms_reader(field);
         let similarity_weight = self.similarity_weight.clone();
-        if reader.has_deletes() {
-            let postings_opt: Option<SegmentPostings<DeleteBitSet>> =
+        let postings_opt: Option<SegmentPostings> =
                 inverted_index.read_postings(&self.term, self.index_record_option);
-            if let Some(segment_postings) = postings_opt {
-                Ok(box TermScorer::new(segment_postings,
-                                       fieldnorm_reader,
-                                       similarity_weight))
-            } else {
-                Ok(box TermScorer::new(
-                    SegmentPostings::<DeleteBitSet>::empty(),
-                    fieldnorm_reader,
-                    similarity_weight))
-            }
-        } else {
-            let postings_opt: Option<SegmentPostings<NoDelete>> =
-                inverted_index.read_postings_no_deletes(&self.term, self.index_record_option);
         if let Some(segment_postings) = postings_opt {
             Ok(box TermScorer::new(segment_postings,
                                    fieldnorm_reader,
                                    similarity_weight))
         } else {
             Ok(box TermScorer::new(
-                SegmentPostings::<NoDelete>::empty(),
+                SegmentPostings::empty(),
                 fieldnorm_reader,
                 similarity_weight))
         }
-        }
     }
 
     fn count(&self, reader: &SegmentReader) -> Result<u32> {