Added specialized SegmentPostings when there are no DeleteSet

This commit is contained in:
Paul Masurel
2018-02-21 23:26:24 +09:00
parent fdb9c3c516
commit e423784fd0
14 changed files with 401 additions and 183 deletions

View File

@@ -7,6 +7,7 @@ use schema::Term;
use fastfield::DeleteBitSet;
use compression::CompressedIntStream;
use postings::FreqReadingOption;
use postings::{DeleteSet, NoDelete};
/// The inverted index reader is in charge of accessing
/// the inverted index associated to a specific field.
@@ -25,7 +26,7 @@ pub struct InvertedIndexReader {
termdict: TermDictionaryImpl,
postings_source: ReadOnlySource,
positions_source: ReadOnlySource,
delete_bitset: DeleteBitSet,
delete_bitset_opt: Option<DeleteBitSet>,
record_option: IndexRecordOption,
}
@@ -34,14 +35,14 @@ impl InvertedIndexReader {
termdict_source: ReadOnlySource,
postings_source: ReadOnlySource,
positions_source: ReadOnlySource,
delete_bitset: DeleteBitSet,
delete_bitset_opt: Option<DeleteBitSet>,
record_option: IndexRecordOption,
) -> InvertedIndexReader {
InvertedIndexReader {
termdict: TermDictionaryImpl::from_source(termdict_source),
postings_source,
positions_source,
delete_bitset,
delete_bitset_opt,
record_option,
}
}
@@ -105,13 +106,15 @@ impl InvertedIndexReader {
/// This method is for an advanced usage only.
///
/// Most user should prefer using `read_postings` instead.
pub fn read_postings_from_terminfo(
pub fn read_postings_from_terminfo<TDeleteSet: DeleteSet>(
&self,
term_info: &TermInfo,
option: IndexRecordOption,
) -> SegmentPostings {
) -> SegmentPostings<TDeleteSet> {
let block_postings = self.read_block_postings_from_terminfo(term_info, option);
let delete_bitset = self.delete_bitset.clone();
let delete_set = TDeleteSet::from(self.delete_bitset_opt.iter()
.cloned()
.next());
let position_stream = {
if option.has_positions() {
let position_offset = term_info.positions_offset;
@@ -123,7 +126,7 @@ impl InvertedIndexReader {
None
}
};
SegmentPostings::from_block_postings(block_postings, delete_bitset, position_stream)
SegmentPostings::from_block_postings(block_postings, delete_set, position_stream)
}
/// Returns the segment postings associated with the term, and with the given option,
@@ -136,11 +139,17 @@ impl InvertedIndexReader {
/// For instance, requesting `IndexRecordOption::Freq` for a
/// `TextIndexingOptions` that does not index position will return a `SegmentPostings`
/// with `DocId`s and frequencies.
pub fn read_postings(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings> {
pub fn read_postings(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings<DeleteBitSet>> {
let term_info = get!(self.get_term_info(term));
Some(self.read_postings_from_terminfo(&term_info, option))
}
pub(crate) fn read_postings_no_deletes(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings<NoDelete>> {
let term_info = get!(self.get_term_info(term));
Some(self.read_postings_from_terminfo(&term_info, option))
}
/// Returns the number of documents containing the term.
pub fn doc_freq(&self, term: &Term) -> u32 {
self.get_term_info(term)

View File

@@ -26,6 +26,7 @@ use schema::Schema;
use termdict::TermDictionary;
use fastfield::{FastValue, MultiValueIntFastFieldReader};
use schema::Cardinality;
use postings::DeleteSet;
/// Entry point to access all of the datastructures of the `Segment`
///
@@ -54,7 +55,7 @@ pub struct SegmentReader {
fieldnorms_composite: CompositeFile,
store_reader: StoreReader,
delete_bitset: DeleteBitSet,
delete_bitset_opt: Option<DeleteBitSet>,
schema: Schema,
}
@@ -79,7 +80,13 @@ impl SegmentReader {
/// Return the number of documents that have been
/// deleted in the segment.
pub fn num_deleted_docs(&self) -> DocId {
self.delete_bitset.len() as DocId
self.delete_bitset()
.map(|delete_set| delete_set.len() as DocId)
.unwrap_or(0u32)
}
pub fn has_deletes(&self) -> bool {
self.num_deleted_docs() > 0
}
/// Accessor to a segment's fast field reader given a field.
@@ -194,12 +201,13 @@ impl SegmentReader {
let fieldnorms_data = segment.open_read(SegmentComponent::FIELDNORMS)?;
let fieldnorms_composite = CompositeFile::open(&fieldnorms_data)?;
let delete_bitset = if segment.meta().has_deletes() {
let delete_data = segment.open_read(SegmentComponent::DELETE)?;
DeleteBitSet::open(delete_data)
} else {
DeleteBitSet::empty()
};
let delete_bitset_opt =
if segment.meta().has_deletes() {
let delete_data = segment.open_read(SegmentComponent::DELETE)?;
Some(DeleteBitSet::open(delete_data))
} else {
None
};
let schema = segment.schema();
Ok(SegmentReader {
@@ -211,7 +219,7 @@ impl SegmentReader {
fieldnorms_composite,
segment_id: segment.id(),
store_reader,
delete_bitset,
delete_bitset_opt,
positions_composite,
schema,
})
@@ -253,7 +261,7 @@ impl SegmentReader {
termdict_source,
postings_source,
positions_source,
self.delete_bitset.clone(),
self.delete_bitset_opt.clone(),
record_option,
));
@@ -282,14 +290,16 @@ impl SegmentReader {
/// Returns the bitset representing
/// the documents that have been deleted.
pub fn delete_bitset(&self) -> &DeleteBitSet {
&self.delete_bitset
pub fn delete_bitset(&self) -> Option<&DeleteBitSet> {
self.delete_bitset_opt.as_ref()
}
/// Returns true iff the `doc` is marked
/// as deleted.
pub fn is_deleted(&self, doc: DocId) -> bool {
self.delete_bitset.is_deleted(doc)
self.delete_bitset()
.map(|delete_set| delete_set.is_deleted(doc))
.unwrap_or(false)
}
}

View File

@@ -5,6 +5,7 @@ use std::io;
use directory::ReadOnlySource;
use DocId;
use common::HasLen;
use postings::DeleteSet;
/// Write a delete `BitSet`
///
@@ -51,22 +52,19 @@ impl DeleteBitSet {
}
}
/// Returns an empty delete bit set.
pub fn empty() -> DeleteBitSet {
}
impl DeleteSet for DeleteBitSet {
fn empty() -> DeleteBitSet {
DeleteBitSet {
data: ReadOnlySource::empty(),
len: 0,
}
}
/// Returns true iff the segment has some deleted documents.
pub fn has_deletes(&self) -> bool {
self.len() > 0
}
/// Returns true iff the document is deleted.
#[inline]
pub fn is_deleted(&self, doc: DocId) -> bool {
fn is_deleted(&self, doc: DocId) -> bool {
if self.len == 0 {
false
} else {
@@ -76,6 +74,18 @@ impl DeleteBitSet {
b & (1u8 << shift) != 0
}
}
}
impl From<Option<DeleteBitSet>> for DeleteBitSet {
fn from(delete_bitset_opt: Option<DeleteBitSet>) -> Self {
if let Some(delete_bitset) = delete_bitset_opt {
delete_bitset
} else {
DeleteBitSet::empty()
}
}
}
impl HasLen for DeleteBitSet {

View File

@@ -17,6 +17,7 @@ use store::StoreWriter;
use std::cmp::{max, min};
use termdict::TermDictionary;
use termdict::TermStreamer;
use postings::DeleteSet;
pub struct IndexMerger {
schema: Schema,
@@ -27,22 +28,28 @@ pub struct IndexMerger {
fn compute_min_max_val(
u64_reader: &FastFieldReader<u64>,
max_doc: DocId,
delete_bitset: &DeleteBitSet,
delete_bitset_opt: Option<&DeleteBitSet>,
) -> Option<(u64, u64)> {
if max_doc == 0 {
None
} else if !delete_bitset.has_deletes() {
// no deleted documents,
// we can use the previous min_val, max_val.
Some((u64_reader.min_value(), u64_reader.max_value()))
} else {
// some deleted documents,
// we need to recompute the max / min
(0..max_doc)
.filter(|doc_id| !delete_bitset.is_deleted(*doc_id))
.map(|doc_id| u64_reader.get(doc_id))
.minmax()
.into_option()
match delete_bitset_opt {
Some(delete_bitset) => {
// some deleted documents,
// we need to recompute the max / min
(0..max_doc)
.filter(|doc_id| !delete_bitset.is_deleted(*doc_id))
.map(|doc_id| u64_reader.get(doc_id))
.minmax()
.into_option()
}
None => {
// no deleted documents,
// we can use the previous min_val, max_val.
Some((u64_reader.min_value(), u64_reader.max_value()))
}
}
}
}
@@ -150,7 +157,7 @@ impl IndexMerger {
if let Some((seg_min_val, seg_max_val)) = compute_min_max_val(
&u64_reader,
reader.max_doc(),
reader.delete_bitset(),
reader.delete_bitset()
) {
// the segment has some non-deleted documents
min_val = min(min_val, seg_min_val);
@@ -181,12 +188,15 @@ impl IndexMerger {
let mut fast_single_field_serializer =
fast_field_serializer.new_u64_fast_field(field, min_val, max_val)?;
for (max_doc, u64_reader, delete_bitset) in u64_readers {
for (max_doc, u64_reader, delete_bitset_opt) in u64_readers {
for doc_id in 0..max_doc {
if !delete_bitset.is_deleted(doc_id) {
let val = u64_reader.get(doc_id);
fast_single_field_serializer.add_val(val)?;
if let Some(ref delete_bitset) = delete_bitset_opt {
if delete_bitset.is_deleted(doc_id) {
continue;
}
}
let val = u64_reader.get(doc_id);
fast_single_field_serializer.add_val(val)?;
}
}
@@ -271,7 +281,7 @@ impl IndexMerger {
let segment_reader = &self.readers[heap_item.segment_ord];
let inverted_index = segment_reader.inverted_index(indexed_field);
let mut segment_postings = inverted_index
.read_postings_from_terminfo(term_info, segment_postings_option);
.read_postings_from_terminfo::<DeleteBitSet>(term_info, segment_postings_option);
if segment_postings.advance() {
Some((segment_ord, segment_postings))
} else {

View File

@@ -0,0 +1,27 @@
use fastfield::DeleteBitSet;
use DocId;
pub trait DeleteSet: 'static + From<Option<DeleteBitSet>> {
fn is_deleted(&self, doc: DocId) -> bool;
fn empty() -> Self;
}
#[derive(Default)]
pub struct NoDelete;
impl DeleteSet for NoDelete {
#[inline(always)]
fn is_deleted(&self, doc: DocId) -> bool {
false
}
fn empty() -> Self {
NoDelete
}
}
impl From<Option<DeleteBitSet>> for NoDelete {
fn from(delete_bitset_opt: Option<DeleteBitSet>) -> Self {
assert!(delete_bitset_opt.is_none(), "NoDelete should not be used if there are some deleted documents.");
NoDelete
}
}

View File

@@ -13,11 +13,13 @@ mod serializer;
mod postings_writer;
mod term_info;
mod segment_postings;
mod delete_set;
use self::recorder::{NothingRecorder, Recorder, TFAndPositionRecorder, TermFrequencyRecorder};
pub use self::serializer::{FieldSerializer, InvertedIndexSerializer};
pub(crate) use self::postings_writer::MultiFieldPostingsWriter;
pub use self::delete_set::{DeleteSet, NoDelete};
pub use self::term_info::TermInfo;
pub use self::postings::Postings;
@@ -42,7 +44,7 @@ pub mod tests {
use DocId;
use Score;
use query::Intersection;
use query::Scorer;
use query::{Weight, Scorer};
use schema::{Document, SchemaBuilder, Term, INT_INDEXED, STRING, TEXT};
use core::SegmentComponent;
use indexer::SegmentWriter;
@@ -97,53 +99,54 @@ pub mod tests {
index_writer.add_document(doc!(title => r#"abc be be be be abc"#));
index_writer.commit().unwrap();
index.load_searchers().unwrap();
let searcher = index.searcher();
let query = TermQuery::new(
Term::from_field_text(title, "abc"),
IndexRecordOption::WithFreqsAndPositions,
);
let weight = query.specialized_weight(&*searcher, true);
let inverted_index = searcher.segment_reader(0u32).inverted_index(title);
let term = Term::from_field_text(title, "abc");
{
let mut scorer = weight
.specialized_scorer(searcher.segment_reader(0u32))
let mut postings = inverted_index
.read_postings(&term, IndexRecordOption::WithFreqsAndPositions)
.unwrap();
scorer.advance();
assert_eq!(&[0, 1, 2], scorer.postings().positions());
scorer.advance();
assert_eq!(&[0, 5], scorer.postings().positions());
postings.advance();
assert_eq!(&[0, 1, 2], postings.positions());
postings.advance();
assert_eq!(&[0, 5], postings.positions());
}
{
let mut scorer = weight
.specialized_scorer(searcher.segment_reader(0u32))
let mut postings = inverted_index
.read_postings(&term, IndexRecordOption::WithFreqsAndPositions)
.unwrap();
scorer.advance();
scorer.advance();
assert_eq!(&[0, 5], scorer.postings().positions());
postings.advance();
postings.advance();
assert_eq!(&[0, 5], postings.positions());
}
{
let mut scorer = weight
.specialized_scorer(searcher.segment_reader(0u32))
let mut postings = inverted_index
.read_postings(&term, IndexRecordOption::WithFreqsAndPositions)
.unwrap();
assert_eq!(scorer.skip_next(1), SkipResult::Reached);
assert_eq!(scorer.doc(), 1);
assert_eq!(&[0, 5], scorer.postings().positions());
assert_eq!(postings.skip_next(1), SkipResult::Reached);
assert_eq!(postings.doc(), 1);
assert_eq!(&[0, 5], postings.positions());
}
{
let mut scorer = weight
.specialized_scorer(searcher.segment_reader(0u32))
let mut postings = inverted_index
.read_postings(&term, IndexRecordOption::WithFreqsAndPositions)
.unwrap();
assert_eq!(scorer.skip_next(1002), SkipResult::Reached);
assert_eq!(scorer.doc(), 1002);
assert_eq!(&[0, 5], scorer.postings().positions());
assert_eq!(postings.skip_next(1002), SkipResult::Reached);
assert_eq!(postings.doc(), 1002);
assert_eq!(&[0, 5], postings.positions());
}
{
let mut scorer = weight
.specialized_scorer(searcher.segment_reader(0u32))
let mut postings = inverted_index
.read_postings(&term, IndexRecordOption::WithFreqsAndPositions)
.unwrap();
assert_eq!(scorer.skip_next(100), SkipResult::Reached);
assert_eq!(scorer.skip_next(1002), SkipResult::Reached);
assert_eq!(scorer.doc(), 1002);
assert_eq!(&[0, 5], scorer.postings().positions());
assert_eq!(postings.skip_next(100), SkipResult::Reached);
assert_eq!(postings.skip_next(1002), SkipResult::Reached);
assert_eq!(postings.doc(), 1002);
assert_eq!(&[0, 5], postings.positions());
}
}
@@ -277,18 +280,16 @@ pub mod tests {
assert!(index_writer.commit().is_ok());
}
index.load_searchers().unwrap();
let term_query = TermQuery::new(
Term::from_field_text(text_field, "a"),
IndexRecordOption::Basic,
);
let term_a = Term::from_field_text(text_field, "a");
let searcher = index.searcher();
let mut term_weight = term_query.specialized_weight(&*searcher, true);
term_weight.index_record_option = IndexRecordOption::WithFreqsAndPositions;
let segment_reader = &searcher.segment_readers()[0];
let mut term_scorer = term_weight.specialized_scorer(segment_reader).unwrap();
assert!(term_scorer.advance());
assert_eq!(term_scorer.doc(), 1u32);
assert_eq!(term_scorer.postings().positions(), &[1u32, 4]);
let segment_reader = searcher.segment_reader(0);
let mut postings = segment_reader
.inverted_index(text_field)
.read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
.unwrap();
assert!(postings.advance());
assert_eq!(postings.doc(), 1u32);
assert_eq!(postings.positions(), &[1u32, 4]);
}
#[test]

View File

@@ -8,6 +8,7 @@ use docset::{DocSet, SkipResult};
use std::cmp;
use fst::Streamer;
use compression::compressed_block_size;
use postings::{NoDelete, DeleteSet};
use fastfield::DeleteBitSet;
use std::cell::UnsafeCell;
use directory::{ReadOnlySource, SourceRead};
@@ -66,14 +67,25 @@ impl PositionComputer {
///
/// As we iterate through the `SegmentPostings`, the frequencies are optionally decoded.
/// Positions on the other hand, are optionally entirely decoded upfront.
pub struct SegmentPostings {
pub struct SegmentPostings<TDeleteSet: DeleteSet> {
block_cursor: BlockSegmentPostings,
cur: usize,
delete_bitset: DeleteBitSet,
delete_bitset: TDeleteSet,
position_computer: Option<UnsafeCell<PositionComputer>>,
}
impl SegmentPostings {
impl SegmentPostings<NoDelete> {
/// Returns an empty segment postings object
pub fn empty() -> Self {
let empty_block_cursor = BlockSegmentPostings::empty();
SegmentPostings {
block_cursor: empty_block_cursor,
delete_bitset: NoDelete,
cur: COMPRESSION_BLOCK_SIZE,
position_computer: None,
}
}
/// Creates a segment postings object with the given documents
/// and no frequency encoded.
///
@@ -82,14 +94,14 @@ impl SegmentPostings {
/// It serializes the doc ids using tantivy's codec
/// and returns a `SegmentPostings` object that embeds a
/// buffer with the serialized data.
pub fn create_from_docs(docs: &[u32]) -> SegmentPostings {
pub fn create_from_docs(docs: &[u32]) -> SegmentPostings<NoDelete> {
let mut buffer = Vec::new();
{
let mut postings_serializer = PostingsSerializer::new(&mut buffer, false);
for &doc in docs {
postings_serializer.write_doc(doc, 1u32).unwrap();
}
postings_serializer.close_term().unwrap();
postings_serializer.close_term().expect("In memory Serialization should never fail.");
}
let data = ReadOnlySource::from(buffer);
let block_segment_postings = BlockSegmentPostings::from_data(
@@ -97,8 +109,20 @@ impl SegmentPostings {
SourceRead::from(data),
FreqReadingOption::NoFreq,
);
SegmentPostings::from_block_postings(block_segment_postings, DeleteBitSet::empty(), None)
SegmentPostings::from_block_postings(block_segment_postings, NoDelete, None)
}
}
impl<TDeleteSet: DeleteSet> SegmentPostings<TDeleteSet> {
fn position_add_skip<F: FnOnce() -> usize>(&self, num_skips_fn: F) {
if let Some(position_computer) = self.position_computer.as_ref() {
let num_skips = num_skips_fn();
unsafe {
(*position_computer.get()).add_skip(num_skips);
}
}
}
/// Reads a Segment postings from an &[u8]
///
@@ -108,9 +132,9 @@ impl SegmentPostings {
/// frequencies and/or positions
pub fn from_block_postings(
segment_block_postings: BlockSegmentPostings,
delete_bitset: DeleteBitSet,
delete_bitset: TDeleteSet,
positions_stream_opt: Option<CompressedIntStream>,
) -> SegmentPostings {
) -> SegmentPostings<TDeleteSet> {
let position_computer =
positions_stream_opt.map(|stream| UnsafeCell::new(PositionComputer::new(stream)));
SegmentPostings {
@@ -120,29 +144,9 @@ impl SegmentPostings {
position_computer,
}
}
/// Returns an empty segment postings object
pub fn empty() -> SegmentPostings {
let empty_block_cursor = BlockSegmentPostings::empty();
SegmentPostings {
block_cursor: empty_block_cursor,
delete_bitset: DeleteBitSet::empty(),
cur: COMPRESSION_BLOCK_SIZE,
position_computer: None,
}
}
fn position_add_skip<F: FnOnce() -> usize>(&self, num_skips_fn: F) {
if let Some(position_computer) = self.position_computer.as_ref() {
let num_skips = num_skips_fn();
unsafe {
(*position_computer.get()).add_skip(num_skips);
}
}
}
}
impl DocSet for SegmentPostings {
impl<TDeleteSet: DeleteSet> DocSet for SegmentPostings<TDeleteSet> {
// goes to the next element.
// next needs to be called a first time to point to the correct element.
#[inline]
@@ -299,13 +303,14 @@ impl DocSet for SegmentPostings {
}
}
impl HasLen for SegmentPostings {
impl<TDeleteSet: DeleteSet> HasLen for SegmentPostings<TDeleteSet> {
fn len(&self) -> usize {
self.block_cursor.doc_freq()
}
}
impl Postings for SegmentPostings {
impl<TDeleteSet: DeleteSet> Postings for SegmentPostings<TDeleteSet> {
fn term_freq(&self) -> u32 {
self.block_cursor.freq(self.cur)
}

View File

@@ -10,6 +10,9 @@ use std::borrow::Borrow;
use query::Exclude;
use query::Occur;
use query::RequiredOptionalScorer;
use query::IntersectionTwoTerms;
use fastfield::DeleteBitSet;
use postings::NoDelete;
use query::score_combiner::{DoNothingCombiner, ScoreCombiner, SumWithCoordsCombiner};
use Result;
@@ -19,24 +22,41 @@ where
{
assert!(!scorers.is_empty());
if scorers.len() == 1 {
scorers.into_iter().next().unwrap() //< we checked the size beforehands
} else {
return scorers.into_iter().next().unwrap(); //< we checked the size beforehands
}
{
let is_all_term_queries = scorers.iter().all(|scorer| {
let scorer_ref: &Scorer = scorer.borrow();
Downcast::<TermScorer>::is_type(scorer_ref)
Downcast::<TermScorer<DeleteBitSet>>::is_type(scorer_ref)
});
if is_all_term_queries {
let scorers: Vec<TermScorer> = scorers
let scorers: Vec<TermScorer<DeleteBitSet>> = scorers
.into_iter()
.map(|scorer| *Downcast::<TermScorer>::downcast(scorer).unwrap())
.map(|scorer| *Downcast::<TermScorer<DeleteBitSet>>::downcast(scorer).unwrap())
.collect();
let scorer: Box<Scorer> = box Union::<TermScorer, TScoreCombiner>::from(scorers);
scorer
} else {
let scorer: Box<Scorer> = box Union::<_, TScoreCombiner>::from(scorers);
scorer
let scorer: Box<Scorer> = box Union::<TermScorer<DeleteBitSet>, TScoreCombiner>::from(scorers);
return scorer;
}
}
{
let is_all_term_queries = scorers.iter().all(|scorer| {
let scorer_ref: &Scorer = scorer.borrow();
Downcast::<TermScorer<NoDelete>>::is_type(scorer_ref)
});
if is_all_term_queries {
let scorers: Vec<TermScorer<NoDelete>> = scorers
.into_iter()
.map(|scorer| *Downcast::<TermScorer<NoDelete>>::downcast(scorer).unwrap())
.collect();
let scorer: Box<Scorer> = box Union::<TermScorer<NoDelete>, TScoreCombiner>::from(scorers);
return scorer;
}
}
let scorer: Box<Scorer> = box Union::<_, TScoreCombiner>::from(scorers);
return scorer;
}
pub struct BooleanWeight {
@@ -74,26 +94,55 @@ impl BooleanWeight {
.map(scorer_union::<TScoreCombiner>);
let must_scorer_opt: Option<Box<Scorer>> =
per_occur_scorers.remove(&Occur::Must).map(|scorers| {
per_occur_scorers.remove(&Occur::Must).map(|mut scorers| {
if scorers.len() == 1 {
scorers.into_iter().next().unwrap()
} else {
return scorers.into_iter().next().unwrap();
}
scorers.sort_by_key(|scorer| scorer.size_hint());
{
let is_all_term_queries = scorers.iter().all(|scorer| {
let scorer_ref: &Scorer = scorer.borrow();
Downcast::<TermScorer>::is_type(scorer_ref)
Downcast::<TermScorer<DeleteBitSet>>::is_type(scorer_ref)
});
if is_all_term_queries {
let scorers: Vec<TermScorer> = scorers
.into_iter()
.map(|scorer| *Downcast::<TermScorer>::downcast(scorer).unwrap())
.collect();
let scorer: Box<Scorer> = box Intersection::from(scorers);
scorer
} else {
let scorer: Box<Scorer> = box Intersection::from(scorers);
scorer
if scorers.len() == 2 {
let right = scorers.pop().unwrap();
let left = scorers.pop().unwrap();
return box IntersectionTwoTerms::new(left, right);
} else {
let mut scorers: Vec<TermScorer<DeleteBitSet>> = scorers
.into_iter()
.map(|scorer| *Downcast::<TermScorer<DeleteBitSet>>::downcast(scorer).unwrap())
.collect();
let scorer: Box<Scorer> = box Intersection::from(scorers);
return scorer;
}
}
}
{
let is_all_term_queries = scorers.iter().all(|scorer| {
let scorer_ref: &Scorer = scorer.borrow();
Downcast::<TermScorer<NoDelete>>::is_type(scorer_ref)
});
if is_all_term_queries {
if scorers.len() == 2 {
let right = scorers.pop().unwrap();
let left = scorers.pop().unwrap();
return box IntersectionTwoTerms::new(left, right);
} else {
let mut scorers: Vec<TermScorer<NoDelete>> = scorers
.into_iter()
.map(|scorer| *Downcast::<TermScorer<NoDelete>>::downcast(scorer).unwrap())
.collect();
let scorer: Box<Scorer> = box Intersection::from(scorers);
return scorer;
}
}
}
{
let scorer: Box<Scorer> = box Intersection::from(scorers);
scorer
}
});
let positive_scorer: Box<Scorer> = match (should_scorer_opt, must_scorer_opt) {

View File

@@ -85,7 +85,7 @@ mod tests {
let query = query_parser.parse_query("+a +b +c").unwrap();
let weight = query.weight(&*searcher, true).unwrap();
let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
assert!(Downcast::<Intersection<TermScorer>>::is_type(&*scorer));
assert!(Downcast::<Intersection<TermScorer<NoDelete>>>::is_type(&*scorer));
}
{
let query = query_parser.parse_query("+a +(b c)").unwrap();
@@ -95,6 +95,8 @@ mod tests {
}
}
use postings::NoDelete;
#[test]
pub fn test_boolean_reqopt() {
let (index, text_field) = aux_test_helper();
@@ -110,7 +112,7 @@ mod tests {
let query = query_parser.parse_query("+a b").unwrap();
let weight = query.weight(&*searcher, false).unwrap();
let scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
assert!(Downcast::<TermScorer>::is_type(&*scorer));
assert!(Downcast::<TermScorer<NoDelete>>::is_type(&*scorer));
}
}

View File

@@ -0,0 +1,73 @@
use docset::DocSet;
use query::Scorer;
use DocId;
use Score;
use std::mem;
use query::term_query::TermScorer;
use postings::DeleteSet;
use SkipResult;
/// Creates a `DocSet` that iterator through the intersection of two `DocSet`s.
pub struct IntersectionTwoTerms<TDocSet> {
left: TDocSet,
right: TDocSet
}
impl<TDocSet: DocSet> IntersectionTwoTerms<TDocSet> {
pub fn new(left: TDocSet, mut right: TDocSet) -> IntersectionTwoTerms<TDocSet> {
IntersectionTwoTerms {
left,
right
}
}
}
impl<TDocSet: DocSet> DocSet for IntersectionTwoTerms<TDocSet> {
fn advance(&mut self) -> bool {
let (left, right) = (&mut self.left, &mut self.right);
if !left.advance() {
return false;
}
let mut candidate = left.doc();
loop {
match right.skip_next(candidate) {
SkipResult::Reached => {
return true;
}
SkipResult::End => {
return false;
}
SkipResult::OverStep => {
candidate = right.doc();
}
}
match left.skip_next(candidate) {
SkipResult::Reached => {
return true;
}
SkipResult::End => {
return false;
}
SkipResult::OverStep => {
candidate = left.doc();
}
}
}
}
fn doc(&self) -> DocId {
self.left.doc()
}
fn size_hint(&self) -> u32 {
self.left.size_hint().min(self.right.size_hint())
}
}
impl<TScorer: Scorer> Scorer for IntersectionTwoTerms<TScorer> {
fn score(&mut self) -> Score {
self.left.score() + self.right.score()
}
}

View File

@@ -16,6 +16,7 @@ mod range_query;
mod exclude;
mod union;
mod intersection;
mod intersection_two;
mod reqopt_scorer;
#[cfg(test)]
@@ -45,3 +46,4 @@ pub use self::weight::Weight;
pub use self::all_query::{AllQuery, AllScorer, AllWeight};
pub use self::range_query::RangeQuery;
pub use self::scorer::ConstScorer;
pub use self::intersection_two::IntersectionTwoTerms;

View File

@@ -2,14 +2,15 @@ use DocId;
use docset::{DocSet, SkipResult};
use postings::{Postings, SegmentPostings};
use query::{Intersection, Scorer};
use fastfield::DeleteBitSet;
struct PostingsWithOffset {
offset: u32,
segment_postings: SegmentPostings,
segment_postings: SegmentPostings<DeleteBitSet>,
}
impl PostingsWithOffset {
pub fn new(segment_postings: SegmentPostings, offset: u32) -> PostingsWithOffset {
pub fn new(segment_postings: SegmentPostings<DeleteBitSet>, offset: u32) -> PostingsWithOffset {
PostingsWithOffset {
offset,
segment_postings,
@@ -50,7 +51,7 @@ pub struct PhraseScorer {
}
impl PhraseScorer {
pub fn new(term_postings: Vec<SegmentPostings>) -> PhraseScorer {
pub fn new(term_postings: Vec<SegmentPostings<DeleteBitSet>>) -> PhraseScorer {
let postings_with_offsets: Vec<_> = term_postings
.into_iter()
.enumerate()

View File

@@ -5,20 +5,21 @@ use postings::SegmentPostings;
use query::Scorer;
use postings::Postings;
use fastfield::FastFieldReader;
use postings::{NoDelete, DeleteSet};
pub struct TermScorer {
pub struct TermScorer<TDeleteSet: DeleteSet=NoDelete> {
pub idf: Score,
pub fieldnorm_reader_opt: Option<FastFieldReader<u64>>,
pub postings: SegmentPostings,
pub postings: SegmentPostings<TDeleteSet>,
}
impl TermScorer {
pub fn postings(&self) -> &SegmentPostings {
impl<TDeleteSet: DeleteSet> TermScorer<TDeleteSet> {
pub fn postings(&self) -> &SegmentPostings<TDeleteSet> {
&self.postings
}
}
impl DocSet for TermScorer {
impl<TDeleteSet: DeleteSet> DocSet for TermScorer<TDeleteSet> {
fn advance(&mut self) -> bool {
self.postings.advance()
}
@@ -36,7 +37,7 @@ impl DocSet for TermScorer {
}
}
impl Scorer for TermScorer {
impl<TDeleteSet: DeleteSet> Scorer for TermScorer<TDeleteSet> {
fn score(&mut self) -> Score {
let doc = self.postings.doc();
let tf = match self.fieldnorm_reader_opt {

View File

@@ -6,6 +6,8 @@ use docset::DocSet;
use postings::SegmentPostings;
use schema::IndexRecordOption;
use super::term_scorer::TermScorer;
use fastfield::DeleteBitSet;
use postings::{DeleteSet, NoDelete};
use Result;
pub struct TermWeight {
@@ -17,8 +19,46 @@ pub struct TermWeight {
impl Weight for TermWeight {
fn scorer(&self, reader: &SegmentReader) -> Result<Box<Scorer>> {
let specialized_scorer = self.specialized_scorer(reader)?;
Ok(box specialized_scorer)
let field = self.term.field();
let inverted_index = reader.inverted_index(field);
let fieldnorm_reader_opt = reader.get_fieldnorms_reader(field);
let scorer: Box<Scorer>;
if reader.has_deletes() {
let postings_opt: Option<SegmentPostings<DeleteBitSet>> =
inverted_index.read_postings(&self.term, self.index_record_option);
scorer =
if let Some(segment_postings) = postings_opt {
box TermScorer {
idf: self.idf(),
fieldnorm_reader_opt,
postings: segment_postings,
}
} else {
box TermScorer {
idf: 1f32,
fieldnorm_reader_opt: None,
postings: SegmentPostings::<NoDelete>::empty(),
}
};
} else {
let postings_opt: Option<SegmentPostings<NoDelete>> =
inverted_index.read_postings_no_deletes(&self.term, self.index_record_option);
scorer =
if let Some(segment_postings) = postings_opt {
box TermScorer {
idf: self.idf(),
fieldnorm_reader_opt,
postings: segment_postings,
}
} else {
box TermScorer {
idf: 1f32,
fieldnorm_reader_opt: None,
postings: SegmentPostings::<NoDelete>::empty(),
}
};
}
Ok(scorer)
}
fn count(&self, reader: &SegmentReader) -> Result<u32> {
@@ -30,7 +70,7 @@ impl Weight for TermWeight {
.map(|term_info| term_info.doc_freq)
.unwrap_or(0))
} else {
Ok(self.specialized_scorer(reader)?.count())
Ok(self.scorer(reader)?.count())
}
}
}
@@ -39,26 +79,4 @@ impl TermWeight {
fn idf(&self) -> f32 {
1.0 + (self.num_docs as f32 / (self.doc_freq as f32 + 1.0)).ln()
}
/// If the field is not found, returns an empty `DocSet`.
pub fn specialized_scorer(&self, reader: &SegmentReader) -> Result<TermScorer> {
let field = self.term.field();
let inverted_index = reader.inverted_index(field);
let fieldnorm_reader_opt = reader.get_fieldnorms_reader(field);
let postings_opt: Option<SegmentPostings> =
inverted_index.read_postings(&self.term, self.index_record_option);
if let Some(segment_postings) = postings_opt {
Ok(TermScorer {
idf: self.idf(),
fieldnorm_reader_opt,
postings: segment_postings,
})
} else {
Ok(TermScorer {
idf: 1f32,
fieldnorm_reader_opt: None,
postings: SegmentPostings::empty(),
})
}
}
}