From eb50e92ec4156299f9eceec3cfca76638dfb90f0 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Sun, 18 Feb 2018 00:09:15 +0900 Subject: [PATCH] Removed specialized postings on SegmentPostings --- src/core/inverted_index_reader.rs | 3 --- src/postings/segment_postings.rs | 21 ++++++++++++++++++++- src/postings/serializer.rs | 8 ++++---- src/query/boolean_query/mod.rs | 1 - src/query/term_query/mod.rs | 4 ++-- src/query/term_query/term_scorer.rs | 25 +++++++------------------ src/query/term_query/term_weight.rs | 2 +- 7 files changed, 34 insertions(+), 30 deletions(-) diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs index 1d9664c66..52c9beaf9 100644 --- a/src/core/inverted_index_reader.rs +++ b/src/core/inverted_index_reader.rs @@ -4,9 +4,7 @@ use postings::{BlockSegmentPostings, SegmentPostings}; use postings::TermInfo; use schema::IndexRecordOption; use schema::Term; -use std::cmp; use fastfield::DeleteBitSet; -use schema::Schema; use compression::CompressedIntStream; use postings::FreqReadingOption; @@ -139,7 +137,6 @@ impl InvertedIndexReader { /// `TextIndexingOptions` that does not index position will return a `SegmentPostings` /// with `DocId`s and frequencies. pub fn read_postings(&self, term: &Term, option: IndexRecordOption) -> Option { - let field = term.field(); let term_info = get!(self.get_term_info(term)); Some(self.read_postings_from_terminfo(&term_info, option)) } diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index ffd56a30e..b1d592c97 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -9,6 +9,7 @@ use fastfield::DeleteBitSet; use std::cell::UnsafeCell; use directory::{ReadOnlySource, SourceRead}; use postings::FreqReadingOption; +use postings::serializer::PostingsSerializer; const EMPTY_POSITIONS: [u32; 0] = [0u32; 0]; @@ -70,6 +71,24 @@ pub struct SegmentPostings { } impl SegmentPostings { + + pub fn create_from_docs(docs: &[u32]) -> SegmentPostings { + let mut buffer = Vec::new(); + { + let mut postings_serializer = PostingsSerializer::new(&mut buffer, false); + for &doc in docs { + postings_serializer.write_doc(doc, 1u32).unwrap(); + } + postings_serializer.close_term().unwrap(); + } + let data = ReadOnlySource::from(buffer); + let block_segment_postings = BlockSegmentPostings::from_data( + docs.len(), + SourceRead::from(data), + FreqReadingOption::NoFreq); + SegmentPostings::from_block_postings(block_segment_postings, DeleteBitSet::empty(), None) + } + /// Reads a Segment postings from an &[u8] /// /// * `len` - number of document in the posting lists. @@ -314,7 +333,7 @@ impl BlockSegmentPostings { pub(crate) fn from_data( doc_freq: usize, data: SourceRead, - freq_reading_option: FreqReadingOption, + freq_reading_option: FreqReadingOption ) -> BlockSegmentPostings { let num_bitpacked_blocks: usize = (doc_freq as usize) / COMPRESSION_BLOCK_SIZE; let num_vint_docs = (doc_freq as usize) - COMPRESSION_BLOCK_SIZE * num_bitpacked_blocks; diff --git a/src/postings/serializer.rs b/src/postings/serializer.rs index 52ff8df2d..f40a73476 100644 --- a/src/postings/serializer.rs +++ b/src/postings/serializer.rs @@ -232,7 +232,7 @@ impl<'a> FieldSerializer<'a> { } } -struct PostingsSerializer { +pub struct PostingsSerializer { postings_write: CountingWriter, last_doc_id_encoded: u32, @@ -244,7 +244,7 @@ struct PostingsSerializer { } impl PostingsSerializer { - fn new(write: W, termfreq_enabled: bool) -> PostingsSerializer { + pub fn new(write: W, termfreq_enabled: bool) -> PostingsSerializer { PostingsSerializer { postings_write: CountingWriter::wrap(write), @@ -257,7 +257,7 @@ impl PostingsSerializer { } } - fn write_doc(&mut self, doc_id: DocId, term_freq: u32) -> io::Result<()> { + pub fn write_doc(&mut self, doc_id: DocId, term_freq: u32) -> io::Result<()> { self.doc_ids.push(doc_id); if self.termfreq_enabled { self.term_freqs.push(term_freq as u32); @@ -282,7 +282,7 @@ impl PostingsSerializer { Ok(()) } - fn close_term(&mut self) -> io::Result<()> { + pub fn close_term(&mut self) -> io::Result<()> { if !self.doc_ids.is_empty() { // we have doc ids waiting to be written // this happens when the number of doc ids is diff --git a/src/query/boolean_query/mod.rs b/src/query/boolean_query/mod.rs index 7c611ebed..81421e8bd 100644 --- a/src/query/boolean_query/mod.rs +++ b/src/query/boolean_query/mod.rs @@ -13,7 +13,6 @@ mod tests { use collector::tests::TestCollector; use Index; use schema::*; - use fastfield::U64FastFieldReader; use schema::IndexRecordOption; #[test] diff --git a/src/query/term_query/mod.rs b/src/query/term_query/mod.rs index 3d34960dd..7daf54423 100644 --- a/src/query/term_query/mod.rs +++ b/src/query/term_query/mod.rs @@ -9,7 +9,7 @@ pub use self::term_scorer::TermScorer; #[cfg(test)] mod tests { - use postings::{DocSet, VecPostings}; + use postings::{DocSet, SegmentPostings}; use query::Scorer; use query::term_query::TermScorer; use query::Query; @@ -59,7 +59,7 @@ mod tests { let left_fieldnorms = U64FastFieldReader::from(vec![10, 4]); assert_eq!(left_fieldnorms.get(0), 10); assert_eq!(left_fieldnorms.get(1), 4); - let left = VecPostings::from(vec![1]); + let left = SegmentPostings::create_from_docs(&[1]); let mut left_scorer = TermScorer { idf: 0.30685282, fieldnorm_reader_opt: Some(left_fieldnorms), diff --git a/src/query/term_query/term_scorer.rs b/src/query/term_query/term_scorer.rs index 9593342ef..60a286635 100644 --- a/src/query/term_query/term_scorer.rs +++ b/src/query/term_query/term_scorer.rs @@ -3,32 +3,24 @@ use DocId; use postings::SkipResult; use fastfield::U64FastFieldReader; use postings::DocSet; +use postings::SegmentPostings; use query::Scorer; use postings::Postings; use fastfield::FastFieldReader; -pub struct TermScorer -where - TPostings: Postings, -{ +pub struct TermScorer { pub idf: Score, pub fieldnorm_reader_opt: Option, - pub postings: TPostings, + pub postings: SegmentPostings, } -impl TermScorer -where - TPostings: Postings, -{ - pub fn postings(&self) -> &TPostings { +impl TermScorer { + pub fn postings(&self) -> &SegmentPostings { &self.postings } } -impl DocSet for TermScorer -where - TPostings: Postings, -{ +impl DocSet for TermScorer { fn advance(&mut self) -> bool { self.postings.advance() } @@ -46,10 +38,7 @@ where } } -impl Scorer for TermScorer -where - TPostings: Postings, -{ +impl Scorer for TermScorer { fn score(&mut self) -> Score { let doc = self.postings.doc(); let tf = match self.fieldnorm_reader_opt { diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs index e212613a7..e330174b6 100644 --- a/src/query/term_query/term_weight.rs +++ b/src/query/term_query/term_weight.rs @@ -39,7 +39,7 @@ impl TermWeight { pub fn specialized_scorer( &self, reader: &SegmentReader, - ) -> Result> { + ) -> Result { let field = self.term.field(); let inverted_index = reader.inverted_index(field); let fieldnorm_reader_opt = reader.get_fieldnorms_reader(field);