Removed specialized postings on SegmentPostings

This commit is contained in:
Paul Masurel
2018-02-18 00:09:15 +09:00
parent 20bede9462
commit eb50e92ec4
7 changed files with 34 additions and 30 deletions

View File

@@ -4,9 +4,7 @@ use postings::{BlockSegmentPostings, SegmentPostings};
use postings::TermInfo;
use schema::IndexRecordOption;
use schema::Term;
use std::cmp;
use fastfield::DeleteBitSet;
use schema::Schema;
use compression::CompressedIntStream;
use postings::FreqReadingOption;
@@ -139,7 +137,6 @@ impl InvertedIndexReader {
/// `TextIndexingOptions` that does not index position will return a `SegmentPostings`
/// with `DocId`s and frequencies.
pub fn read_postings(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings> {
    // Resolve the term's `TermInfo` first. `get!` is a macro defined elsewhere in
    // the crate; presumably it returns `None` from this function when the lookup
    // yields nothing (confirm against the macro definition).
    let term_info = get!(self.get_term_info(term));
    // The actual decoding is delegated to `read_postings_from_terminfo`.
    Some(self.read_postings_from_terminfo(&term_info, option))
}

View File

@@ -9,6 +9,7 @@ use fastfield::DeleteBitSet;
use std::cell::UnsafeCell;
use directory::{ReadOnlySource, SourceRead};
use postings::FreqReadingOption;
use postings::serializer::PostingsSerializer;
const EMPTY_POSITIONS: [u32; 0] = [0u32; 0];
@@ -70,6 +71,24 @@ pub struct SegmentPostings {
}
impl SegmentPostings {
/// Builds a `SegmentPostings` from a plain slice of doc ids.
///
/// The doc ids are serialized into an in-memory buffer with a
/// `PostingsSerializer` created with term frequencies disabled, and the
/// resulting bytes are immediately read back through
/// `BlockSegmentPostings::from_data` using `FreqReadingOption::NoFreq`.
/// The returned postings use an empty `DeleteBitSet`.
///
/// NOTE(review): the serializer tracks a `last_doc_id_encoded`, so `docs` is
/// presumably expected to be sorted in increasing order — confirm.
///
/// # Panics
///
/// Panics if `write_doc` or `close_term` returns an error.
pub fn create_from_docs(docs: &[u32]) -> SegmentPostings {
    let mut encoded: Vec<u8> = Vec::new();
    {
        // Scoped so that the mutable borrow of `encoded` ends before the
        // buffer is moved into the `ReadOnlySource` below.
        let mut serializer = PostingsSerializer::new(&mut encoded, false);
        for doc_id in docs.iter().cloned() {
            // Term frequencies are disabled, a constant 1 is passed for each doc.
            serializer.write_doc(doc_id, 1u32).unwrap();
        }
        serializer.close_term().unwrap();
    }
    let block_postings = BlockSegmentPostings::from_data(
        docs.len(),
        SourceRead::from(ReadOnlySource::from(encoded)),
        FreqReadingOption::NoFreq,
    );
    SegmentPostings::from_block_postings(block_postings, DeleteBitSet::empty(), None)
}
/// Reads a Segment postings from an &[u8]
///
/// * `len` - number of documents in the posting list.
@@ -314,7 +333,7 @@ impl BlockSegmentPostings {
pub(crate) fn from_data(
doc_freq: usize,
data: SourceRead,
freq_reading_option: FreqReadingOption,
freq_reading_option: FreqReadingOption
) -> BlockSegmentPostings {
let num_bitpacked_blocks: usize = (doc_freq as usize) / COMPRESSION_BLOCK_SIZE;
let num_vint_docs = (doc_freq as usize) - COMPRESSION_BLOCK_SIZE * num_bitpacked_blocks;

View File

@@ -232,7 +232,7 @@ impl<'a> FieldSerializer<'a> {
}
}
struct PostingsSerializer<W: Write> {
pub struct PostingsSerializer<W: Write> {
postings_write: CountingWriter<W>,
last_doc_id_encoded: u32,
@@ -244,7 +244,7 @@ struct PostingsSerializer<W: Write> {
}
impl<W: Write> PostingsSerializer<W> {
fn new(write: W, termfreq_enabled: bool) -> PostingsSerializer<W> {
pub fn new(write: W, termfreq_enabled: bool) -> PostingsSerializer<W> {
PostingsSerializer {
postings_write: CountingWriter::wrap(write),
@@ -257,7 +257,7 @@ impl<W: Write> PostingsSerializer<W> {
}
}
fn write_doc(&mut self, doc_id: DocId, term_freq: u32) -> io::Result<()> {
pub fn write_doc(&mut self, doc_id: DocId, term_freq: u32) -> io::Result<()> {
self.doc_ids.push(doc_id);
if self.termfreq_enabled {
self.term_freqs.push(term_freq as u32);
@@ -282,7 +282,7 @@ impl<W: Write> PostingsSerializer<W> {
Ok(())
}
fn close_term(&mut self) -> io::Result<()> {
pub fn close_term(&mut self) -> io::Result<()> {
if !self.doc_ids.is_empty() {
// we have doc ids waiting to be written
// this happens when the number of doc ids is

View File

@@ -13,7 +13,6 @@ mod tests {
use collector::tests::TestCollector;
use Index;
use schema::*;
use fastfield::U64FastFieldReader;
use schema::IndexRecordOption;
#[test]

View File

@@ -9,7 +9,7 @@ pub use self::term_scorer::TermScorer;
#[cfg(test)]
mod tests {
use postings::{DocSet, VecPostings};
use postings::{DocSet, SegmentPostings};
use query::Scorer;
use query::term_query::TermScorer;
use query::Query;
@@ -59,7 +59,7 @@ mod tests {
let left_fieldnorms = U64FastFieldReader::from(vec![10, 4]);
assert_eq!(left_fieldnorms.get(0), 10);
assert_eq!(left_fieldnorms.get(1), 4);
let left = VecPostings::from(vec![1]);
let left = SegmentPostings::create_from_docs(&[1]);
let mut left_scorer = TermScorer {
idf: 0.30685282,
fieldnorm_reader_opt: Some(left_fieldnorms),

View File

@@ -3,32 +3,24 @@ use DocId;
use postings::SkipResult;
use fastfield::U64FastFieldReader;
use postings::DocSet;
use postings::SegmentPostings;
use query::Scorer;
use postings::Postings;
use fastfield::FastFieldReader;
pub struct TermScorer<TPostings>
where
TPostings: Postings,
{
pub struct TermScorer {
pub idf: Score,
pub fieldnorm_reader_opt: Option<U64FastFieldReader>,
pub postings: TPostings,
pub postings: SegmentPostings,
}
impl<TPostings> TermScorer<TPostings>
where
TPostings: Postings,
{
pub fn postings(&self) -> &TPostings {
impl TermScorer {
pub fn postings(&self) -> &SegmentPostings {
&self.postings
}
}
impl<TPostings> DocSet for TermScorer<TPostings>
where
TPostings: Postings,
{
impl DocSet for TermScorer {
fn advance(&mut self) -> bool {
self.postings.advance()
}
@@ -46,10 +38,7 @@ where
}
}
impl<TPostings> Scorer for TermScorer<TPostings>
where
TPostings: Postings,
{
impl Scorer for TermScorer {
fn score(&mut self) -> Score {
let doc = self.postings.doc();
let tf = match self.fieldnorm_reader_opt {

View File

@@ -39,7 +39,7 @@ impl TermWeight {
pub fn specialized_scorer(
&self,
reader: &SegmentReader,
) -> Result<TermScorer<SegmentPostings>> {
) -> Result<TermScorer> {
let field = self.term.field();
let inverted_index = reader.inverted_index(field);
let fieldnorm_reader_opt = reader.get_fieldnorms_reader(field);