Removed specialized postings on SegmentPostings

2026-05-31 23:50:41 +00:00 · 2018-02-18 00:09:15 +09:00
parent 20bede9462
commit eb50e92ec4
7 changed files with 34 additions and 30 deletions
--- a/src/core/inverted_index_reader.rs
+++ b/src/core/inverted_index_reader.rs
@@ -4,9 +4,7 @@ use postings::{BlockSegmentPostings, SegmentPostings};
 use postings::TermInfo;
 use schema::IndexRecordOption;
 use schema::Term;
-use std::cmp;
 use fastfield::DeleteBitSet;
-use schema::Schema;
 use compression::CompressedIntStream;
 use postings::FreqReadingOption;

@@ -139,7 +137,6 @@ impl InvertedIndexReader {
    /// `TextIndexingOptions` that does not index position will return a `SegmentPostings`
    /// with `DocId`s and frequencies.
    pub fn read_postings(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings> {
-        let field = term.field();
        let term_info = get!(self.get_term_info(term));
        Some(self.read_postings_from_terminfo(&term_info, option))
    }
--- a/src/postings/segment_postings.rs
+++ b/src/postings/segment_postings.rs
@@ -9,6 +9,7 @@ use fastfield::DeleteBitSet;
 use std::cell::UnsafeCell;
 use directory::{ReadOnlySource, SourceRead};
 use postings::FreqReadingOption;
+use postings::serializer::PostingsSerializer;

 const EMPTY_POSITIONS: [u32; 0] = [0u32; 0];

@@ -70,6 +71,24 @@ pub struct SegmentPostings {
 }

 impl SegmentPostings {
+    /// Builds a `SegmentPostings` over `docs` by serializing them to an in-memory buffer and reading it back.
+    pub fn create_from_docs(docs: &[u32]) -> SegmentPostings {
+        let mut buffer = Vec::new();
+        { // inner scope: the serializer's `&mut buffer` borrow ends here, before `buffer` is moved below
+            let mut postings_serializer = PostingsSerializer::new(&mut buffer, false); // `false` = `termfreq_enabled` off
+            for &doc in docs { // NOTE(review): presumably `docs` must be sorted ascending (serializer tracks `last_doc_id_encoded`) — confirm
+                postings_serializer.write_doc(doc, 1u32).unwrap(); // `write_doc` only pushes the term freq when `termfreq_enabled`; panics on write error
+            }
+            postings_serializer.close_term().unwrap(); // flushes any doc ids still waiting to be written
+        }
+        let data = ReadOnlySource::from(buffer);
+        let block_segment_postings = BlockSegmentPostings::from_data(
+            docs.len(), // passed as `doc_freq`
+            SourceRead::from(data),
+            FreqReadingOption::NoFreq); // NOTE(review): presumably has to agree with the serializer's disabled term freqs — confirm
+        SegmentPostings::from_block_postings(block_segment_postings, DeleteBitSet::empty(), None)
+    }
+
    /// Reads a Segment postings from an &[u8]
    ///
    /// * `len` - number of document in the posting lists.
@@ -314,7 +333,7 @@ impl BlockSegmentPostings {
    pub(crate) fn from_data(
        doc_freq: usize,
        data: SourceRead,
-        freq_reading_option: FreqReadingOption,
+        freq_reading_option: FreqReadingOption
    ) -> BlockSegmentPostings {
        let num_bitpacked_blocks: usize = (doc_freq as usize) / COMPRESSION_BLOCK_SIZE;
        let num_vint_docs = (doc_freq as usize) - COMPRESSION_BLOCK_SIZE * num_bitpacked_blocks;
--- a/src/postings/serializer.rs
+++ b/src/postings/serializer.rs
@@ -232,7 +232,7 @@ impl<'a> FieldSerializer<'a> {
    }
 }

-struct PostingsSerializer<W: Write> {
+pub struct PostingsSerializer<W: Write> {
    postings_write: CountingWriter<W>,
    last_doc_id_encoded: u32,

@@ -244,7 +244,7 @@ struct PostingsSerializer<W: Write> {
 }

 impl<W: Write> PostingsSerializer<W> {
-    fn new(write: W, termfreq_enabled: bool) -> PostingsSerializer<W> {
+    pub fn new(write: W, termfreq_enabled: bool) -> PostingsSerializer<W> {
        PostingsSerializer {
            postings_write: CountingWriter::wrap(write),

@@ -257,7 +257,7 @@ impl<W: Write> PostingsSerializer<W> {
        }
    }

-    fn write_doc(&mut self, doc_id: DocId, term_freq: u32) -> io::Result<()> {
+    pub fn write_doc(&mut self, doc_id: DocId, term_freq: u32) -> io::Result<()> {
        self.doc_ids.push(doc_id);
        if self.termfreq_enabled {
            self.term_freqs.push(term_freq as u32);
@@ -282,7 +282,7 @@ impl<W: Write> PostingsSerializer<W> {
        Ok(())
    }

-    fn close_term(&mut self) -> io::Result<()> {
+    pub fn close_term(&mut self) -> io::Result<()> {
        if !self.doc_ids.is_empty() {
            // we have doc ids waiting to be written
            // this happens when the number of doc ids is
--- a/src/query/boolean_query/mod.rs
+++ b/src/query/boolean_query/mod.rs
@@ -13,7 +13,6 @@ mod tests {
    use collector::tests::TestCollector;
    use Index;
    use schema::*;
-    use fastfield::U64FastFieldReader;
    use schema::IndexRecordOption;

    #[test]
--- a/src/query/term_query/mod.rs
+++ b/src/query/term_query/mod.rs
@@ -9,7 +9,7 @@ pub use self::term_scorer::TermScorer;
 #[cfg(test)]
 mod tests {

-    use postings::{DocSet, VecPostings};
+    use postings::{DocSet, SegmentPostings};
    use query::Scorer;
    use query::term_query::TermScorer;
    use query::Query;
@@ -59,7 +59,7 @@ mod tests {
        let left_fieldnorms = U64FastFieldReader::from(vec![10, 4]);
        assert_eq!(left_fieldnorms.get(0), 10);
        assert_eq!(left_fieldnorms.get(1), 4);
-        let left = VecPostings::from(vec![1]);
+        let left = SegmentPostings::create_from_docs(&[1]);
        let mut left_scorer = TermScorer {
            idf: 0.30685282,
            fieldnorm_reader_opt: Some(left_fieldnorms),
--- a/src/query/term_query/term_scorer.rs
+++ b/src/query/term_query/term_scorer.rs
@@ -3,32 +3,24 @@ use DocId;
 use postings::SkipResult;
 use fastfield::U64FastFieldReader;
 use postings::DocSet;
+use postings::SegmentPostings;
 use query::Scorer;
 use postings::Postings;
 use fastfield::FastFieldReader;

-pub struct TermScorer<TPostings>
-where
-    TPostings: Postings,
-{
+pub struct TermScorer {
    pub idf: Score,
    pub fieldnorm_reader_opt: Option<U64FastFieldReader>,
-    pub postings: TPostings,
+    pub postings: SegmentPostings,
 }

-impl<TPostings> TermScorer<TPostings>
-where
-    TPostings: Postings,
-{
-    pub fn postings(&self) -> &TPostings {
+impl TermScorer {
+    pub fn postings(&self) -> &SegmentPostings {
        &self.postings
    }
 }

-impl<TPostings> DocSet for TermScorer<TPostings>
-where
-    TPostings: Postings,
-{
+impl DocSet for TermScorer {
    fn advance(&mut self) -> bool {
        self.postings.advance()
    }
@@ -46,10 +38,7 @@ where
    }
 }

-impl<TPostings> Scorer for TermScorer<TPostings>
-where
-    TPostings: Postings,
-{
+impl Scorer for TermScorer {
    fn score(&mut self) -> Score {
        let doc = self.postings.doc();
        let tf = match self.fieldnorm_reader_opt {
--- a/src/query/term_query/term_weight.rs
+++ b/src/query/term_query/term_weight.rs
@@ -39,7 +39,7 @@ impl TermWeight {
    pub fn specialized_scorer(
        &self,
        reader: &SegmentReader,
-    ) -> Result<TermScorer<SegmentPostings>> {
+    ) -> Result<TermScorer> {
        let field = self.term.field();
        let inverted_index = reader.inverted_index(field);
        let fieldnorm_reader_opt = reader.get_fieldnorms_reader(field);