Rename DocSet::next() to advance(); replace doc_freq() with HasLen::len()

2026-05-26 13:10:41 +00:00 · 2016-08-06 11:50:17 +09:00
parent bc54db6872
commit bf0d072c2d
13 changed files with 101 additions and 93 deletions
--- a/src/core/merger.rs
+++ b/src/core/merger.rs
@@ -15,6 +15,7 @@ use schema::{Term, Schema, Field};
 use fastfield::FastFieldSerializer;
 use store::StoreWriter;
 use postings::ChainedPostings;
+use postings::HasLen;
 use postings::OffsetPostings;
 use core::index::SegmentInfo;
 use std::cmp::{min, max, Ordering};
@@ -206,8 +207,8 @@ impl IndexMerger {
        loop {
            match postings_merger.next() {
                Some((term, mut merged_doc_ids)) => {
-                    try!(postings_serializer.new_term(&term, merged_doc_ids.doc_freq() as DocId));
-                    while merged_doc_ids.next() {
+                    try!(postings_serializer.new_term(&term, merged_doc_ids.len() as DocId));
+                    while merged_doc_ids.advance() {
                        try!(postings_serializer.write_doc(merged_doc_ids.doc(), merged_doc_ids.term_freq(), &EMPTY_ARRAY));
                    }
                    try!(postings_serializer.close_term());
--- a/src/error.rs
+++ b/src/error.rs
@@ -0,0 +1,3 @@
+pub enum Error {
+    A,
+}
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -242,10 +242,10 @@ mod tests {
            let searcher = index.searcher().unwrap();
            let reader = &searcher.segments()[0];
            let mut postings = reader.read_postings(&Term::from_field_text(text_field, "af")).unwrap();
-            assert!(postings.next());
+            assert!(postings.advance());
            assert_eq!(postings.doc(), 0);
            assert_eq!(postings.term_freq(), 3);
-            assert!(!postings.next());
+            assert!(!postings.advance());
        }
    }

--- a/src/postings/chained_postings.rs
+++ b/src/postings/chained_postings.rs
@@ -1,36 +1,37 @@
 use DocId;
-use postings::{Postings, SkipResult};
+use postings::Postings;
 use postings::OffsetPostings;
 use postings::DocSet;
+use postings::HasLen;

 pub struct ChainedPostings<'a> {
    chained_postings: Vec<OffsetPostings<'a>>,
    posting_id: usize,
-    doc_freq: usize,
+    len: usize,
 }

 impl<'a> ChainedPostings<'a> {
    
    pub fn new(chained_postings: Vec<OffsetPostings<'a>>) -> ChainedPostings {
-        let doc_freq: usize = chained_postings
+        let len: usize = chained_postings
            .iter()
-            .map(|segment_postings| segment_postings.doc_freq())
+            .map(|segment_postings| segment_postings.len())
            .fold(0, |sum, addition| sum + addition);
        ChainedPostings {
            chained_postings: chained_postings,
            posting_id: 0,
-            doc_freq: doc_freq,
+            len: len,
        }
    }
 }

 impl<'a> DocSet for ChainedPostings<'a> {

-    fn next(&mut self,) -> bool {
+    fn advance(&mut self,) -> bool {
        if self.posting_id == self.chained_postings.len() {
            return false;
        }
-        while !self.chained_postings[self.posting_id].next() {
+        while !self.chained_postings[self.posting_id].advance() {
            self.posting_id += 1;
            if self.posting_id == self.chained_postings.len() {
                return false;
@@ -42,14 +43,11 @@ impl<'a> DocSet for ChainedPostings<'a> {
    fn doc(&self,) -> DocId {
        self.chained_postings[self.posting_id].doc()
    }
+}

-    fn skip_next(&mut self, _target: DocId) -> SkipResult {
-        // TODO implement.
-        panic!("not implemented");
-    }
-    
-    fn doc_freq(&self,) -> usize {
-        self.doc_freq
+impl<'a> HasLen for ChainedPostings<'a> {
+    fn len(&self,) -> usize {
+        self.len
    }
 }

--- a/src/postings/docset.rs
+++ b/src/postings/docset.rs
@@ -14,7 +14,7 @@ pub enum SkipResult {
 pub trait DocSet {
    // goes to the next element.
    // next needs to be called a first time to point to the correct element.
-    fn next(&mut self,) -> bool;
+    fn advance(&mut self,) -> bool;
    
    // after skipping position
    // the iterator in such a way that doc() will return a
@@ -23,7 +23,7 @@ pub trait DocSet {
        loop {
            match self.doc().cmp(&target) {
                Ordering::Less => {
-                    if !self.next() {
+                    if !self.advance() {
                        return SkipResult::End;
                    }
                },
@@ -34,16 +34,14 @@ pub trait DocSet {
    }

    fn doc(&self,) -> DocId;
-
-    fn doc_freq(&self,) -> usize;
 }


 impl<TDocSet: DocSet> DocSet for Box<TDocSet> {

-    fn next(&mut self,) -> bool {
+    fn advance(&mut self,) -> bool {
        let unboxed: &mut TDocSet = self.borrow_mut();
-        unboxed.next()
+        unboxed.advance()
    }

    fn skip_next(&mut self, target: DocId) -> SkipResult {
@@ -53,20 +51,15 @@ impl<TDocSet: DocSet> DocSet for Box<TDocSet> {

    fn doc(&self,) -> DocId {
        let unboxed: &TDocSet = self.borrow();
-        unboxed.borrow().doc()
-    }
-
-    fn doc_freq(&self,) -> usize {
-        let unboxed: &TDocSet = self.borrow();
-        unboxed.doc_freq()
+        unboxed.doc()
    }
 }

 impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet {
   
-    fn next(&mut self,) -> bool {
+    fn advance(&mut self,) -> bool {
        let unref: &mut TDocSet = *self;
-        unref.next()
+        unref.advance()
    }
        
    fn skip_next(&mut self, target: DocId) -> SkipResult {
@@ -78,10 +71,6 @@ impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet {
        let unref: &TDocSet = *self;
        unref.doc()
    }
-
-    
-    fn doc_freq(&self,) -> usize {
-        let unref: &TDocSet = *self;
-        unref.doc_freq()
-    }
 }
+
+    
--- a/src/postings/intersection.rs
+++ b/src/postings/intersection.rs
@@ -35,16 +35,16 @@ impl<'a> IntersectionDocSet<'a> {

 impl<'a> DocSet for IntersectionDocSet<'a> {
    
-    fn next(&mut self,) -> bool {
+    fn advance(&mut self,) -> bool {
        if self.finished {
            return false;
        }
        
-        if !self.left.next() {
+        if !self.left.advance() {
            self.finished = true;
            return false;
        }
-        if !self.right.next() {
+        if !self.right.advance() {
            self.finished = true;
            return false;
        }
@@ -54,13 +54,13 @@ impl<'a> DocSet for IntersectionDocSet<'a> {
                    return true;
                }
                Ordering::Less => {
-                    if !self.left.next() {
+                    if !self.left.advance() {
                        self.finished = true;
                        return false;
                    }
                }
                Ordering::Greater => {
-                    if !self.right.next() {
+                    if !self.right.advance() {
                        self.finished = true;
                        return false;
                    }
@@ -72,12 +72,6 @@ impl<'a> DocSet for IntersectionDocSet<'a> {
    fn doc(&self,) -> DocId {
        self.left.doc()
    }
-    
-    fn doc_freq(&self,) -> usize {
-        // TODO not a great idea.
-        panic!("intersection does not implement doc freq");
-    }
-    
 }

 #[inline(never)]
--- a/src/postings/mod.rs
+++ b/src/postings/mod.rs
@@ -28,6 +28,7 @@ pub use self::intersection::intersection;
 pub use self::intersection::IntersectionDocSet;
 pub use self::freq_handler::FreqHandler;
 pub use self::scored_docset::ScoredDocSet;
+pub use self::postings::HasLen;


 #[cfg(test)]
@@ -62,20 +63,20 @@ mod tests {
            let left = Box::new(VecPostings::from(vec!(1, 3, 9)));
            let right = Box::new(VecPostings::from(vec!(3, 4, 9, 18)));
            let mut intersection = IntersectionDocSet::new(vec!(left, right));
-            assert!(intersection.next());
+            assert!(intersection.advance());
            assert_eq!(intersection.doc(), 3);
-            assert!(intersection.next());
+            assert!(intersection.advance());
            assert_eq!(intersection.doc(), 9);
-            assert!(!intersection.next());
+            assert!(!intersection.advance());
        }
        {
            let a = Box::new(VecPostings::from(vec!(1, 3, 9)));
            let b = Box::new(VecPostings::from(vec!(3, 4, 9, 18)));
            let c = Box::new(VecPostings::from(vec!(1, 5, 9, 111)));
            let mut intersection = IntersectionDocSet::new(vec!(a, b, c));
-            assert!(intersection.next());
+            assert!(intersection.advance());
            assert_eq!(intersection.doc(), 9);
-            assert!(!intersection.next());
+            assert!(!intersection.advance());
        }
    }
     
--- a/src/postings/offset_postings.rs
+++ b/src/postings/offset_postings.rs
@@ -2,6 +2,7 @@ use postings::Postings;
 use postings::SegmentPostings;
 use postings::SkipResult;
 use postings::DocSet;
+use postings::HasLen;
 use DocId;


@@ -20,8 +21,8 @@ impl<'a> OffsetPostings<'a> {
 }

 impl<'a> DocSet for OffsetPostings<'a> {
-    fn next(&mut self,) -> bool {
-        self.underlying.next()
+    fn advance(&mut self,) -> bool {
+        self.underlying.advance()
    }
    
    fn doc(&self,) -> DocId {
@@ -37,8 +38,12 @@ impl<'a> DocSet for OffsetPostings<'a> {
        }
    }
    
-    fn doc_freq(&self,) -> usize {
-        self.underlying.doc_freq()
+ 
+}
+
+impl<'a> HasLen for OffsetPostings<'a> {
+    fn len(&self,) -> usize {
+        self.underlying.len()
    }
 }

--- a/src/postings/postings.rs
+++ b/src/postings/postings.rs
@@ -1,7 +1,6 @@
 use std::borrow::Borrow;
 use postings::docset::DocSet;

-
 pub trait Postings: DocSet {
    fn term_freq(&self,) -> u32;
 }
@@ -15,9 +14,27 @@ impl<TPostings: Postings> Postings for Box<TPostings> {
 }

 impl<'a, TPostings: Postings> Postings for &'a mut TPostings {
-   
    fn term_freq(&self,) -> u32 {
        let unref: &TPostings = *self;
        unref.term_freq()
    }
 }
+
+pub trait HasLen {
+    fn len(&self,) -> usize;
+}
+
+impl<THasLen: HasLen> HasLen for Box<THasLen> {
+     fn len(&self,) -> usize {
+         let unboxed: &THasLen = self.borrow();
+        unboxed.borrow().len()
+     }
+}
+
+
+impl<'a> HasLen for &'a HasLen {
+    fn len(&self,) -> usize {
+        let unref: &HasLen = *self;
+        unref.len()
+    }
+}
--- a/src/postings/segment_postings.rs
+++ b/src/postings/segment_postings.rs
@@ -1,12 +1,12 @@
 use compression::{NUM_DOCS_PER_BLOCK, SIMDBlockDecoder};
 use DocId;
-use postings::{Postings, FreqHandler, DocSet};
+use postings::{Postings, FreqHandler, DocSet, HasLen};
 use std::num::Wrapping;


 // No Term Frequency, no postings.
 pub struct SegmentPostings<'a> {
-    doc_freq: usize,
+    len: usize,
    doc_offset: u32,
    block_decoder: SIMDBlockDecoder,
    freq_handler: FreqHandler,
@@ -20,7 +20,7 @@ impl<'a> SegmentPostings<'a> {

    pub fn empty() -> SegmentPostings<'a> {
        SegmentPostings {
-            doc_freq: 0,
+            len: 0,
            doc_offset: 0,
            block_decoder: SIMDBlockDecoder::new(),
            freq_handler: FreqHandler::NoFreq,
@@ -30,7 +30,7 @@ impl<'a> SegmentPostings<'a> {
    }

    pub fn load_next_block(&mut self,) {
-        let num_remaining_docs = self.doc_freq - self.cur.0;
+        let num_remaining_docs = self.len - self.cur.0;
        if num_remaining_docs >= NUM_DOCS_PER_BLOCK {
            self.remaining_data = self.block_decoder.uncompress_block_sorted(self.remaining_data, self.doc_offset);
            self.remaining_data = self.freq_handler.read_freq_block(self.remaining_data);
@@ -42,9 +42,9 @@ impl<'a> SegmentPostings<'a> {
        }
    }

-    pub fn from_data(doc_freq: u32, data: &'a [u8], freq_handler: FreqHandler) -> SegmentPostings<'a> {
+    pub fn from_data(len: u32, data: &'a [u8], freq_handler: FreqHandler) -> SegmentPostings<'a> {
        SegmentPostings {
-            doc_freq: doc_freq as usize,
+            len: len as usize,
            doc_offset: 0,
            block_decoder: SIMDBlockDecoder::new(),
            freq_handler: freq_handler,
@@ -66,9 +66,9 @@ impl<'a> DocSet for SegmentPostings<'a> {

    // goes to the next element.
    // next needs to be called a first time to point to the correct element.
-    fn next(&mut self,) -> bool {
+    fn advance(&mut self,) -> bool {
        self.cur += Wrapping(1);
-        if self.cur.0 >= self.doc_freq {
+        if self.cur.0 >= self.len {
            return false;
        }
        if self.index_within_block() == 0 {
@@ -81,8 +81,11 @@ impl<'a> DocSet for SegmentPostings<'a> {
        self.block_decoder.output(self.index_within_block())
    }

-    fn doc_freq(&self,) -> usize {
-        self.doc_freq
+}
+
+impl<'a> HasLen for SegmentPostings<'a> {
+    fn len(&self,) -> usize {
+        self.len
    }
 }

--- a/src/postings/union_postings.rs
+++ b/src/postings/union_postings.rs
@@ -36,7 +36,7 @@ impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> UnionPostings<TPos
        let num_postings = postings.len();
        assert_eq!(fieldnorms_reader.len(), num_postings);
        for posting in &mut postings {
-            assert!(posting.next());
+            assert!(posting.advance());
        }
        let mut term_frequencies: Vec<u32> = iter::repeat(0u32).take(num_postings).collect();
        let heap_items: Vec<HeapItem> = postings
@@ -69,7 +69,7 @@ impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> UnionPostings<TPos
    fn advance_head(&mut self,) {
        let ord = self.queue.peek().unwrap().1 as usize;
        let cur_postings = &mut self.postings[ord];
-        if cur_postings.next() {
+        if cur_postings.advance() {
            let doc = cur_postings.doc();
            self.term_frequencies[ord] = cur_postings.term_freq();  
            self.queue.replace(HeapItem(doc, ord as u32));
@@ -87,7 +87,7 @@ impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> UnionPostings<TPos

 impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> DocSet for UnionPostings<TPostings, TAccumulator> {
    
-    fn next(&mut self,) -> bool {
+    fn advance(&mut self,) -> bool {
        self.scorer.clear();
        match self.queue.peek() {
            Some(&HeapItem(doc, ord)) => {
@@ -122,15 +122,10 @@ impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> DocSet for UnionPo
        return true;
    }

-    // TODO implement a faster skip_next
-        
+    // TODO: implement a faster skip_next for UnionPostings (unrelated to doc() below).
    fn doc(&self,) -> DocId {
        self.doc
    }
-        
-    fn doc_freq(&self,) -> usize {
-        panic!("Doc freq");
-    }
 }

 #[cfg(test)]
@@ -175,18 +170,18 @@ mod tests {
            vec!(left, right),
            multi_term_scorer
        );
-        assert!(union.next());
+        assert!(union.advance());
        assert_eq!(union.doc(), 1);
        assert!(abs_diff(union.scorer().score(), 2.182179f32) < 0.001);
-        assert!(union.next());
+        assert!(union.advance());
        assert_eq!(union.doc(), 2);
        assert!(abs_diff(union.scorer().score(), 0.2236068) < 0.001f32);
-        assert!(union.next());
+        assert!(union.advance());
        assert_eq!(union.doc(), 3);
-        assert!(union.next());
+        assert!(union.advance());
        assert!(abs_diff(union.scorer().score(), 0.8944272f32) < 0.001f32);
        assert_eq!(union.doc(), 8);
-        assert!(!union.next());
+        assert!(!union.advance());
    }

 }
--- a/src/postings/vec_postings.rs
+++ b/src/postings/vec_postings.rs
@@ -1,7 +1,7 @@
 #![allow(dead_code)]

 use DocId;
-use postings::{Postings, DocSet, SkipResult};
+use postings::{Postings, DocSet, SkipResult, HasLen};
 use std::num::Wrapping;
 use std::cmp::Ordering;

@@ -20,7 +20,7 @@ impl From<Vec<DocId>> for VecPostings {
 }

 impl DocSet for VecPostings {
-    fn next(&mut self,) -> bool {
+    fn advance(&mut self,) -> bool {
        self.cursor += Wrapping(1);
        self.doc_ids.len() > self.cursor.0
    }
@@ -28,10 +28,6 @@ impl DocSet for VecPostings {
    fn doc(&self,) -> DocId {
        self.doc_ids[self.cursor.0]
    }
-
-    fn doc_freq(&self,) -> usize {
-        self.doc_ids.len()
-    }
    
    fn skip_next(&mut self, target: DocId) -> SkipResult {
        let mut start: usize = self.cursor.0;
@@ -90,6 +86,12 @@ impl DocSet for VecPostings {
    }
 }

+impl HasLen for VecPostings {
+    fn len(&self,) -> usize {
+        self.doc_ids.len()
+    }
+}
+
 impl Postings for VecPostings {
    fn term_freq(&self,) -> u32 {
        1u32
@@ -108,9 +110,9 @@ pub mod tests {
    pub fn test_vec_postings() {
        let doc_ids: Vec<DocId> = (0u32..1024u32).map(|e| e*3).collect();
        let mut postings = VecPostings::from(doc_ids);
-        assert!(postings.next());
+        assert!(postings.advance());
        assert_eq!(postings.doc(), 0u32);
-        assert!(postings.next());
+        assert!(postings.advance());
        assert_eq!(postings.doc(), 3u32);
        assert_eq!(postings.term_freq(), 1u32);
        assert_eq!(postings.skip_next(14u32), SkipResult::OverStep);
--- a/src/query/multi_term_query.rs
+++ b/src/query/multi_term_query.rs
@@ -72,7 +72,7 @@ impl Query for MultiTermQuery {
                        segment_search_timer.open("get_postings"));
                {
                    let _collection_timer = segment_search_timer.open("collection");
-                    while postings.next() {
+                    while postings.advance() {
                        let scored_doc = ScoredDoc(postings.scorer().score(), postings.doc());
                        collector.collect(scored_doc);
                    }