diff --git a/src/core/merger.rs b/src/core/merger.rs index 9b512e26d..627703883 100644 --- a/src/core/merger.rs +++ b/src/core/merger.rs @@ -15,6 +15,7 @@ use schema::{Term, Schema, Field}; use fastfield::FastFieldSerializer; use store::StoreWriter; use postings::ChainedPostings; +use postings::HasLen; use postings::OffsetPostings; use core::index::SegmentInfo; use std::cmp::{min, max, Ordering}; @@ -206,8 +207,8 @@ impl IndexMerger { loop { match postings_merger.next() { Some((term, mut merged_doc_ids)) => { - try!(postings_serializer.new_term(&term, merged_doc_ids.doc_freq() as DocId)); - while merged_doc_ids.next() { + try!(postings_serializer.new_term(&term, merged_doc_ids.len() as DocId)); + while merged_doc_ids.advance() { try!(postings_serializer.write_doc(merged_doc_ids.doc(), merged_doc_ids.term_freq(), &EMPTY_ARRAY)); } try!(postings_serializer.close_term()); diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 000000000..7001fd459 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,3 @@ +pub enum Error { + A, +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 49b2ddb90..2e10ec14f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -242,10 +242,10 @@ mod tests { let searcher = index.searcher().unwrap(); let reader = &searcher.segments()[0]; let mut postings = reader.read_postings(&Term::from_field_text(text_field, "af")).unwrap(); - assert!(postings.next()); + assert!(postings.advance()); assert_eq!(postings.doc(), 0); assert_eq!(postings.term_freq(), 3); - assert!(!postings.next()); + assert!(!postings.advance()); } } diff --git a/src/postings/chained_postings.rs b/src/postings/chained_postings.rs index e0ee0e85e..69ab7625f 100644 --- a/src/postings/chained_postings.rs +++ b/src/postings/chained_postings.rs @@ -1,36 +1,37 @@ use DocId; -use postings::{Postings, SkipResult}; +use postings::Postings; use postings::OffsetPostings; use postings::DocSet; +use postings::HasLen; pub struct ChainedPostings<'a> { chained_postings: Vec>, posting_id: usize, - doc_freq: usize, + len: usize, } impl<'a> ChainedPostings<'a> { pub fn new(chained_postings: Vec>) -> ChainedPostings { - let doc_freq: usize = chained_postings + let len: usize = chained_postings .iter() - .map(|segment_postings| segment_postings.doc_freq()) + .map(|segment_postings| segment_postings.len()) .fold(0, |sum, addition| sum + addition); ChainedPostings { chained_postings: chained_postings, posting_id: 0, - doc_freq: doc_freq, + len: len, } } } impl<'a> DocSet for ChainedPostings<'a> { - fn next(&mut self,) -> bool { + fn advance(&mut self,) -> bool { if self.posting_id == self.chained_postings.len() { return false; } - while !self.chained_postings[self.posting_id].next() { + while !self.chained_postings[self.posting_id].advance() { self.posting_id += 1; if self.posting_id == self.chained_postings.len() { return false; @@ -42,14 +43,11 @@ impl<'a> DocSet for ChainedPostings<'a> { fn doc(&self,) -> DocId { self.chained_postings[self.posting_id].doc() } +} - fn skip_next(&mut self, _target: DocId) -> SkipResult { - // TODO implement. - panic!("not implemented"); - } - - fn doc_freq(&self,) -> usize { - self.doc_freq +impl<'a> HasLen for ChainedPostings<'a> { + fn len(&self,) -> usize { + self.len } } diff --git a/src/postings/docset.rs b/src/postings/docset.rs index 25e5b71b3..1f182586e 100644 --- a/src/postings/docset.rs +++ b/src/postings/docset.rs @@ -14,7 +14,7 @@ pub enum SkipResult { pub trait DocSet { // goes to the next element. // next needs to be called a first time to point to the correct element. - fn next(&mut self,) -> bool; + fn advance(&mut self,) -> bool; // after skipping position // the iterator in such a way that doc() will return a @@ -23,7 +23,7 @@ pub trait DocSet { loop { match self.doc().cmp(&target) { Ordering::Less => { - if !self.next() { + if !self.advance() { return SkipResult::End; } }, @@ -34,16 +34,14 @@ pub trait DocSet { } fn doc(&self,) -> DocId; - - fn doc_freq(&self,) -> usize; } impl DocSet for Box { - fn next(&mut self,) -> bool { + fn advance(&mut self,) -> bool { let unboxed: &mut TDocSet = self.borrow_mut(); - unboxed.next() + unboxed.advance() } fn skip_next(&mut self, target: DocId) -> SkipResult { @@ -53,20 +51,15 @@ impl DocSet for Box { fn doc(&self,) -> DocId { let unboxed: &TDocSet = self.borrow(); - unboxed.borrow().doc() - } - - fn doc_freq(&self,) -> usize { - let unboxed: &TDocSet = self.borrow(); - unboxed.doc_freq() + unboxed.doc() } } impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet { - fn next(&mut self,) -> bool { + fn advance(&mut self,) -> bool { let unref: &mut TDocSet = *self; - unref.next() + unref.advance() } fn skip_next(&mut self, target: DocId) -> SkipResult { @@ -78,10 +71,6 @@ impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet { let unref: &TDocSet = *self; unref.doc() } - - - fn doc_freq(&self,) -> usize { - let unref: &TDocSet = *self; - unref.doc_freq() - } } + + \ No newline at end of file diff --git a/src/postings/intersection.rs b/src/postings/intersection.rs index a381aebef..77319d53e 100644 --- a/src/postings/intersection.rs +++ b/src/postings/intersection.rs @@ -35,16 +35,16 @@ impl<'a> IntersectionDocSet<'a> { impl<'a> DocSet for IntersectionDocSet<'a> { - fn next(&mut self,) -> bool { + fn advance(&mut self,) -> bool { if self.finished { return false; } - if !self.left.next() { + if !self.left.advance() { self.finished = true; return false; } - if !self.right.next() { + if !self.right.advance() { self.finished = true; return false; } @@ -54,13 +54,13 @@ impl<'a> DocSet for IntersectionDocSet<'a> { return true; } Ordering::Less => { - if !self.left.next() { + if !self.left.advance() { self.finished = true; return false; } } Ordering::Greater => { - if !self.right.next() { + if !self.right.advance() { self.finished = true; return false; } @@ -72,12 +72,6 @@ impl<'a> DocSet for IntersectionDocSet<'a> { fn doc(&self,) -> DocId { self.left.doc() } - - fn doc_freq(&self,) -> usize { - // TODO not a great idea. - panic!("intersection does not implement doc freq"); - } - } #[inline(never)] diff --git a/src/postings/mod.rs b/src/postings/mod.rs index 067023dc0..f6ec373de 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -28,6 +28,7 @@ pub use self::intersection::intersection; pub use self::intersection::IntersectionDocSet; pub use self::freq_handler::FreqHandler; pub use self::scored_docset::ScoredDocSet; +pub use self::postings::HasLen; #[cfg(test)] @@ -62,20 +63,20 @@ mod tests { let left = Box::new(VecPostings::from(vec!(1, 3, 9))); let right = Box::new(VecPostings::from(vec!(3, 4, 9, 18))); let mut intersection = IntersectionDocSet::new(vec!(left, right)); - assert!(intersection.next()); + assert!(intersection.advance()); assert_eq!(intersection.doc(), 3); - assert!(intersection.next()); + assert!(intersection.advance()); assert_eq!(intersection.doc(), 9); - assert!(!intersection.next()); + assert!(!intersection.advance()); } { let a = Box::new(VecPostings::from(vec!(1, 3, 9))); let b = Box::new(VecPostings::from(vec!(3, 4, 9, 18))); let c = Box::new(VecPostings::from(vec!(1, 5, 9, 111))); let mut intersection = IntersectionDocSet::new(vec!(a, b, c)); - assert!(intersection.next()); + assert!(intersection.advance()); assert_eq!(intersection.doc(), 9); - assert!(!intersection.next()); + assert!(!intersection.advance()); } } diff --git a/src/postings/offset_postings.rs b/src/postings/offset_postings.rs index 60e554a61..60239bee3 100644 --- a/src/postings/offset_postings.rs +++ b/src/postings/offset_postings.rs @@ -2,6 +2,7 @@ use postings::Postings; use postings::SegmentPostings; use postings::SkipResult; use postings::DocSet; +use postings::HasLen; use DocId; @@ -20,8 +21,8 @@ impl<'a> OffsetPostings<'a> { } impl<'a> DocSet for OffsetPostings<'a> { - fn next(&mut self,) -> bool { - self.underlying.next() + fn advance(&mut self,) -> bool { + self.underlying.advance() } fn doc(&self,) -> DocId { @@ -37,8 +38,12 @@ impl<'a> DocSet for OffsetPostings<'a> { } } - fn doc_freq(&self,) -> usize { - self.underlying.doc_freq() + +} + +impl<'a> HasLen for OffsetPostings<'a> { + fn len(&self,) -> usize { + self.underlying.len() } } diff --git a/src/postings/postings.rs b/src/postings/postings.rs index d507b269f..93483d705 100644 --- a/src/postings/postings.rs +++ b/src/postings/postings.rs @@ -1,7 +1,6 @@ use std::borrow::Borrow; use postings::docset::DocSet; - pub trait Postings: DocSet { fn term_freq(&self,) -> u32; } @@ -15,9 +14,27 @@ impl Postings for Box { } impl<'a, TPostings: Postings> Postings for &'a mut TPostings { - fn term_freq(&self,) -> u32 { let unref: &TPostings = *self; unref.term_freq() } } + +pub trait HasLen { + fn len(&self,) -> usize; +} + +impl HasLen for Box { + fn len(&self,) -> usize { + let unboxed: &THasLen = self.borrow(); + unboxed.borrow().len() + } +} + + +impl<'a> HasLen for &'a HasLen { + fn len(&self,) -> usize { + let unref: &HasLen = *self; + unref.len() + } +} diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index 5934cb249..8334a184a 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -1,12 +1,12 @@ use compression::{NUM_DOCS_PER_BLOCK, SIMDBlockDecoder}; use DocId; -use postings::{Postings, FreqHandler, DocSet}; +use postings::{Postings, FreqHandler, DocSet, HasLen}; use std::num::Wrapping; // No Term Frequency, no postings. pub struct SegmentPostings<'a> { - doc_freq: usize, + len: usize, doc_offset: u32, block_decoder: SIMDBlockDecoder, freq_handler: FreqHandler, @@ -20,7 +20,7 @@ impl<'a> SegmentPostings<'a> { pub fn empty() -> SegmentPostings<'a> { SegmentPostings { - doc_freq: 0, + len: 0, doc_offset: 0, block_decoder: SIMDBlockDecoder::new(), freq_handler: FreqHandler::NoFreq, @@ -30,7 +30,7 @@ impl<'a> SegmentPostings<'a> { } pub fn load_next_block(&mut self,) { - let num_remaining_docs = self.doc_freq - self.cur.0; + let num_remaining_docs = self.len - self.cur.0; if num_remaining_docs >= NUM_DOCS_PER_BLOCK { self.remaining_data = self.block_decoder.uncompress_block_sorted(self.remaining_data, self.doc_offset); self.remaining_data = self.freq_handler.read_freq_block(self.remaining_data); @@ -42,9 +42,9 @@ impl<'a> SegmentPostings<'a> { } } - pub fn from_data(doc_freq: u32, data: &'a [u8], freq_handler: FreqHandler) -> SegmentPostings<'a> { + pub fn from_data(len: u32, data: &'a [u8], freq_handler: FreqHandler) -> SegmentPostings<'a> { SegmentPostings { - doc_freq: doc_freq as usize, + len: len as usize, doc_offset: 0, block_decoder: SIMDBlockDecoder::new(), freq_handler: freq_handler, @@ -66,9 +66,9 @@ impl<'a> DocSet for SegmentPostings<'a> { // goes to the next element. // next needs to be called a first time to point to the correct element. - fn next(&mut self,) -> bool { + fn advance(&mut self,) -> bool { self.cur += Wrapping(1); - if self.cur.0 >= self.doc_freq { + if self.cur.0 >= self.len { return false; } if self.index_within_block() == 0 { @@ -81,8 +81,11 @@ impl<'a> DocSet for SegmentPostings<'a> { self.block_decoder.output(self.index_within_block()) } - fn doc_freq(&self,) -> usize { - self.doc_freq +} + +impl<'a> HasLen for SegmentPostings<'a> { + fn len(&self,) -> usize { + self.len } } diff --git a/src/postings/union_postings.rs b/src/postings/union_postings.rs index 86b2b865f..876532f95 100644 --- a/src/postings/union_postings.rs +++ b/src/postings/union_postings.rs @@ -36,7 +36,7 @@ impl UnionPostings = iter::repeat(0u32).take(num_postings).collect(); let heap_items: Vec = postings @@ -69,7 +69,7 @@ impl UnionPostings UnionPostings DocSet for UnionPostings { - fn next(&mut self,) -> bool { + fn advance(&mut self,) -> bool { self.scorer.clear(); match self.queue.peek() { Some(&HeapItem(doc, ord)) => { @@ -122,15 +122,10 @@ impl DocSet for UnionPo return true; } - // TODO implement a faster skip_next - + // TODO implement a faster skip_next fn doc(&self,) -> DocId { self.doc } - - fn doc_freq(&self,) -> usize { - panic!("Doc freq"); - } } #[cfg(test)] @@ -175,18 +170,18 @@ mod tests { vec!(left, right), multi_term_scorer ); - assert!(union.next()); + assert!(union.advance()); assert_eq!(union.doc(), 1); assert!(abs_diff(union.scorer().score(), 2.182179f32) < 0.001); - assert!(union.next()); + assert!(union.advance()); assert_eq!(union.doc(), 2); assert!(abs_diff(union.scorer().score(), 0.2236068) < 0.001f32); - assert!(union.next()); + assert!(union.advance()); assert_eq!(union.doc(), 3); - assert!(union.next()); + assert!(union.advance()); assert!(abs_diff(union.scorer().score(), 0.8944272f32) < 0.001f32); assert_eq!(union.doc(), 8); - assert!(!union.next()); + assert!(!union.advance()); } } diff --git a/src/postings/vec_postings.rs b/src/postings/vec_postings.rs index 8fc8a1ec4..d5dd658de 100644 --- a/src/postings/vec_postings.rs +++ b/src/postings/vec_postings.rs @@ -1,7 +1,7 @@ #![allow(dead_code)] use DocId; -use postings::{Postings, DocSet, SkipResult}; +use postings::{Postings, DocSet, SkipResult, HasLen}; use std::num::Wrapping; use std::cmp::Ordering; @@ -20,7 +20,7 @@ impl From> for VecPostings { } impl DocSet for VecPostings { - fn next(&mut self,) -> bool { + fn advance(&mut self,) -> bool { self.cursor += Wrapping(1); self.doc_ids.len() > self.cursor.0 } @@ -28,10 +28,6 @@ impl DocSet for VecPostings { fn doc(&self,) -> DocId { self.doc_ids[self.cursor.0] } - - fn doc_freq(&self,) -> usize { - self.doc_ids.len() - } fn skip_next(&mut self, target: DocId) -> SkipResult { let mut start: usize = self.cursor.0; @@ -90,6 +86,12 @@ impl DocSet for VecPostings { } } +impl HasLen for VecPostings { + fn len(&self,) -> usize { + self.doc_ids.len() + } +} + impl Postings for VecPostings { fn term_freq(&self,) -> u32 { 1u32 @@ -108,9 +110,9 @@ pub mod tests { pub fn test_vec_postings() { let doc_ids: Vec = (0u32..1024u32).map(|e| e*3).collect(); let mut postings = VecPostings::from(doc_ids); - assert!(postings.next()); + assert!(postings.advance()); assert_eq!(postings.doc(), 0u32); - assert!(postings.next()); + assert!(postings.advance()); assert_eq!(postings.doc(), 3u32); assert_eq!(postings.term_freq(), 1u32); assert_eq!(postings.skip_next(14u32), SkipResult::OverStep); diff --git a/src/query/multi_term_query.rs b/src/query/multi_term_query.rs index d03c3e313..512ecbc6a 100644 --- a/src/query/multi_term_query.rs +++ b/src/query/multi_term_query.rs @@ -72,7 +72,7 @@ impl Query for MultiTermQuery { segment_search_timer.open("get_postings")); { let _collection_timer = segment_search_timer.open("collection"); - while postings.next() { + while postings.advance() { let scored_doc = ScoredDoc(postings.scorer().score(), postings.doc()); collector.collect(scored_doc); }