next() -> advance()

This commit is contained in:
Paul Masurel
2016-08-06 11:50:17 +09:00
parent bc54db6872
commit bf0d072c2d
13 changed files with 101 additions and 93 deletions

View File

@@ -15,6 +15,7 @@ use schema::{Term, Schema, Field};
use fastfield::FastFieldSerializer;
use store::StoreWriter;
use postings::ChainedPostings;
use postings::HasLen;
use postings::OffsetPostings;
use core::index::SegmentInfo;
use std::cmp::{min, max, Ordering};
@@ -206,8 +207,8 @@ impl IndexMerger {
loop {
match postings_merger.next() {
Some((term, mut merged_doc_ids)) => {
try!(postings_serializer.new_term(&term, merged_doc_ids.doc_freq() as DocId));
while merged_doc_ids.next() {
try!(postings_serializer.new_term(&term, merged_doc_ids.len() as DocId));
while merged_doc_ids.advance() {
try!(postings_serializer.write_doc(merged_doc_ids.doc(), merged_doc_ids.term_freq(), &EMPTY_ARRAY));
}
try!(postings_serializer.close_term());

3
src/error.rs Normal file
View File

@@ -0,0 +1,3 @@
/// Error enumeration declared in `src/error.rs`.
///
/// NOTE(review): `A` looks like a placeholder variant; nothing in view
/// constructs or matches it. Confirm the intended set of variants.
pub enum Error {
/// The only variant currently declared.
A,
}

View File

@@ -242,10 +242,10 @@ mod tests {
let searcher = index.searcher().unwrap();
let reader = &searcher.segments()[0];
let mut postings = reader.read_postings(&Term::from_field_text(text_field, "af")).unwrap();
assert!(postings.next());
assert!(postings.advance());
assert_eq!(postings.doc(), 0);
assert_eq!(postings.term_freq(), 3);
assert!(!postings.next());
assert!(!postings.advance());
}
}

View File

@@ -1,36 +1,37 @@
use DocId;
use postings::{Postings, SkipResult};
use postings::Postings;
use postings::OffsetPostings;
use postings::DocSet;
use postings::HasLen;
pub struct ChainedPostings<'a> {
chained_postings: Vec<OffsetPostings<'a>>,
posting_id: usize,
doc_freq: usize,
len: usize,
}
impl<'a> ChainedPostings<'a> {
pub fn new(chained_postings: Vec<OffsetPostings<'a>>) -> ChainedPostings {
let doc_freq: usize = chained_postings
let len: usize = chained_postings
.iter()
.map(|segment_postings| segment_postings.doc_freq())
.map(|segment_postings| segment_postings.len())
.fold(0, |sum, addition| sum + addition);
ChainedPostings {
chained_postings: chained_postings,
posting_id: 0,
doc_freq: doc_freq,
len: len,
}
}
}
impl<'a> DocSet for ChainedPostings<'a> {
fn next(&mut self,) -> bool {
fn advance(&mut self,) -> bool {
if self.posting_id == self.chained_postings.len() {
return false;
}
while !self.chained_postings[self.posting_id].next() {
while !self.chained_postings[self.posting_id].advance() {
self.posting_id += 1;
if self.posting_id == self.chained_postings.len() {
return false;
@@ -42,14 +43,11 @@ impl<'a> DocSet for ChainedPostings<'a> {
fn doc(&self,) -> DocId {
self.chained_postings[self.posting_id].doc()
}
}
fn skip_next(&mut self, _target: DocId) -> SkipResult {
// TODO implement.
panic!("not implemented");
}
fn doc_freq(&self,) -> usize {
self.doc_freq
/// `HasLen` for chained postings.
///
/// The value reported is the `len` field, which `ChainedPostings::new`
/// computes once as the sum of `len()` over every chained postings list.
impl<'a> HasLen for ChainedPostings<'a> {
/// Returns the total length stored at construction time.
fn len(&self,) -> usize {
self.len
}
}

View File

@@ -14,7 +14,7 @@ pub enum SkipResult {
pub trait DocSet {
// goes to the next element.
// advance needs to be called a first time to point to the correct element.
fn next(&mut self,) -> bool;
fn advance(&mut self,) -> bool;
// after skipping position
// the iterator in such a way that doc() will return a
@@ -23,7 +23,7 @@ pub trait DocSet {
loop {
match self.doc().cmp(&target) {
Ordering::Less => {
if !self.next() {
if !self.advance() {
return SkipResult::End;
}
},
@@ -34,16 +34,14 @@ pub trait DocSet {
}
fn doc(&self,) -> DocId;
fn doc_freq(&self,) -> usize;
}
impl<TDocSet: DocSet> DocSet for Box<TDocSet> {
fn next(&mut self,) -> bool {
fn advance(&mut self,) -> bool {
let unboxed: &mut TDocSet = self.borrow_mut();
unboxed.next()
unboxed.advance()
}
fn skip_next(&mut self, target: DocId) -> SkipResult {
@@ -53,20 +51,15 @@ impl<TDocSet: DocSet> DocSet for Box<TDocSet> {
fn doc(&self,) -> DocId {
let unboxed: &TDocSet = self.borrow();
unboxed.borrow().doc()
}
fn doc_freq(&self,) -> usize {
let unboxed: &TDocSet = self.borrow();
unboxed.doc_freq()
unboxed.doc()
}
}
impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet {
fn next(&mut self,) -> bool {
fn advance(&mut self,) -> bool {
let unref: &mut TDocSet = *self;
unref.next()
unref.advance()
}
fn skip_next(&mut self, target: DocId) -> SkipResult {
@@ -78,10 +71,6 @@ impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet {
let unref: &TDocSet = *self;
unref.doc()
}
fn doc_freq(&self,) -> usize {
let unref: &TDocSet = *self;
unref.doc_freq()
}
}

View File

@@ -35,16 +35,16 @@ impl<'a> IntersectionDocSet<'a> {
impl<'a> DocSet for IntersectionDocSet<'a> {
fn next(&mut self,) -> bool {
fn advance(&mut self,) -> bool {
if self.finished {
return false;
}
if !self.left.next() {
if !self.left.advance() {
self.finished = true;
return false;
}
if !self.right.next() {
if !self.right.advance() {
self.finished = true;
return false;
}
@@ -54,13 +54,13 @@ impl<'a> DocSet for IntersectionDocSet<'a> {
return true;
}
Ordering::Less => {
if !self.left.next() {
if !self.left.advance() {
self.finished = true;
return false;
}
}
Ordering::Greater => {
if !self.right.next() {
if !self.right.advance() {
self.finished = true;
return false;
}
@@ -72,12 +72,6 @@ impl<'a> DocSet for IntersectionDocSet<'a> {
fn doc(&self,) -> DocId {
self.left.doc()
}
fn doc_freq(&self,) -> usize {
// TODO not a great idea.
panic!("intersection does not implement doc freq");
}
}
#[inline(never)]

View File

@@ -28,6 +28,7 @@ pub use self::intersection::intersection;
pub use self::intersection::IntersectionDocSet;
pub use self::freq_handler::FreqHandler;
pub use self::scored_docset::ScoredDocSet;
pub use self::postings::HasLen;
#[cfg(test)]
@@ -62,20 +63,20 @@ mod tests {
let left = Box::new(VecPostings::from(vec!(1, 3, 9)));
let right = Box::new(VecPostings::from(vec!(3, 4, 9, 18)));
let mut intersection = IntersectionDocSet::new(vec!(left, right));
assert!(intersection.next());
assert!(intersection.advance());
assert_eq!(intersection.doc(), 3);
assert!(intersection.next());
assert!(intersection.advance());
assert_eq!(intersection.doc(), 9);
assert!(!intersection.next());
assert!(!intersection.advance());
}
{
let a = Box::new(VecPostings::from(vec!(1, 3, 9)));
let b = Box::new(VecPostings::from(vec!(3, 4, 9, 18)));
let c = Box::new(VecPostings::from(vec!(1, 5, 9, 111)));
let mut intersection = IntersectionDocSet::new(vec!(a, b, c));
assert!(intersection.next());
assert!(intersection.advance());
assert_eq!(intersection.doc(), 9);
assert!(!intersection.next());
assert!(!intersection.advance());
}
}

View File

@@ -2,6 +2,7 @@ use postings::Postings;
use postings::SegmentPostings;
use postings::SkipResult;
use postings::DocSet;
use postings::HasLen;
use DocId;
@@ -20,8 +21,8 @@ impl<'a> OffsetPostings<'a> {
}
impl<'a> DocSet for OffsetPostings<'a> {
fn next(&mut self,) -> bool {
self.underlying.next()
fn advance(&mut self,) -> bool {
self.underlying.advance()
}
fn doc(&self,) -> DocId {
@@ -37,8 +38,12 @@ impl<'a> DocSet for OffsetPostings<'a> {
}
}
fn doc_freq(&self,) -> usize {
self.underlying.doc_freq()
}
/// `HasLen` for offset postings: the length is whatever the wrapped
/// `underlying` postings report.
impl<'a> HasLen for OffsetPostings<'a> {
/// Delegates to `self.underlying.len()`.
fn len(&self,) -> usize {
self.underlying.len()
}
}

View File

@@ -1,7 +1,6 @@
use std::borrow::Borrow;
use postings::docset::DocSet;
pub trait Postings: DocSet {
fn term_freq(&self,) -> u32;
}
@@ -15,9 +14,27 @@ impl<TPostings: Postings> Postings for Box<TPostings> {
}
impl<'a, TPostings: Postings> Postings for &'a mut TPostings {
fn term_freq(&self,) -> u32 {
let unref: &TPostings = *self;
unref.term_freq()
}
}
/// Implemented by types that can report a length.
///
/// NOTE(review): for postings types this appears to be the number of
/// documents (the value previously exposed as `doc_freq`) — confirm.
pub trait HasLen {
/// Returns the length as a `usize`.
fn len(&self,) -> usize;
}
/// Forwards `HasLen` through a `Box`, so a boxed value reports the same
/// length as the value it owns.
impl<THasLen: HasLen> HasLen for Box<THasLen> {
    /// Returns the length of the boxed value.
    fn len(&self,) -> usize {
        // Borrow the boxed value once. Calling `.borrow()` again on the
        // resulting reference is a no-op that only obscures the delegation
        // (and leans on type inference to pick the identity `Borrow` impl).
        let unboxed: &THasLen = self.borrow();
        unboxed.len()
    }
}
/// Lets a shared reference to a `HasLen` trait object be used wherever a
/// `HasLen` value is expected.
impl<'a> HasLen for &'a HasLen {
    /// Returns the length reported by the referenced object.
    fn len(&self,) -> usize {
        // Two derefs: `&self` -> `&'a HasLen` -> the trait object itself,
        // whose own `len` is then called.
        (**self).len()
    }
}

View File

@@ -1,12 +1,12 @@
use compression::{NUM_DOCS_PER_BLOCK, SIMDBlockDecoder};
use DocId;
use postings::{Postings, FreqHandler, DocSet};
use postings::{Postings, FreqHandler, DocSet, HasLen};
use std::num::Wrapping;
// No Term Frequency, no postings.
pub struct SegmentPostings<'a> {
doc_freq: usize,
len: usize,
doc_offset: u32,
block_decoder: SIMDBlockDecoder,
freq_handler: FreqHandler,
@@ -20,7 +20,7 @@ impl<'a> SegmentPostings<'a> {
pub fn empty() -> SegmentPostings<'a> {
SegmentPostings {
doc_freq: 0,
len: 0,
doc_offset: 0,
block_decoder: SIMDBlockDecoder::new(),
freq_handler: FreqHandler::NoFreq,
@@ -30,7 +30,7 @@ impl<'a> SegmentPostings<'a> {
}
pub fn load_next_block(&mut self,) {
let num_remaining_docs = self.doc_freq - self.cur.0;
let num_remaining_docs = self.len - self.cur.0;
if num_remaining_docs >= NUM_DOCS_PER_BLOCK {
self.remaining_data = self.block_decoder.uncompress_block_sorted(self.remaining_data, self.doc_offset);
self.remaining_data = self.freq_handler.read_freq_block(self.remaining_data);
@@ -42,9 +42,9 @@ impl<'a> SegmentPostings<'a> {
}
}
pub fn from_data(doc_freq: u32, data: &'a [u8], freq_handler: FreqHandler) -> SegmentPostings<'a> {
pub fn from_data(len: u32, data: &'a [u8], freq_handler: FreqHandler) -> SegmentPostings<'a> {
SegmentPostings {
doc_freq: doc_freq as usize,
len: len as usize,
doc_offset: 0,
block_decoder: SIMDBlockDecoder::new(),
freq_handler: freq_handler,
@@ -66,9 +66,9 @@ impl<'a> DocSet for SegmentPostings<'a> {
// goes to the next element.
// advance needs to be called a first time to point to the correct element.
fn next(&mut self,) -> bool {
fn advance(&mut self,) -> bool {
self.cur += Wrapping(1);
if self.cur.0 >= self.doc_freq {
if self.cur.0 >= self.len {
return false;
}
if self.index_within_block() == 0 {
@@ -81,8 +81,11 @@ impl<'a> DocSet for SegmentPostings<'a> {
self.block_decoder.output(self.index_within_block())
}
fn doc_freq(&self,) -> usize {
self.doc_freq
}
/// `HasLen` for segment postings.
///
/// Reports the `len` field: the value passed to `from_data`, or 0 for
/// `empty()`. `advance` uses the same field as its end-of-list bound.
impl<'a> HasLen for SegmentPostings<'a> {
/// Returns the stored `len` field.
fn len(&self,) -> usize {
self.len
}
}

View File

@@ -36,7 +36,7 @@ impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> UnionPostings<TPos
let num_postings = postings.len();
assert_eq!(fieldnorms_reader.len(), num_postings);
for posting in &mut postings {
assert!(posting.next());
assert!(posting.advance());
}
let mut term_frequencies: Vec<u32> = iter::repeat(0u32).take(num_postings).collect();
let heap_items: Vec<HeapItem> = postings
@@ -69,7 +69,7 @@ impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> UnionPostings<TPos
fn advance_head(&mut self,) {
let ord = self.queue.peek().unwrap().1 as usize;
let cur_postings = &mut self.postings[ord];
if cur_postings.next() {
if cur_postings.advance() {
let doc = cur_postings.doc();
self.term_frequencies[ord] = cur_postings.term_freq();
self.queue.replace(HeapItem(doc, ord as u32));
@@ -87,7 +87,7 @@ impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> UnionPostings<TPos
impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> DocSet for UnionPostings<TPostings, TAccumulator> {
fn next(&mut self,) -> bool {
fn advance(&mut self,) -> bool {
self.scorer.clear();
match self.queue.peek() {
Some(&HeapItem(doc, ord)) => {
@@ -122,15 +122,10 @@ impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> DocSet for UnionPo
return true;
}
// TODO implement a faster skip_next
// TODO implement a faster skip_next
fn doc(&self,) -> DocId {
self.doc
}
fn doc_freq(&self,) -> usize {
panic!("Doc freq");
}
}
#[cfg(test)]
@@ -175,18 +170,18 @@ mod tests {
vec!(left, right),
multi_term_scorer
);
assert!(union.next());
assert!(union.advance());
assert_eq!(union.doc(), 1);
assert!(abs_diff(union.scorer().score(), 2.182179f32) < 0.001);
assert!(union.next());
assert!(union.advance());
assert_eq!(union.doc(), 2);
assert!(abs_diff(union.scorer().score(), 0.2236068) < 0.001f32);
assert!(union.next());
assert!(union.advance());
assert_eq!(union.doc(), 3);
assert!(union.next());
assert!(union.advance());
assert!(abs_diff(union.scorer().score(), 0.8944272f32) < 0.001f32);
assert_eq!(union.doc(), 8);
assert!(!union.next());
assert!(!union.advance());
}
}

View File

@@ -1,7 +1,7 @@
#![allow(dead_code)]
use DocId;
use postings::{Postings, DocSet, SkipResult};
use postings::{Postings, DocSet, SkipResult, HasLen};
use std::num::Wrapping;
use std::cmp::Ordering;
@@ -20,7 +20,7 @@ impl From<Vec<DocId>> for VecPostings {
}
impl DocSet for VecPostings {
fn next(&mut self,) -> bool {
fn advance(&mut self,) -> bool {
self.cursor += Wrapping(1);
self.doc_ids.len() > self.cursor.0
}
@@ -28,10 +28,6 @@ impl DocSet for VecPostings {
fn doc(&self,) -> DocId {
self.doc_ids[self.cursor.0]
}
fn doc_freq(&self,) -> usize {
self.doc_ids.len()
}
fn skip_next(&mut self, target: DocId) -> SkipResult {
let mut start: usize = self.cursor.0;
@@ -90,6 +86,12 @@ impl DocSet for VecPostings {
}
}
/// `HasLen` for the in-memory `VecPostings`.
impl HasLen for VecPostings {
/// Returns the number of entries held in `doc_ids`.
fn len(&self,) -> usize {
self.doc_ids.len()
}
}
impl Postings for VecPostings {
fn term_freq(&self,) -> u32 {
1u32
@@ -108,9 +110,9 @@ pub mod tests {
pub fn test_vec_postings() {
let doc_ids: Vec<DocId> = (0u32..1024u32).map(|e| e*3).collect();
let mut postings = VecPostings::from(doc_ids);
assert!(postings.next());
assert!(postings.advance());
assert_eq!(postings.doc(), 0u32);
assert!(postings.next());
assert!(postings.advance());
assert_eq!(postings.doc(), 3u32);
assert_eq!(postings.term_freq(), 1u32);
assert_eq!(postings.skip_next(14u32), SkipResult::OverStep);

View File

@@ -72,7 +72,7 @@ impl Query for MultiTermQuery {
segment_search_timer.open("get_postings"));
{
let _collection_timer = segment_search_timer.open("collection");
while postings.next() {
while postings.advance() {
let scored_doc = ScoredDoc(postings.scorer().score(), postings.doc());
collector.collect(scored_doc);
}