mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-26 13:10:41 +00:00
next() -> advance()
This commit is contained in:
@@ -15,6 +15,7 @@ use schema::{Term, Schema, Field};
|
||||
use fastfield::FastFieldSerializer;
|
||||
use store::StoreWriter;
|
||||
use postings::ChainedPostings;
|
||||
use postings::HasLen;
|
||||
use postings::OffsetPostings;
|
||||
use core::index::SegmentInfo;
|
||||
use std::cmp::{min, max, Ordering};
|
||||
@@ -206,8 +207,8 @@ impl IndexMerger {
|
||||
loop {
|
||||
match postings_merger.next() {
|
||||
Some((term, mut merged_doc_ids)) => {
|
||||
try!(postings_serializer.new_term(&term, merged_doc_ids.doc_freq() as DocId));
|
||||
while merged_doc_ids.next() {
|
||||
try!(postings_serializer.new_term(&term, merged_doc_ids.len() as DocId));
|
||||
while merged_doc_ids.advance() {
|
||||
try!(postings_serializer.write_doc(merged_doc_ids.doc(), merged_doc_ids.term_freq(), &EMPTY_ARRAY));
|
||||
}
|
||||
try!(postings_serializer.close_term());
|
||||
|
||||
3
src/error.rs
Normal file
3
src/error.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
// Error type declared in the new `src/error.rs` file.
// NOTE(review): the single variant `A` carries no data and looks like a
// placeholder -- confirm which real error cases are meant to live here.
pub enum Error {
|
||||
// Only variant defined so far.
A,
|
||||
}
|
||||
@@ -242,10 +242,10 @@ mod tests {
|
||||
let searcher = index.searcher().unwrap();
|
||||
let reader = &searcher.segments()[0];
|
||||
let mut postings = reader.read_postings(&Term::from_field_text(text_field, "af")).unwrap();
|
||||
assert!(postings.next());
|
||||
assert!(postings.advance());
|
||||
assert_eq!(postings.doc(), 0);
|
||||
assert_eq!(postings.term_freq(), 3);
|
||||
assert!(!postings.next());
|
||||
assert!(!postings.advance());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,36 +1,37 @@
|
||||
use DocId;
|
||||
use postings::{Postings, SkipResult};
|
||||
use postings::Postings;
|
||||
use postings::OffsetPostings;
|
||||
use postings::DocSet;
|
||||
use postings::HasLen;
|
||||
|
||||
pub struct ChainedPostings<'a> {
|
||||
chained_postings: Vec<OffsetPostings<'a>>,
|
||||
posting_id: usize,
|
||||
doc_freq: usize,
|
||||
len: usize,
|
||||
}
|
||||
|
||||
impl<'a> ChainedPostings<'a> {
|
||||
|
||||
pub fn new(chained_postings: Vec<OffsetPostings<'a>>) -> ChainedPostings {
|
||||
let doc_freq: usize = chained_postings
|
||||
let len: usize = chained_postings
|
||||
.iter()
|
||||
.map(|segment_postings| segment_postings.doc_freq())
|
||||
.map(|segment_postings| segment_postings.len())
|
||||
.fold(0, |sum, addition| sum + addition);
|
||||
ChainedPostings {
|
||||
chained_postings: chained_postings,
|
||||
posting_id: 0,
|
||||
doc_freq: doc_freq,
|
||||
len: len,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> DocSet for ChainedPostings<'a> {
|
||||
|
||||
fn next(&mut self,) -> bool {
|
||||
fn advance(&mut self,) -> bool {
|
||||
if self.posting_id == self.chained_postings.len() {
|
||||
return false;
|
||||
}
|
||||
while !self.chained_postings[self.posting_id].next() {
|
||||
while !self.chained_postings[self.posting_id].advance() {
|
||||
self.posting_id += 1;
|
||||
if self.posting_id == self.chained_postings.len() {
|
||||
return false;
|
||||
@@ -42,14 +43,11 @@ impl<'a> DocSet for ChainedPostings<'a> {
|
||||
fn doc(&self,) -> DocId {
|
||||
self.chained_postings[self.posting_id].doc()
|
||||
}
|
||||
}
|
||||
|
||||
fn skip_next(&mut self, _target: DocId) -> SkipResult {
|
||||
// TODO implement.
|
||||
panic!("not implemented");
|
||||
}
|
||||
|
||||
fn doc_freq(&self,) -> usize {
|
||||
self.doc_freq
|
||||
impl<'a> HasLen for ChainedPostings<'a> {
|
||||
fn len(&self,) -> usize {
|
||||
self.len
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ pub enum SkipResult {
|
||||
pub trait DocSet {
|
||||
// goes to the next element.
|
||||
// advance needs to be called a first time to point to the correct element.
|
||||
fn next(&mut self,) -> bool;
|
||||
fn advance(&mut self,) -> bool;
|
||||
|
||||
// after skipping position
|
||||
// the iterator in such a way that doc() will return a
|
||||
@@ -23,7 +23,7 @@ pub trait DocSet {
|
||||
loop {
|
||||
match self.doc().cmp(&target) {
|
||||
Ordering::Less => {
|
||||
if !self.next() {
|
||||
if !self.advance() {
|
||||
return SkipResult::End;
|
||||
}
|
||||
},
|
||||
@@ -34,16 +34,14 @@ pub trait DocSet {
|
||||
}
|
||||
|
||||
fn doc(&self,) -> DocId;
|
||||
|
||||
fn doc_freq(&self,) -> usize;
|
||||
}
|
||||
|
||||
|
||||
impl<TDocSet: DocSet> DocSet for Box<TDocSet> {
|
||||
|
||||
fn next(&mut self,) -> bool {
|
||||
fn advance(&mut self,) -> bool {
|
||||
let unboxed: &mut TDocSet = self.borrow_mut();
|
||||
unboxed.next()
|
||||
unboxed.advance()
|
||||
}
|
||||
|
||||
fn skip_next(&mut self, target: DocId) -> SkipResult {
|
||||
@@ -53,20 +51,15 @@ impl<TDocSet: DocSet> DocSet for Box<TDocSet> {
|
||||
|
||||
fn doc(&self,) -> DocId {
|
||||
let unboxed: &TDocSet = self.borrow();
|
||||
unboxed.borrow().doc()
|
||||
}
|
||||
|
||||
fn doc_freq(&self,) -> usize {
|
||||
let unboxed: &TDocSet = self.borrow();
|
||||
unboxed.doc_freq()
|
||||
unboxed.doc()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet {
|
||||
|
||||
fn next(&mut self,) -> bool {
|
||||
fn advance(&mut self,) -> bool {
|
||||
let unref: &mut TDocSet = *self;
|
||||
unref.next()
|
||||
unref.advance()
|
||||
}
|
||||
|
||||
fn skip_next(&mut self, target: DocId) -> SkipResult {
|
||||
@@ -78,10 +71,6 @@ impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet {
|
||||
let unref: &TDocSet = *self;
|
||||
unref.doc()
|
||||
}
|
||||
|
||||
|
||||
fn doc_freq(&self,) -> usize {
|
||||
let unref: &TDocSet = *self;
|
||||
unref.doc_freq()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -35,16 +35,16 @@ impl<'a> IntersectionDocSet<'a> {
|
||||
|
||||
impl<'a> DocSet for IntersectionDocSet<'a> {
|
||||
|
||||
fn next(&mut self,) -> bool {
|
||||
fn advance(&mut self,) -> bool {
|
||||
if self.finished {
|
||||
return false;
|
||||
}
|
||||
|
||||
if !self.left.next() {
|
||||
if !self.left.advance() {
|
||||
self.finished = true;
|
||||
return false;
|
||||
}
|
||||
if !self.right.next() {
|
||||
if !self.right.advance() {
|
||||
self.finished = true;
|
||||
return false;
|
||||
}
|
||||
@@ -54,13 +54,13 @@ impl<'a> DocSet for IntersectionDocSet<'a> {
|
||||
return true;
|
||||
}
|
||||
Ordering::Less => {
|
||||
if !self.left.next() {
|
||||
if !self.left.advance() {
|
||||
self.finished = true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
Ordering::Greater => {
|
||||
if !self.right.next() {
|
||||
if !self.right.advance() {
|
||||
self.finished = true;
|
||||
return false;
|
||||
}
|
||||
@@ -72,12 +72,6 @@ impl<'a> DocSet for IntersectionDocSet<'a> {
|
||||
fn doc(&self,) -> DocId {
|
||||
self.left.doc()
|
||||
}
|
||||
|
||||
fn doc_freq(&self,) -> usize {
|
||||
// TODO not a great idea.
|
||||
panic!("intersection does not implement doc freq");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
|
||||
@@ -28,6 +28,7 @@ pub use self::intersection::intersection;
|
||||
pub use self::intersection::IntersectionDocSet;
|
||||
pub use self::freq_handler::FreqHandler;
|
||||
pub use self::scored_docset::ScoredDocSet;
|
||||
pub use self::postings::HasLen;
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -62,20 +63,20 @@ mod tests {
|
||||
let left = Box::new(VecPostings::from(vec!(1, 3, 9)));
|
||||
let right = Box::new(VecPostings::from(vec!(3, 4, 9, 18)));
|
||||
let mut intersection = IntersectionDocSet::new(vec!(left, right));
|
||||
assert!(intersection.next());
|
||||
assert!(intersection.advance());
|
||||
assert_eq!(intersection.doc(), 3);
|
||||
assert!(intersection.next());
|
||||
assert!(intersection.advance());
|
||||
assert_eq!(intersection.doc(), 9);
|
||||
assert!(!intersection.next());
|
||||
assert!(!intersection.advance());
|
||||
}
|
||||
{
|
||||
let a = Box::new(VecPostings::from(vec!(1, 3, 9)));
|
||||
let b = Box::new(VecPostings::from(vec!(3, 4, 9, 18)));
|
||||
let c = Box::new(VecPostings::from(vec!(1, 5, 9, 111)));
|
||||
let mut intersection = IntersectionDocSet::new(vec!(a, b, c));
|
||||
assert!(intersection.next());
|
||||
assert!(intersection.advance());
|
||||
assert_eq!(intersection.doc(), 9);
|
||||
assert!(!intersection.next());
|
||||
assert!(!intersection.advance());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ use postings::Postings;
|
||||
use postings::SegmentPostings;
|
||||
use postings::SkipResult;
|
||||
use postings::DocSet;
|
||||
use postings::HasLen;
|
||||
use DocId;
|
||||
|
||||
|
||||
@@ -20,8 +21,8 @@ impl<'a> OffsetPostings<'a> {
|
||||
}
|
||||
|
||||
impl<'a> DocSet for OffsetPostings<'a> {
|
||||
fn next(&mut self,) -> bool {
|
||||
self.underlying.next()
|
||||
fn advance(&mut self,) -> bool {
|
||||
self.underlying.advance()
|
||||
}
|
||||
|
||||
fn doc(&self,) -> DocId {
|
||||
@@ -37,8 +38,12 @@ impl<'a> DocSet for OffsetPostings<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_freq(&self,) -> usize {
|
||||
self.underlying.doc_freq()
|
||||
|
||||
}
|
||||
|
||||
// `HasLen` for `OffsetPostings`: the length is whatever the wrapped
// `underlying` postings report.
impl<'a> HasLen for OffsetPostings<'a> {
|
||||
// Delegates straight to `underlying.len()`.
fn len(&self,) -> usize {
|
||||
self.underlying.len()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::borrow::Borrow;
|
||||
use postings::docset::DocSet;
|
||||
|
||||
|
||||
pub trait Postings: DocSet {
|
||||
fn term_freq(&self,) -> u32;
|
||||
}
|
||||
@@ -15,9 +14,27 @@ impl<TPostings: Postings> Postings for Box<TPostings> {
|
||||
}
|
||||
|
||||
// Lets a mutable reference to any `Postings` be used as a `Postings` itself
// by forwarding to the referenced value.
impl<'a, TPostings: Postings> Postings for &'a mut TPostings {
|
||||
|
||||
// Returns `term_freq()` of the referenced postings.
fn term_freq(&self,) -> u32 {
|
||||
// `self` is `&&mut TPostings`; dereference once to reach the postings.
let unref: &TPostings = *self;
|
||||
unref.term_freq()
|
||||
}
|
||||
}
|
||||
|
||||
// Trait for values that can report a length.
// The postings types shown in this change implement it with `len()` where
// they previously exposed `doc_freq()` through `DocSet`.
pub trait HasLen {
|
||||
// Returns the length of the value.
fn len(&self,) -> usize;
|
||||
}
|
||||
|
||||
impl<THasLen: HasLen> HasLen for Box<THasLen> {
|
||||
fn len(&self,) -> usize {
|
||||
let unboxed: &THasLen = self.borrow();
|
||||
unboxed.borrow().len()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Lets a shared reference to a `HasLen` trait object be used as a `HasLen`
// by forwarding to the referenced object.
impl<'a> HasLen for &'a HasLen {
|
||||
// Returns `len()` of the referenced object.
fn len(&self,) -> usize {
|
||||
// `self` is `&&HasLen`; dereference once to reach the trait object.
let unref: &HasLen = *self;
|
||||
unref.len()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
use compression::{NUM_DOCS_PER_BLOCK, SIMDBlockDecoder};
|
||||
use DocId;
|
||||
use postings::{Postings, FreqHandler, DocSet};
|
||||
use postings::{Postings, FreqHandler, DocSet, HasLen};
|
||||
use std::num::Wrapping;
|
||||
|
||||
|
||||
// No Term Frequency, no postings.
|
||||
pub struct SegmentPostings<'a> {
|
||||
doc_freq: usize,
|
||||
len: usize,
|
||||
doc_offset: u32,
|
||||
block_decoder: SIMDBlockDecoder,
|
||||
freq_handler: FreqHandler,
|
||||
@@ -20,7 +20,7 @@ impl<'a> SegmentPostings<'a> {
|
||||
|
||||
pub fn empty() -> SegmentPostings<'a> {
|
||||
SegmentPostings {
|
||||
doc_freq: 0,
|
||||
len: 0,
|
||||
doc_offset: 0,
|
||||
block_decoder: SIMDBlockDecoder::new(),
|
||||
freq_handler: FreqHandler::NoFreq,
|
||||
@@ -30,7 +30,7 @@ impl<'a> SegmentPostings<'a> {
|
||||
}
|
||||
|
||||
pub fn load_next_block(&mut self,) {
|
||||
let num_remaining_docs = self.doc_freq - self.cur.0;
|
||||
let num_remaining_docs = self.len - self.cur.0;
|
||||
if num_remaining_docs >= NUM_DOCS_PER_BLOCK {
|
||||
self.remaining_data = self.block_decoder.uncompress_block_sorted(self.remaining_data, self.doc_offset);
|
||||
self.remaining_data = self.freq_handler.read_freq_block(self.remaining_data);
|
||||
@@ -42,9 +42,9 @@ impl<'a> SegmentPostings<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_data(doc_freq: u32, data: &'a [u8], freq_handler: FreqHandler) -> SegmentPostings<'a> {
|
||||
pub fn from_data(len: u32, data: &'a [u8], freq_handler: FreqHandler) -> SegmentPostings<'a> {
|
||||
SegmentPostings {
|
||||
doc_freq: doc_freq as usize,
|
||||
len: len as usize,
|
||||
doc_offset: 0,
|
||||
block_decoder: SIMDBlockDecoder::new(),
|
||||
freq_handler: freq_handler,
|
||||
@@ -66,9 +66,9 @@ impl<'a> DocSet for SegmentPostings<'a> {
|
||||
|
||||
// goes to the next element.
|
||||
// advance needs to be called a first time to point to the correct element.
|
||||
fn next(&mut self,) -> bool {
|
||||
fn advance(&mut self,) -> bool {
|
||||
self.cur += Wrapping(1);
|
||||
if self.cur.0 >= self.doc_freq {
|
||||
if self.cur.0 >= self.len {
|
||||
return false;
|
||||
}
|
||||
if self.index_within_block() == 0 {
|
||||
@@ -81,8 +81,11 @@ impl<'a> DocSet for SegmentPostings<'a> {
|
||||
self.block_decoder.output(self.index_within_block())
|
||||
}
|
||||
|
||||
fn doc_freq(&self,) -> usize {
|
||||
self.doc_freq
|
||||
}
|
||||
|
||||
// `HasLen` for `SegmentPostings`: exposes the stored `len` field, which
// `from_data` fills from its `len` argument and `advance` uses as the
// upper bound for the cursor.
impl<'a> HasLen for SegmentPostings<'a> {
|
||||
// Returns the `len` field unchanged.
fn len(&self,) -> usize {
|
||||
self.len
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> UnionPostings<TPos
|
||||
let num_postings = postings.len();
|
||||
assert_eq!(fieldnorms_reader.len(), num_postings);
|
||||
for posting in &mut postings {
|
||||
assert!(posting.next());
|
||||
assert!(posting.advance());
|
||||
}
|
||||
let mut term_frequencies: Vec<u32> = iter::repeat(0u32).take(num_postings).collect();
|
||||
let heap_items: Vec<HeapItem> = postings
|
||||
@@ -69,7 +69,7 @@ impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> UnionPostings<TPos
|
||||
fn advance_head(&mut self,) {
|
||||
let ord = self.queue.peek().unwrap().1 as usize;
|
||||
let cur_postings = &mut self.postings[ord];
|
||||
if cur_postings.next() {
|
||||
if cur_postings.advance() {
|
||||
let doc = cur_postings.doc();
|
||||
self.term_frequencies[ord] = cur_postings.term_freq();
|
||||
self.queue.replace(HeapItem(doc, ord as u32));
|
||||
@@ -87,7 +87,7 @@ impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> UnionPostings<TPos
|
||||
|
||||
impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> DocSet for UnionPostings<TPostings, TAccumulator> {
|
||||
|
||||
fn next(&mut self,) -> bool {
|
||||
fn advance(&mut self,) -> bool {
|
||||
self.scorer.clear();
|
||||
match self.queue.peek() {
|
||||
Some(&HeapItem(doc, ord)) => {
|
||||
@@ -122,15 +122,10 @@ impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> DocSet for UnionPo
|
||||
return true;
|
||||
}
|
||||
|
||||
// TODO implement a faster skip_next
|
||||
|
||||
// TODO implement a faster skip_next
|
||||
fn doc(&self,) -> DocId {
|
||||
self.doc
|
||||
}
|
||||
|
||||
fn doc_freq(&self,) -> usize {
|
||||
panic!("Doc freq");
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -175,18 +170,18 @@ mod tests {
|
||||
vec!(left, right),
|
||||
multi_term_scorer
|
||||
);
|
||||
assert!(union.next());
|
||||
assert!(union.advance());
|
||||
assert_eq!(union.doc(), 1);
|
||||
assert!(abs_diff(union.scorer().score(), 2.182179f32) < 0.001);
|
||||
assert!(union.next());
|
||||
assert!(union.advance());
|
||||
assert_eq!(union.doc(), 2);
|
||||
assert!(abs_diff(union.scorer().score(), 0.2236068) < 0.001f32);
|
||||
assert!(union.next());
|
||||
assert!(union.advance());
|
||||
assert_eq!(union.doc(), 3);
|
||||
assert!(union.next());
|
||||
assert!(union.advance());
|
||||
assert!(abs_diff(union.scorer().score(), 0.8944272f32) < 0.001f32);
|
||||
assert_eq!(union.doc(), 8);
|
||||
assert!(!union.next());
|
||||
assert!(!union.advance());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#![allow(dead_code)]
|
||||
|
||||
use DocId;
|
||||
use postings::{Postings, DocSet, SkipResult};
|
||||
use postings::{Postings, DocSet, SkipResult, HasLen};
|
||||
use std::num::Wrapping;
|
||||
use std::cmp::Ordering;
|
||||
|
||||
@@ -20,7 +20,7 @@ impl From<Vec<DocId>> for VecPostings {
|
||||
}
|
||||
|
||||
impl DocSet for VecPostings {
|
||||
fn next(&mut self,) -> bool {
|
||||
fn advance(&mut self,) -> bool {
|
||||
self.cursor += Wrapping(1);
|
||||
self.doc_ids.len() > self.cursor.0
|
||||
}
|
||||
@@ -28,10 +28,6 @@ impl DocSet for VecPostings {
|
||||
fn doc(&self,) -> DocId {
|
||||
self.doc_ids[self.cursor.0]
|
||||
}
|
||||
|
||||
fn doc_freq(&self,) -> usize {
|
||||
self.doc_ids.len()
|
||||
}
|
||||
|
||||
fn skip_next(&mut self, target: DocId) -> SkipResult {
|
||||
let mut start: usize = self.cursor.0;
|
||||
@@ -90,6 +86,12 @@ impl DocSet for VecPostings {
|
||||
}
|
||||
}
|
||||
|
||||
// `HasLen` for `VecPostings`: the length is the number of entries in the
// backing `doc_ids` vector.
impl HasLen for VecPostings {
|
||||
// Returns `doc_ids.len()`.
fn len(&self,) -> usize {
|
||||
self.doc_ids.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl Postings for VecPostings {
|
||||
fn term_freq(&self,) -> u32 {
|
||||
1u32
|
||||
@@ -108,9 +110,9 @@ pub mod tests {
|
||||
pub fn test_vec_postings() {
|
||||
let doc_ids: Vec<DocId> = (0u32..1024u32).map(|e| e*3).collect();
|
||||
let mut postings = VecPostings::from(doc_ids);
|
||||
assert!(postings.next());
|
||||
assert!(postings.advance());
|
||||
assert_eq!(postings.doc(), 0u32);
|
||||
assert!(postings.next());
|
||||
assert!(postings.advance());
|
||||
assert_eq!(postings.doc(), 3u32);
|
||||
assert_eq!(postings.term_freq(), 1u32);
|
||||
assert_eq!(postings.skip_next(14u32), SkipResult::OverStep);
|
||||
|
||||
@@ -72,7 +72,7 @@ impl Query for MultiTermQuery {
|
||||
segment_search_timer.open("get_postings"));
|
||||
{
|
||||
let _collection_timer = segment_search_timer.open("collection");
|
||||
while postings.next() {
|
||||
while postings.advance() {
|
||||
let scored_doc = ScoredDoc(postings.scorer().score(), postings.doc());
|
||||
collector.collect(scored_doc);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user