issue/67 Added an advance interface to the term iterator

This commit is contained in:
Paul Masurel
2016-12-20 11:22:34 +01:00
parent 98cdc83428
commit d3d34be167
3 changed files with 56 additions and 23 deletions

View File

@@ -53,6 +53,49 @@ impl<'a> TermIterator<'a> {
}
}
/// Advance the term iterator to the next term.
/// Returns true if there is indeed another term
/// False if there is none.
pub fn advance(&mut self) -> bool {
self.advance_segments();
if let Some(mut head) = self.heap.pop() {
mem::swap(&mut self.current_term, &mut head.term);
self.current_segment_ords.push(head.segment_ord);
loop {
match self.heap.peek() {
Some(&ref next_heap_it) if next_heap_it.term == self.current_term => {}
_ => { break; }
}
let next_heap_it = self.heap.pop().unwrap(); // safe : we peeked beforehand
self.current_segment_ords.push(next_heap_it.segment_ord);
}
true
}
else {
false
}
}
/// Returns a reference to the term the iterator is currently positioned on.
///
/// This method may only be called after `advance()` has been called
/// and has returned `true`. The returned reference borrows from the
/// iterator itself.
pub fn term(&self) -> &Term {
&self.current_term
}
/// Returns the ordinals, in sorted order, of the segments
/// that contain the current term.
///
/// Only meaningful once `advance()` has been called
/// and has returned `true`.
pub fn segment_ords(&self) -> &[usize] {
    &self.current_segment_ords
}
fn advance_segments(&mut self) {
for segment_ord in self.current_segment_ords.drain(..) {
if let Some(term) = self.key_streams[segment_ord].next() {
@@ -69,22 +112,12 @@ impl<'a, 'f> Streamer<'a> for TermIterator<'f> {
type Item = &'a Term;
fn next(&'a mut self) -> Option<Self::Item> {
self.advance_segments();
self.heap
.pop()
.map(move |mut head| {
mem::swap(&mut self.current_term, &mut head.term);
self.current_segment_ords.push(head.segment_ord);
loop {
match self.heap.peek() {
Some(&ref next_heap_it) if next_heap_it.term == self.current_term => {}
_ => { break; }
}
let next_heap_it = self.heap.pop().unwrap(); // safe : we peeked beforehand
self.current_segment_ords.push(next_heap_it.segment_ord);
}
&self.current_term
})
if self.advance() {
Some(&self.current_term)
}
else {
None
}
}
}

View File

@@ -8,7 +8,6 @@ use postings::PostingsSerializer;
use postings::Postings;
use postings::DocSet;
use core::TermIterator;
use fst::Streamer;
use schema::{Schema, Field};
use fastfield::FastFieldSerializer;
use store::StoreWriter;
@@ -132,7 +131,7 @@ impl IndexMerger {
max_doc += reader.max_doc();
}
while let Some(term) = merged_terms.next() {
while merged_terms.advance() {
// Create the total list of doc ids
// by stacking the doc ids from the different segment.
//
@@ -142,14 +141,16 @@ impl IndexMerger {
// - Segment 1's doc ids become [seg0.max_doc, seg0.max_doc + seg.max_doc]
// - Segment 2's doc ids become [seg0.max_doc + seg1.max_doc, seg0.max_doc + seg1.max_doc + seg2.max_doc]
// ...
let term = merged_terms.term();
let mut merged_postings =
ChainedPostings::from(
self.readers
merged_terms
.segment_ords()
.iter()
.enumerate()
.flat_map(|(segment_ord, reader)| {
.cloned()
.flat_map(|segment_ord| {
let offset = offsets[segment_ord];
reader
self.readers[segment_ord]
.read_postings_all_info(&term)
.map(|segment_postings| OffsetPostings::new(segment_postings, offset))
})

View File

@@ -8,7 +8,6 @@ use std::str;
/// Term represents the value that the token can take.
///
/// It is a thin wrapper around a `Vec<u8>`.
// TODO remove pub
#[derive(Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
pub struct Term(Vec<u8>);