diff --git a/src/core/term_iterator.rs b/src/core/term_iterator.rs index fb2d70517..3a5e259f7 100644 --- a/src/core/term_iterator.rs +++ b/src/core/term_iterator.rs @@ -53,6 +53,49 @@ impl<'a> TermIterator<'a> { } } + /// Advance the term iterator to the next term. + /// Returns true if there is indeed another term + /// False if there is none. + pub fn advance(&mut self) -> bool { + self.advance_segments(); + if let Some(mut head) = self.heap.pop() { + mem::swap(&mut self.current_term, &mut head.term); + self.current_segment_ords.push(head.segment_ord); + loop { + match self.heap.peek() { + Some(&ref next_heap_it) if next_heap_it.term == self.current_term => {} + _ => { break; } + } + let next_heap_it = self.heap.pop().unwrap(); // safe : we peeked beforehand + self.current_segment_ords.push(next_heap_it.segment_ord); + } + true + } + else { + false + } + } + + + /// Returns the current term. + /// + /// This method may be called + /// iff advance() has been called before + /// and "true" was returned. + pub fn term(&self) -> &Term { + &self.current_term + } + + /// Returns the sorted list of segment ordinals + /// that include the current term. + /// + /// This method may be called + /// iff advance() has been called before + /// and "true" was returned. + pub fn segment_ords(&self) -> &[usize]{ + &self.current_segment_ords[..] + } + fn advance_segments(&mut self) { for segment_ord in self.current_segment_ords.drain(..) { if let Some(term) = self.key_streams[segment_ord].next() { @@ -69,22 +112,12 @@ impl<'a, 'f> Streamer<'a> for TermIterator<'f> { type Item = &'a Term; fn next(&'a mut self) -> Option { - self.advance_segments(); - self.heap - .pop() - .map(move |mut head| { - mem::swap(&mut self.current_term, &mut head.term); - self.current_segment_ords.push(head.segment_ord); - loop { - match self.heap.peek() { - Some(&ref next_heap_it) if next_heap_it.term == self.current_term => {} - _ => { break; } - } - let next_heap_it = self.heap.pop().unwrap(); // safe : we peeked beforehand - self.current_segment_ords.push(next_heap_it.segment_ord); - } - &self.current_term - }) + if self.advance() { + Some(&self.current_term) + } + else { + None + } } } diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 4bc7e049e..4e616d5f0 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -8,7 +8,6 @@ use postings::PostingsSerializer; use postings::Postings; use postings::DocSet; use core::TermIterator; -use fst::Streamer; use schema::{Schema, Field}; use fastfield::FastFieldSerializer; use store::StoreWriter; @@ -132,7 +131,7 @@ impl IndexMerger { max_doc += reader.max_doc(); } - while let Some(term) = merged_terms.next() { + while merged_terms.advance() { // Create the total list of doc ids // by stacking the doc ids from the different segment. // @@ -142,14 +141,16 @@ impl IndexMerger { // - Segment 1's doc ids become [seg0.max_doc, seg0.max_doc + seg.max_doc] // - Segment 2's doc ids become [seg0.max_doc + seg1.max_doc, seg0.max_doc + seg1.max_doc + seg2.max_doc] // ... + let term = merged_terms.term(); let mut merged_postings = ChainedPostings::from( - self.readers + merged_terms + .segment_ords() .iter() - .enumerate() - .flat_map(|(segment_ord, reader)| { + .cloned() + .flat_map(|segment_ord| { let offset = offsets[segment_ord]; - reader + self.readers[segment_ord] .read_postings_all_info(&term) .map(|segment_postings| OffsetPostings::new(segment_postings, offset)) }) diff --git a/src/schema/term.rs b/src/schema/term.rs index c0c5009fe..305aac6b6 100644 --- a/src/schema/term.rs +++ b/src/schema/term.rs @@ -8,7 +8,6 @@ use std::str; /// Term represents the value that the token can take. /// /// It actually wraps a `Vec`. -/// TODO remove pub #[derive(Clone, PartialEq, PartialOrd, Ord, Eq, Hash)] pub struct Term(Vec);