mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-07 17:42:55 +00:00
issue/67 Added an advance interface to the term iterator
This commit is contained in:
@@ -53,6 +53,49 @@ impl<'a> TermIterator<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Advance the term iterator to the next term.
|
||||
/// Returns true if there is indeed another term
|
||||
/// False if there is none.
|
||||
pub fn advance(&mut self) -> bool {
|
||||
self.advance_segments();
|
||||
if let Some(mut head) = self.heap.pop() {
|
||||
mem::swap(&mut self.current_term, &mut head.term);
|
||||
self.current_segment_ords.push(head.segment_ord);
|
||||
loop {
|
||||
match self.heap.peek() {
|
||||
Some(&ref next_heap_it) if next_heap_it.term == self.current_term => {}
|
||||
_ => { break; }
|
||||
}
|
||||
let next_heap_it = self.heap.pop().unwrap(); // safe : we peeked beforehand
|
||||
self.current_segment_ords.push(next_heap_it.segment_ord);
|
||||
}
|
||||
true
|
||||
}
|
||||
else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Returns the current term.
|
||||
///
|
||||
/// This method may be called
|
||||
/// iff advance() has been called before
|
||||
/// and "true" was returned.
|
||||
pub fn term(&self) -> &Term {
|
||||
&self.current_term
|
||||
}
|
||||
|
||||
/// Returns the sorted list of segment ordinals
|
||||
/// that include the current term.
|
||||
///
|
||||
/// This method may be called
|
||||
/// iff advance() has been called before
|
||||
/// and "true" was returned.
|
||||
pub fn segment_ords(&self) -> &[usize]{
|
||||
&self.current_segment_ords[..]
|
||||
}
|
||||
|
||||
fn advance_segments(&mut self) {
|
||||
for segment_ord in self.current_segment_ords.drain(..) {
|
||||
if let Some(term) = self.key_streams[segment_ord].next() {
|
||||
@@ -69,22 +112,12 @@ impl<'a, 'f> Streamer<'a> for TermIterator<'f> {
|
||||
type Item = &'a Term;
|
||||
|
||||
fn next(&'a mut self) -> Option<Self::Item> {
|
||||
self.advance_segments();
|
||||
self.heap
|
||||
.pop()
|
||||
.map(move |mut head| {
|
||||
mem::swap(&mut self.current_term, &mut head.term);
|
||||
self.current_segment_ords.push(head.segment_ord);
|
||||
loop {
|
||||
match self.heap.peek() {
|
||||
Some(&ref next_heap_it) if next_heap_it.term == self.current_term => {}
|
||||
_ => { break; }
|
||||
}
|
||||
let next_heap_it = self.heap.pop().unwrap(); // safe : we peeked beforehand
|
||||
self.current_segment_ords.push(next_heap_it.segment_ord);
|
||||
}
|
||||
&self.current_term
|
||||
})
|
||||
if self.advance() {
|
||||
Some(&self.current_term)
|
||||
}
|
||||
else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ use postings::PostingsSerializer;
|
||||
use postings::Postings;
|
||||
use postings::DocSet;
|
||||
use core::TermIterator;
|
||||
use fst::Streamer;
|
||||
use schema::{Schema, Field};
|
||||
use fastfield::FastFieldSerializer;
|
||||
use store::StoreWriter;
|
||||
@@ -132,7 +131,7 @@ impl IndexMerger {
|
||||
max_doc += reader.max_doc();
|
||||
}
|
||||
|
||||
while let Some(term) = merged_terms.next() {
|
||||
while merged_terms.advance() {
|
||||
// Create the total list of doc ids
|
||||
// by stacking the doc ids from the different segments.
|
||||
//
|
||||
@@ -142,14 +141,16 @@ impl IndexMerger {
|
||||
// - Segment 1's doc ids become [seg0.max_doc, seg0.max_doc + seg1.max_doc]
|
||||
// - Segment 2's doc ids become [seg0.max_doc + seg1.max_doc, seg0.max_doc + seg1.max_doc + seg2.max_doc]
|
||||
// ...
|
||||
let term = merged_terms.term();
|
||||
let mut merged_postings =
|
||||
ChainedPostings::from(
|
||||
self.readers
|
||||
merged_terms
|
||||
.segment_ords()
|
||||
.iter()
|
||||
.enumerate()
|
||||
.flat_map(|(segment_ord, reader)| {
|
||||
.cloned()
|
||||
.flat_map(|segment_ord| {
|
||||
let offset = offsets[segment_ord];
|
||||
reader
|
||||
self.readers[segment_ord]
|
||||
.read_postings_all_info(&term)
|
||||
.map(|segment_postings| OffsetPostings::new(segment_postings, offset))
|
||||
})
|
||||
|
||||
@@ -8,7 +8,6 @@ use std::str;
|
||||
/// A `Term` is the value a token can take.
///
/// Internally it is a thin wrapper around a `Vec<u8>`; equality, ordering
/// and hashing are all derived from that byte vector.
///
/// TODO remove pub
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Term(Vec<u8>);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user