Compare commits

..

1 Commits

Author SHA1 Message Date
Paul Masurel
8861919d5f Alive doc iterator. 2020-06-02 09:06:46 +09:00
3 changed files with 40 additions and 85 deletions

View File

@@ -24,10 +24,8 @@ use crate::IndexWriter;
use std::borrow::BorrowMut; use std::borrow::BorrowMut;
use std::collections::HashSet; use std::collections::HashSet;
use std::fmt; use std::fmt;
#[cfg(feature = "mmap")] #[cfg(feature = "mmap")]
use std::path::Path; use std::path::{Path, PathBuf};
use std::path::PathBuf;
use std::sync::Arc; use std::sync::Arc;
fn load_metas( fn load_metas(

View File

@@ -295,8 +295,8 @@ impl SegmentReader {
} }
/// Returns an iterator that will iterate over the alive document ids /// Returns an iterator that will iterate over the alive document ids
pub fn doc_ids_alive(&self) -> SegmentReaderAliveDocsIterator<'_> { pub fn doc_ids_alive<'a>(&'a self) -> impl Iterator<Item = DocId> + 'a {
SegmentReaderAliveDocsIterator::new(&self) (0u32..self.max_doc).filter(move |doc| !self.is_deleted(*doc))
} }
/// Summarize total space usage of this segment. /// Summarize total space usage of this segment.
@@ -324,52 +324,6 @@ impl fmt::Debug for SegmentReader {
} }
} }
/// Iterator over the non-deleted ("alive") `DocId`s of a `SegmentReader`.
///
/// Doc ids are visited in increasing order, starting at `0` and stopping
/// before the reader's `max_doc()` as observed when the iterator was built.
pub struct SegmentReaderAliveDocsIterator<'a> {
    /// Segment whose deletion status is consulted for every candidate doc id.
    reader: &'a SegmentReader,
    /// Exclusive upper bound on the doc ids to visit.
    max_doc: DocId,
    /// Next doc id that has not been examined yet.
    current: DocId,
}

impl<'a> SegmentReaderAliveDocsIterator<'a> {
    /// Builds an iterator positioned on the first doc id (`0`) of `reader`.
    ///
    /// The upper bound is read once from `reader.max_doc()` here and is not
    /// refreshed afterwards.
    pub fn new(reader: &'a SegmentReader) -> SegmentReaderAliveDocsIterator<'a> {
        let max_doc = reader.max_doc();
        Self {
            reader,
            max_doc,
            current: 0,
        }
    }
}
impl<'a> Iterator for SegmentReaderAliveDocsIterator<'a> {
    type Item = DocId;

    /// Yields the next doc id below `max_doc` that the reader does not report
    /// as deleted, or `None` once every doc id has been examined.
    fn next(&mut self) -> Option<Self::Item> {
        // TODO: Use TinySet (like in BitSetDocSet) to speed this process up
        while self.current < self.max_doc {
            // Consume the candidate up front so the cursor always points at
            // the first doc id that has not been examined yet.
            let candidate = self.current;
            self.current += 1;
            if !self.reader.is_deleted(candidate) {
                return Some(candidate);
            }
        }
        // Every doc id in `0..max_doc` has been examined.
        None
    }
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use crate::core::Index; use crate::core::Index;

View File

@@ -589,9 +589,7 @@ impl IndexMerger {
// of all of the segments containing the given term. // of all of the segments containing the given term.
// //
// These segments are non-empty and advance has already been called. // These segments are non-empty and advance has already been called.
if segment_postings.is_empty() { if !segment_postings.is_empty() {
continue;
}
// If not, the `term` will be entirely removed. // If not, the `term` will be entirely removed.
// We know that there is at least one document containing // We know that there is at least one document containing
@@ -619,7 +617,11 @@ impl IndexMerger {
segment_postings.positions(&mut positions_buffer); segment_postings.positions(&mut positions_buffer);
let delta_positions = delta_computer.compute_delta(&positions_buffer); let delta_positions = delta_computer.compute_delta(&positions_buffer);
field_serializer.write_doc(remapped_doc_id, term_freq, delta_positions)?; field_serializer.write_doc(
remapped_doc_id,
term_freq,
delta_positions,
)?;
} }
doc = segment_postings.advance(); doc = segment_postings.advance();
@@ -629,6 +631,7 @@ impl IndexMerger {
// closing the term. // closing the term.
field_serializer.close_term()?; field_serializer.close_term()?;
} }
}
field_serializer.close()?; field_serializer.close()?;
Ok(term_ord_mapping_opt) Ok(term_ord_mapping_opt)
} }