Compare commits

..

2 Commits

Author SHA1 Message Date
Paul Masurel
e4759b1d82 Fixes build for no-default-features 2020-06-05 19:40:32 +09:00
Paul Masurel
4026d183bc Small readability change 2020-06-03 09:04:57 +09:00
3 changed files with 89 additions and 44 deletions

View File

@@ -24,8 +24,10 @@ use crate::IndexWriter;
use std::borrow::BorrowMut;
use std::collections::HashSet;
use std::fmt;
#[cfg(feature = "mmap")]
use std::path::{Path, PathBuf};
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
fn load_metas(

View File

@@ -295,8 +295,8 @@ impl SegmentReader {
}
/// Returns an iterator that will iterate over the alive document ids
pub fn doc_ids_alive<'a>(&'a self) -> impl Iterator<Item = DocId> + 'a {
(0u32..self.max_doc).filter(move |doc| !self.is_deleted(*doc))
pub fn doc_ids_alive(&self) -> SegmentReaderAliveDocsIterator<'_> {
SegmentReaderAliveDocsIterator::new(&self)
}
/// Summarize total space usage of this segment.
@@ -324,6 +324,52 @@ impl fmt::Debug for SegmentReader {
}
}
/// Implements the iterator trait to allow easy iteration
/// over non-deleted ("alive") DocIds in a SegmentReader
///
/// Doc ids are yielded in increasing order, starting from 0 and stopping
/// before `max_doc`; ids for which the reader reports `is_deleted` are skipped.
pub struct SegmentReaderAliveDocsIterator<'a> {
// Segment reader queried through `is_deleted` for every candidate doc id.
reader: &'a SegmentReader,
// Exclusive upper bound of the scan, taken from `reader.max_doc()` at construction.
max_doc: DocId,
// Next doc id to examine; starts at 0 and only ever moves forward.
current: DocId,
}
impl<'a> SegmentReaderAliveDocsIterator<'a> {
pub fn new(reader: &'a SegmentReader) -> SegmentReaderAliveDocsIterator<'a> {
SegmentReaderAliveDocsIterator {
reader,
max_doc: reader.max_doc(),
current: 0,
}
}
}
impl<'a> Iterator for SegmentReaderAliveDocsIterator<'a> {
    type Item = DocId;

    /// Yields the next doc id in `current..max_doc` that the reader does not
    /// report as deleted, or `None` once that range is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        // TODO: Use TinySet (like in BitSetDocSet) to speed this process up
        let reader = self.reader;
        // Probe candidates in increasing order and stop at the first alive one.
        let alive = (self.current..self.max_doc).find(|&doc| !reader.is_deleted(doc));
        self.current = match alive {
            // Resume right after the doc id that is about to be returned.
            Some(doc) => doc + 1,
            // Nothing alive remains: leave the cursor parked at the end of
            // the range so later calls return `None` without re-scanning.
            None => self.current.max(self.max_doc),
        };
        alive
    }
}
#[cfg(test)]
mod test {
use crate::core::Index;

View File

@@ -589,7 +589,9 @@ impl IndexMerger {
// of all of the segments containing the given term.
//
// These segments are non-empty and advance has already been called.
if !segment_postings.is_empty() {
if segment_postings.is_empty() {
continue;
}
// If not, the `term` will be entirely removed.
// We know that there is at least one document containing
@@ -617,11 +619,7 @@ impl IndexMerger {
segment_postings.positions(&mut positions_buffer);
let delta_positions = delta_computer.compute_delta(&positions_buffer);
field_serializer.write_doc(
remapped_doc_id,
term_freq,
delta_positions,
)?;
field_serializer.write_doc(remapped_doc_id, term_freq, delta_positions)?;
}
doc = segment_postings.advance();
@@ -631,7 +629,6 @@ impl IndexMerger {
// closing the term.
field_serializer.close_term()?;
}
}
field_serializer.close()?;
Ok(term_ord_mapping_opt)
}