mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 09:12:55 +00:00
Compare commits
2 Commits
removedali
...
nodeffeatf
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e4759b1d82 | ||
|
|
4026d183bc |
@@ -24,8 +24,10 @@ use crate::IndexWriter;
|
||||
use std::borrow::BorrowMut;
|
||||
use std::collections::HashSet;
|
||||
use std::fmt;
|
||||
|
||||
#[cfg(feature = "mmap")]
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
fn load_metas(
|
||||
|
||||
@@ -295,8 +295,8 @@ impl SegmentReader {
|
||||
}
|
||||
|
||||
/// Returns an iterator that will iterate over the alive document ids
|
||||
pub fn doc_ids_alive<'a>(&'a self) -> impl Iterator<Item = DocId> + 'a {
|
||||
(0u32..self.max_doc).filter(move |doc| !self.is_deleted(*doc))
|
||||
pub fn doc_ids_alive(&self) -> SegmentReaderAliveDocsIterator<'_> {
|
||||
SegmentReaderAliveDocsIterator::new(&self)
|
||||
}
|
||||
|
||||
/// Summarize total space usage of this segment.
|
||||
@@ -324,6 +324,52 @@ impl fmt::Debug for SegmentReader {
|
||||
}
|
||||
}
|
||||
|
||||
/// Implements the iterator trait to allow easy iteration
|
||||
/// over non-deleted ("alive") DocIds in a SegmentReader
///
/// The iterator borrows the `SegmentReader` for the lifetime `'a` and walks
/// candidate doc ids from `0` up to (but not including) `max_doc`, asking the
/// reader whether each one is deleted.
|
||||
pub struct SegmentReaderAliveDocsIterator<'a> {
|
||||
// Reader whose `is_deleted` is consulted for every candidate doc id.
reader: &'a SegmentReader,
|
||||
// Exclusive upper bound of the scan, captured from `reader.max_doc()`
// when the iterator is constructed.
max_doc: DocId,
|
||||
// Next doc id to examine; starts at 0 and only ever increases.
current: DocId,
|
||||
}
|
||||
|
||||
impl<'a> SegmentReaderAliveDocsIterator<'a> {
|
||||
/// Creates an iterator over the alive doc ids of `reader`.
///
/// The scan starts at doc id `0`, and the upper bound is read once from
/// `reader.max_doc()` here rather than on every call to `next`.
pub fn new(reader: &'a SegmentReader) -> SegmentReaderAliveDocsIterator<'a> {
|
||||
SegmentReaderAliveDocsIterator {
|
||||
reader,
|
||||
// Snapshot of the bound; `next` compares `current` against this value.
max_doc: reader.max_doc(),
|
||||
current: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for SegmentReaderAliveDocsIterator<'a> {
|
||||
type Item = DocId;
|
||||
|
||||
/// Returns the next doc id, in increasing order, for which
/// `reader.is_deleted` is false, or `None` once `current` has reached
/// `max_doc`.
fn next(&mut self) -> Option<Self::Item> {
|
||||
// TODO: Use TinySet (like in BitSetDocSet) to speed this process up
|
||||
// Range exhausted. This check also ensures `is_deleted` below is only
// ever called with a doc id strictly below `max_doc`.
if self.current >= self.max_doc {
|
||||
return None;
|
||||
}
|
||||
|
||||
// find the next alive doc id
|
||||
while self.reader.is_deleted(self.current) {
|
||||
self.current += 1;
|
||||
|
||||
// Re-check the bound after every skip so that a trailing run of
// deleted docs ends the iteration instead of probing past the end.
if self.current >= self.max_doc {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
// capture the current alive DocId
|
||||
let result = Some(self.current);
|
||||
|
||||
// move down the chain: step past the id being yielded so the next call
// resumes with the following candidate
|
||||
self.current += 1;
|
||||
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::core::Index;
|
||||
|
||||
@@ -589,7 +589,9 @@ impl IndexMerger {
|
||||
// of all of the segments containing the given term.
|
||||
//
|
||||
// These segments are non-empty and advance has already been called.
|
||||
if !segment_postings.is_empty() {
|
||||
if segment_postings.is_empty() {
|
||||
continue;
|
||||
}
|
||||
// If not, the `term` will be entirely removed.
|
||||
|
||||
// We know that there is at least one document containing
|
||||
@@ -617,11 +619,7 @@ impl IndexMerger {
|
||||
segment_postings.positions(&mut positions_buffer);
|
||||
|
||||
let delta_positions = delta_computer.compute_delta(&positions_buffer);
|
||||
field_serializer.write_doc(
|
||||
remapped_doc_id,
|
||||
term_freq,
|
||||
delta_positions,
|
||||
)?;
|
||||
field_serializer.write_doc(remapped_doc_id, term_freq, delta_positions)?;
|
||||
}
|
||||
|
||||
doc = segment_postings.advance();
|
||||
@@ -631,7 +629,6 @@ impl IndexMerger {
|
||||
// closing the term.
|
||||
field_serializer.close_term()?;
|
||||
}
|
||||
}
|
||||
field_serializer.close()?;
|
||||
Ok(term_ord_mapping_opt)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user