mirror of https://github.com/quickwit-oss/tantivy.git (synced 2026-01-01 23:12:54 +00:00)
Compare commits: removedali...nodeffeatf (2 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | e4759b1d82 |  |
|  | 4026d183bc |  |
```diff
@@ -24,8 +24,10 @@ use crate::IndexWriter;
 use std::borrow::BorrowMut;
 use std::collections::HashSet;
+use std::fmt;
+
 #[cfg(feature = "mmap")]
-use std::path::{Path, PathBuf};
-
+use std::path::Path;
+use std::path::PathBuf;
 use std::sync::Arc;
 
 fn load_metas(
```
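This hunk splits the grouped `Path`/`PathBuf` import so that only `Path` stays behind the `mmap` feature gate, which suggests `PathBuf` is now needed in every build configuration. Below is a minimal, self-contained sketch of that gating pattern; `Catalog` and `open_dir` are illustrative names, not tantivy APIs:

```rust
// Sketch of the feature-gating pattern in the hunk above. `Catalog` and
// `open_dir` are hypothetical stand-ins, not tantivy items.
#[cfg(feature = "mmap")]
use std::path::Path;
use std::path::PathBuf;

pub struct Catalog {
    // Used in every build configuration, so `PathBuf` stays ungated.
    location: Option<PathBuf>,
}

impl Catalog {
    // Compiled only with `--features mmap`; gating the `Path` import the
    // same way keeps `--no-default-features` builds free of unused imports.
    #[cfg(feature = "mmap")]
    pub fn open_dir(dir: &Path) -> Catalog {
        Catalog {
            location: Some(dir.to_path_buf()),
        }
    }
}

fn main() {
    let catalog = Catalog { location: None };
    println!("location: {:?}", catalog.location);
}
```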
```diff
@@ -295,8 +295,8 @@ impl SegmentReader {
     }
 
     /// Returns an iterator that will iterate over the alive document ids
-    pub fn doc_ids_alive<'a>(&'a self) -> impl Iterator<Item = DocId> + 'a {
-        (0u32..self.max_doc).filter(move |doc| !self.is_deleted(*doc))
+    pub fn doc_ids_alive(&self) -> SegmentReaderAliveDocsIterator<'_> {
+        SegmentReaderAliveDocsIterator::new(&self)
     }
 
     /// Summarize total space usage of this segment.
```
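A rough usage sketch of the reworked `doc_ids_alive`, assuming the tantivy API of roughly this era (`Index::create_in_ram`, the `doc!` macro, `delete_term`); the schema, field name, and heap-size argument are illustrative, and details such as `add_document`'s return type vary across versions:

```rust
use tantivy::schema::{Schema, TEXT};
use tantivy::{doc, Index, Term};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(title => "alpha"));
    writer.add_document(doc!(title => "beta"));
    writer.commit()?;

    // Delete one document, then iterate only over the ids that survived.
    writer.delete_term(Term::from_field_text(title, "alpha"));
    writer.commit()?;

    let searcher = index.reader()?.searcher();
    for segment_reader in searcher.segment_readers() {
        for doc_id in segment_reader.doc_ids_alive() {
            println!("alive doc id: {}", doc_id);
        }
    }
    Ok(())
}
```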
```diff
@@ -324,6 +324,52 @@ impl fmt::Debug for SegmentReader {
     }
 }
+
+/// Implements the iterator trait to allow easy iteration
+/// over non-deleted ("alive") DocIds in a SegmentReader
+pub struct SegmentReaderAliveDocsIterator<'a> {
+    reader: &'a SegmentReader,
+    max_doc: DocId,
+    current: DocId,
+}
+
+impl<'a> SegmentReaderAliveDocsIterator<'a> {
+    pub fn new(reader: &'a SegmentReader) -> SegmentReaderAliveDocsIterator<'a> {
+        SegmentReaderAliveDocsIterator {
+            reader,
+            max_doc: reader.max_doc(),
+            current: 0,
+        }
+    }
+}
+
+impl<'a> Iterator for SegmentReaderAliveDocsIterator<'a> {
+    type Item = DocId;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // TODO: Use TinySet (like in BitSetDocSet) to speed this process up
+        if self.current >= self.max_doc {
+            return None;
+        }
+
+        // find the next alive doc id
+        while self.reader.is_deleted(self.current) {
+            self.current += 1;
+
+            if self.current >= self.max_doc {
+                return None;
+            }
+        }
+
+        // capture the current alive DocId
+        let result = Some(self.current);
+
+        // move down the chain
+        self.current += 1;
+
+        result
+    }
+}
 
 #[cfg(test)]
 mod test {
     use crate::core::Index;
```
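The new struct is behaviorally equivalent to the removed `filter`-based body: both walk doc ids from 0 up to `max_doc` and skip the deleted ones. Here is a self-contained sketch of the same skip-deleted iterator pattern over a plain boolean delete mask; `AliveIds` and `deleted` are illustrative names, not tantivy's:

```rust
// Self-contained sketch of the "skip deleted ids" iterator pattern above,
// using a boolean delete mask instead of a SegmentReader.
struct AliveIds<'a> {
    deleted: &'a [bool],
    current: u32,
}

impl<'a> Iterator for AliveIds<'a> {
    type Item = u32;

    fn next(&mut self) -> Option<u32> {
        // Advance past deleted slots until an alive one (or the end) is found.
        while (self.current as usize) < self.deleted.len() {
            let id = self.current;
            self.current += 1;
            if !self.deleted[id as usize] {
                return Some(id);
            }
        }
        None
    }
}

fn main() {
    let deleted = [false, true, false, true, true, false];
    let alive: Vec<u32> = AliveIds { deleted: &deleted, current: 0 }.collect();
    assert_eq!(alive, vec![0, 2, 5]);
    println!("{:?}", alive);
}
```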
```diff
@@ -589,48 +589,45 @@ impl IndexMerger {
             // of all of the segments containing the given term.
             //
             // These segments are non-empty and advance has already been called.
-            if !segment_postings.is_empty() {
-                // If not, the `term` will be entirely removed.
-
-                // We know that there is at least one document containing
-                // the term, so we add it.
-                let to_term_ord = field_serializer.new_term(term_bytes)?;
-
-                if let Some(ref mut term_ord_mapping) = term_ord_mapping_opt {
-                    for (segment_ord, from_term_ord) in merged_terms.matching_segments() {
-                        term_ord_mapping.register_from_to(segment_ord, from_term_ord, to_term_ord);
-                    }
-                }
-
-                // We can now serialize this postings, by pushing each document to the
-                // postings serializer.
-                for (segment_ord, mut segment_postings) in segment_postings {
-                    let old_to_new_doc_id = &merged_doc_id_map[segment_ord];
-
-                    let mut doc = segment_postings.doc();
-                    while doc != TERMINATED {
-                        // deleted doc are skipped as they do not have a `remapped_doc_id`.
-                        if let Some(remapped_doc_id) = old_to_new_doc_id[doc as usize] {
-                            // we make sure to only write the term iff
-                            // there is at least one document.
-                            let term_freq = segment_postings.term_freq();
-                            segment_postings.positions(&mut positions_buffer);
-
-                            let delta_positions = delta_computer.compute_delta(&positions_buffer);
-                            field_serializer.write_doc(
-                                remapped_doc_id,
-                                term_freq,
-                                delta_positions,
-                            )?;
-                        }
-
-                        doc = segment_postings.advance();
-                    }
-                }
-
-                // closing the term.
-                field_serializer.close_term()?;
-            }
+            if segment_postings.is_empty() {
+                continue;
+            }
+            // If not, the `term` will be entirely removed.
+
+            // We know that there is at least one document containing
+            // the term, so we add it.
+            let to_term_ord = field_serializer.new_term(term_bytes)?;
+
+            if let Some(ref mut term_ord_mapping) = term_ord_mapping_opt {
+                for (segment_ord, from_term_ord) in merged_terms.matching_segments() {
+                    term_ord_mapping.register_from_to(segment_ord, from_term_ord, to_term_ord);
+                }
+            }
+
+            // We can now serialize this postings, by pushing each document to the
+            // postings serializer.
+            for (segment_ord, mut segment_postings) in segment_postings {
+                let old_to_new_doc_id = &merged_doc_id_map[segment_ord];
+
+                let mut doc = segment_postings.doc();
+                while doc != TERMINATED {
+                    // deleted doc are skipped as they do not have a `remapped_doc_id`.
+                    if let Some(remapped_doc_id) = old_to_new_doc_id[doc as usize] {
+                        // we make sure to only write the term iff
+                        // there is at least one document.
+                        let term_freq = segment_postings.term_freq();
+                        segment_postings.positions(&mut positions_buffer);
+
+                        let delta_positions = delta_computer.compute_delta(&positions_buffer);
+                        field_serializer.write_doc(remapped_doc_id, term_freq, delta_positions)?;
+                    }
+
+                    doc = segment_postings.advance();
+                }
+            }
+
+            // closing the term.
+            field_serializer.close_term()?;
         }
         field_serializer.close()?;
         Ok(term_ord_mapping_opt)
```
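The merger hunk is a pure guard-clause refactor: the `if !segment_postings.is_empty() { ... }` wrapper around the whole loop body becomes an early `continue`, dropping one level of indentation (which is also why the multi-line `write_doc` call now fits on one line). A toy sketch of the same transformation; `emit_non_empty` and its inputs are illustrative, not tantivy code:

```rust
// Guard-clause refactor in miniature: invert the condition and `continue`
// early instead of nesting the whole loop body inside an `if`.
fn emit_non_empty(terms: &[(&str, Vec<u32>)]) -> Vec<String> {
    let mut out = Vec::new();
    for (term, postings) in terms {
        // Before: `if !postings.is_empty() { ...whole body... }`.
        // After: bail out early and keep the body flat.
        if postings.is_empty() {
            continue;
        }
        out.push(format!("{}:{:?}", term, postings));
    }
    out
}

fn main() {
    let terms = [("a", vec![1, 2]), ("b", vec![]), ("c", vec![7])];
    assert_eq!(emit_non_empty(&terms), vec!["a:[1, 2]", "c:[7]"]);
}
```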