diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 72cdc29a8..67538c051 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -1130,15 +1130,11 @@ mod tests { let segment_ids = index .searchable_segment_ids() .expect("Searchable segments failed."); - index_writer - .merge(&segment_ids) - .expect("Failed to initiate merge") - .wait() - .expect("Merging failed"); index.load_searchers().unwrap(); let ref searcher = *index.searcher(); - assert_eq!(searcher.segment_readers().len(), 1); + assert!(segment_ids.is_empty()); + assert!(searcher.segment_readers().is_empty()); assert_eq!(searcher.num_docs(), 0); } } @@ -1303,24 +1299,26 @@ mod tests { index_writer.add_document(doc); index_writer.commit().expect("commit failed"); index_writer.delete_term(Term::from_field_u64(int_field, 1)); - index_writer.commit().expect("commit failed"); - } - index.load_searchers().unwrap(); - let searcher = index.searcher(); - assert_eq!(searcher.num_docs(), 0); - // Merging the segments - { + let segment_ids = index .searchable_segment_ids() .expect("Searchable segments failed."); - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); index_writer .merge(&segment_ids) .expect("Failed to initiate merge") .wait() .expect("Merging failed"); + + // assert delete has not been committed + index.load_searchers().unwrap(); + let searcher = index.searcher(); + assert_eq!(searcher.num_docs(), 2); + + index_writer.commit().unwrap(); + index_writer.wait_merging_threads().unwrap(); } + index.load_searchers().unwrap(); let searcher = index.searcher(); assert_eq!(searcher.num_docs(), 0); diff --git a/src/indexer/segment_manager.rs b/src/indexer/segment_manager.rs index 0e67d3b15..1e805eba1 100644 --- a/src/indexer/segment_manager.rs +++ b/src/indexer/segment_manager.rs @@ -106,6 +106,14 @@ impl SegmentManager { .expect("Failed to acquire write lock on SegmentManager.") } + /// Deletes all empty segments + fn remove_empty_segments(&self) { + let mut 
registers_lock = self.write(); + registers_lock.committed.segment_entries().iter() + .filter(|segment| segment.meta().num_docs() == 0) + .for_each(|segment| registers_lock.committed.remove_segment(&segment.segment_id())); + } + pub fn commit(&self, segment_entries: Vec<SegmentEntry>) { let mut registers_lock = self.write(); registers_lock.committed.clear(); @@ -229,6 +237,7 @@ impl SegmentManager { } pub fn committed_segment_metas(&self) -> Vec<SegmentMeta> { + self.remove_empty_segments(); let registers_lock = self.read(); registers_lock.committed.segment_metas() } diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs index ac2930529..b87aa717b 100644 --- a/src/indexer/segment_updater.rs +++ b/src/indexer/segment_updater.rs @@ -599,4 +599,73 @@ mod tests { assert_eq!(index.searcher().segment_readers().len(), 1); assert_eq!(index.searcher().num_docs(), 302); } + + #[test] + fn delete_all_docs() { + let mut schema_builder = Schema::builder(); + let text_field = schema_builder.add_text_field("text", TEXT); + let schema = schema_builder.build(); + + let index = Index::create_in_ram(schema); + + // writing the segment + let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + + { + for _ in 0..100 { + index_writer.add_document(doc!(text_field=>"a")); + index_writer.add_document(doc!(text_field=>"b")); + } + assert!(index_writer.commit().is_ok()); + } + + { + for _ in 0..100 { + index_writer.add_document(doc!(text_field=>"c")); + index_writer.add_document(doc!(text_field=>"d")); + } + assert!(index_writer.commit().is_ok()); + } + + { + index_writer.add_document(doc!(text_field=>"e")); + index_writer.add_document(doc!(text_field=>"f")); + assert!(index_writer.commit().is_ok()); + } + + { + let seg_ids = index.searchable_segment_ids() + .expect("Searchable segments failed."); + // docs exist, should have at least 1 segment + assert!(seg_ids.len() > 0); + } + + { + let term_vals = vec!["a", "b", "c", "d", "e", "f"]; + for term_val in term_vals { + let 
term = Term::from_field_text(text_field, term_val); + index_writer.delete_term(term); + assert!(index_writer.commit().is_ok()); + } + } + + { + index_writer + .wait_merging_threads() + .expect("waiting for merging threads"); + } + + index.load_searchers().unwrap(); + assert_eq!(index.searcher().num_docs(), 0); + + let seg_ids = index.searchable_segment_ids() + .expect("Searchable segments failed."); + assert!(seg_ids.is_empty()); + + index.load_searchers().unwrap(); + assert_eq!(index.searcher().num_docs(), 0); + // empty segments should be erased + assert!(index.searchable_segment_metas().unwrap().is_empty()); + assert!(index.searcher().segment_readers().is_empty()); + } } diff --git a/src/postings/mod.rs b/src/postings/mod.rs index 199ce6efd..efaa006ea 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -457,25 +457,13 @@ pub mod tests { index.load_searchers().unwrap(); let searcher = index.searcher(); - let segment_reader = searcher.segment_reader(0); // finally, check that it's empty { - let mut segment_postings = segment_reader - .inverted_index(term_2.field()) - .read_postings(&term_2, IndexRecordOption::Basic) - .unwrap(); - - assert_eq!(segment_postings.skip_next(0), SkipResult::Reached); - assert_eq!(segment_postings.doc(), 0); - assert!(segment_reader.is_deleted(0)); - - let mut segment_postings = segment_reader - .inverted_index(term_2.field()) - .read_postings(&term_2, IndexRecordOption::Basic) - .unwrap(); - - assert_eq!(segment_postings.skip_next(num_docs), SkipResult::End); + let searchable_segment_ids = index.searchable_segment_ids() + .expect("could not get index segment ids"); + assert!(searchable_segment_ids.is_empty()); + assert_eq!(searcher.num_docs(), 0); } }