Tantivy-288 (#472)

* add unit test

* improved test

* added SegmentManager#remove_empty_segments

* update old tests for new behaviour

* cleaner filter for empty segments

* PR adjustments

* rename x in closures

* simplify assert_eq!(vec.len(), 0)

* wait_merging_threads

* acquire searchers

* add comments to test

* rebased on latest master

* harden test

* fix merger#test_merge_multivalued_int_fields_all_deleted test
This commit is contained in:
barrotsteindev
2019-01-24 01:58:56 +02:00
committed by Paul Masurel
parent 5292e78860
commit 222b7f2580
4 changed files with 94 additions and 30 deletions

View File

@@ -1130,15 +1130,11 @@ mod tests {
let segment_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
index_writer
.merge(&segment_ids)
.expect("Failed to initiate merge")
.wait()
.expect("Merging failed");
index.load_searchers().unwrap();
let ref searcher = *index.searcher();
assert_eq!(searcher.segment_readers().len(), 1);
assert!(segment_ids.is_empty());
assert!(searcher.segment_readers().is_empty());
assert_eq!(searcher.num_docs(), 0);
}
}
@@ -1303,24 +1299,26 @@ mod tests {
index_writer.add_document(doc);
index_writer.commit().expect("commit failed");
index_writer.delete_term(Term::from_field_u64(int_field, 1));
index_writer.commit().expect("commit failed");
}
index.load_searchers().unwrap();
let searcher = index.searcher();
assert_eq!(searcher.num_docs(), 0);
// Merging the segments
{
let segment_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
index_writer
.merge(&segment_ids)
.expect("Failed to initiate merge")
.wait()
.expect("Merging failed");
// assert delete has not been committed
index.load_searchers().unwrap();
let searcher = index.searcher();
assert_eq!(searcher.num_docs(), 2);
index_writer.commit().unwrap();
index_writer.wait_merging_threads().unwrap();
}
index.load_searchers().unwrap();
let searcher = index.searcher();
assert_eq!(searcher.num_docs(), 0);

View File

@@ -106,6 +106,14 @@ impl SegmentManager {
.expect("Failed to acquire write lock on SegmentManager.")
}
/// Removes from the committed register every segment whose metadata
/// reports zero documents.
///
/// The write lock is held for the whole sweep.
fn remove_empty_segments(&self) {
    let mut registers = self.write();
    // Take the list of committed entries up front, then remove the empty
    // ones from the register while walking that list.
    let committed_entries = registers.committed.segment_entries();
    for entry in committed_entries.iter() {
        if entry.meta().num_docs() == 0 {
            registers.committed.remove_segment(&entry.segment_id());
        }
    }
}
pub fn commit(&self, segment_entries: Vec<SegmentEntry>) {
let mut registers_lock = self.write();
registers_lock.committed.clear();
@@ -229,6 +237,7 @@ impl SegmentManager {
}
/// Returns the metadata of the committed segments.
///
/// Empty segments are removed from the committed register first, so the
/// returned list never mentions a segment with zero documents.
pub fn committed_segment_metas(&self) -> Vec<SegmentMeta> {
    // The purge acquires the write lock on its own, so it runs before the
    // read guard below is taken.
    self.remove_empty_segments();
    self.read().committed.segment_metas()
}

View File

@@ -599,4 +599,73 @@ mod tests {
assert_eq!(index.searcher().segment_readers().len(), 1);
assert_eq!(index.searcher().num_docs(), 302);
}
/// Deleting every indexed term must leave the index with no documents,
/// no searchable segments and no segment readers.
#[test]
fn delete_all_docs() {
    let mut builder = Schema::builder();
    let text_field = builder.add_text_field("text", TEXT);
    let index = Index::create_in_ram(builder.build());

    // writing the segment
    let mut writer = index.writer_with_num_threads(1, 40_000_000).unwrap();

    // First commit: 100 documents for "a" and 100 for "b".
    for _ in 0..100 {
        writer.add_document(doc!(text_field=>"a"));
        writer.add_document(doc!(text_field=>"b"));
    }
    assert!(writer.commit().is_ok());

    // Second commit: 100 documents for "c" and 100 for "d".
    for _ in 0..100 {
        writer.add_document(doc!(text_field=>"c"));
        writer.add_document(doc!(text_field=>"d"));
    }
    assert!(writer.commit().is_ok());

    // Third commit: a single document each for "e" and "f".
    writer.add_document(doc!(text_field=>"e"));
    writer.add_document(doc!(text_field=>"f"));
    assert!(writer.commit().is_ok());

    // docs exist, should have at least 1 segment
    let populated_ids = index
        .searchable_segment_ids()
        .expect("Searchable segments failed.");
    assert!(!populated_ids.is_empty());

    // Delete one term at a time, committing after each deletion.
    for &letter in &["a", "b", "c", "d", "e", "f"] {
        writer.delete_term(Term::from_field_text(text_field, letter));
        assert!(writer.commit().is_ok());
    }

    // Wait for the merging threads before inspecting the final state.
    writer
        .wait_merging_threads()
        .expect("waiting for merging threads");

    index.load_searchers().unwrap();
    assert_eq!(index.searcher().num_docs(), 0);

    let remaining_ids = index
        .searchable_segment_ids()
        .expect("Searchable segments failed.");
    assert!(remaining_ids.is_empty());

    // Reload once more and check that the result is unchanged.
    index.load_searchers().unwrap();
    assert_eq!(index.searcher().num_docs(), 0);
    // empty segments should be erased
    assert!(index.searchable_segment_metas().unwrap().is_empty());
    assert!(index.searcher().segment_readers().is_empty());
}
}

View File

@@ -457,25 +457,13 @@ pub mod tests {
index.load_searchers().unwrap();
let searcher = index.searcher();
let segment_reader = searcher.segment_reader(0);
// finally, check that it's empty
{
let mut segment_postings = segment_reader
.inverted_index(term_2.field())
.read_postings(&term_2, IndexRecordOption::Basic)
.unwrap();
assert_eq!(segment_postings.skip_next(0), SkipResult::Reached);
assert_eq!(segment_postings.doc(), 0);
assert!(segment_reader.is_deleted(0));
let mut segment_postings = segment_reader
.inverted_index(term_2.field())
.read_postings(&term_2, IndexRecordOption::Basic)
.unwrap();
assert_eq!(segment_postings.skip_next(num_docs), SkipResult::End);
let searchable_segment_ids = index.searchable_segment_ids()
.expect("could not get index segment ids");
assert!(searchable_segment_ids.is_empty());
assert_eq!(searcher.num_docs(), 0);
}
}