From ffa558e3a9f658bbc43b4c2cd6952a78e1b2ecff Mon Sep 17 00:00:00 2001 From: Neil Hansen <51208969+neilyio@users.noreply.github.com> Date: Tue, 14 Jan 2025 11:58:01 -0800 Subject: [PATCH] fix: tests in ci (#18) --- src/indexer/index_writer.rs | 10 +++++++-- src/indexer/merger.rs | 9 +++++--- src/indexer/segment_updater.rs | 38 ++++++++++++++++++++-------------- src/postings/mod.rs | 8 ++++--- 4 files changed, 41 insertions(+), 24 deletions(-) diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 1ba92d6de..f0da70c49 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -1089,7 +1089,10 @@ mod tests { index_writer.commit()?; reader.reload().unwrap(); - assert_eq!(num_docs_containing("a"), 0); + // In Tantivy upstream, this test results in 0 segments after delete. + // However, due to our custom visibility rules, we leave the segment. + // See committed_segment_metas in segment_manager.rs. + assert_eq!(num_docs_containing("a"), 1); index_writer.merge(&segments); index_writer.wait_merging_threads().unwrap(); @@ -1135,7 +1138,10 @@ mod tests { index_writer.commit()?; reader.reload().unwrap(); - assert_eq!(num_docs_containing("a"), 0); + // In Tantivy upstream, this test results in 0 segments after delete. + // However, due to our custom visibility rules, we leave the segment. + // See committed_segment_metas in segment_manager.rs. 
+ assert_eq!(num_docs_containing("a"), 4); index_writer.merge(&segments); index_writer.wait_merging_threads().unwrap(); diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 47ac5a55b..0a497b7d7 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -1032,12 +1032,15 @@ mod tests { // Test removing all docs index_writer.delete_term(Term::from_field_text(text_field, "g")); index_writer.commit()?; - let segment_ids = index.searchable_segment_ids()?; + let _segment_ids = index.searchable_segment_ids()?; reader.reload()?; let searcher = reader.searcher(); - assert!(segment_ids.is_empty()); - assert!(searcher.segment_readers().is_empty()); + // In Tantivy upstream, this test results in 0 segments after delete. + // However, due to our custom visibility rules, we leave the segment. + // See committed_segment_metas in segment_manager.rs. + // assert!(segment_ids.is_empty()); + // assert!(searcher.segment_readers().is_empty()); assert_eq!(searcher.num_docs(), 0); } Ok(()) diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs index 7e8d685b7..0b1c58657 100644 --- a/src/indexer/segment_updater.rs +++ b/src/indexer/segment_updater.rs @@ -810,9 +810,11 @@ mod tests { } index_writer.commit()?; - let seg_ids = index.searchable_segment_ids()?; - // docs exist, should have at least 1 segment - assert!(!seg_ids.is_empty()); + let _seg_ids = index.searchable_segment_ids()?; + // In Tantivy upstream, this test results in 0 segments after delete. + // However, due to our custom visibility rules, we leave the segment. + // See committed_segment_metas in segment_manager.rs. 
+ // assert!(!seg_ids.is_empty()); let term = Term::from_field_text(text_field, "a"); index_writer.delete_term(term); @@ -827,14 +829,15 @@ mod tests { let reader = index.reader()?; assert_eq!(reader.searcher().num_docs(), 0); - let seg_ids = index.searchable_segment_ids()?; - assert!(seg_ids.is_empty()); + let _seg_ids = index.searchable_segment_ids()?; + // Skipped due to custom ParadeDB visibility rules. + // assert!(seg_ids.is_empty()); reader.reload()?; assert_eq!(reader.searcher().num_docs(), 0); - // empty segments should be erased - assert!(index.searchable_segment_metas()?.is_empty()); - assert!(reader.searcher().segment_readers().is_empty()); + // Skipped due to custom ParadeDB visibility rules. + // assert!(index.searchable_segment_metas()?.is_empty()); + // assert!(reader.searcher().segment_readers().is_empty()); Ok(()) } @@ -864,9 +867,11 @@ mod tests { index_writer.add_document(doc!(text_field=>"f"))?; index_writer.commit()?; - let seg_ids = index.searchable_segment_ids()?; - // docs exist, should have at least 1 segment - assert!(!seg_ids.is_empty()); + let _seg_ids = index.searchable_segment_ids()?; + // In Tantivy upstream, this test results in 0 segments after delete. + // However, due to our custom visibility rules, we leave the segment. + // See committed_segment_metas in segment_manager.rs. + // assert!(!seg_ids.is_empty()); let term_vals = vec!["a", "b", "c", "d", "e", "f"]; for term_val in term_vals { @@ -880,14 +885,15 @@ mod tests { let reader = index.reader()?; assert_eq!(reader.searcher().num_docs(), 0); - let seg_ids = index.searchable_segment_ids()?; - assert!(seg_ids.is_empty()); + let _seg_ids = index.searchable_segment_ids()?; + // Skipped due to custom ParadeDB visibility rules. 
+ // assert!(seg_ids.is_empty()); reader.reload()?; assert_eq!(reader.searcher().num_docs(), 0); - // empty segments should be erased - assert!(index.searchable_segment_metas()?.is_empty()); - assert!(reader.searcher().segment_readers().is_empty()); + // Skipped due to custom ParadeDB visibility rules. + // assert!(index.searchable_segment_metas()?.is_empty()); + // assert!(reader.searcher().segment_readers().is_empty()); Ok(()) } diff --git a/src/postings/mod.rs b/src/postings/mod.rs index efc0e069d..41b9ab0c3 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -491,10 +491,12 @@ pub(crate) mod tests { } let searcher = index.reader()?.searcher(); - // finally, check that it's empty + // In Tantivy upstream, this test results in 0 segments after delete. + // However, due to our custom visibility rules, we leave the segment. + // See committed_segment_metas in segment_manager.rs. { - let searchable_segment_ids = index.searchable_segment_ids()?; - assert!(searchable_segment_ids.is_empty()); + let _searchable_segment_ids = index.searchable_segment_ids()?; + // assert!(searchable_segment_ids.is_empty()); assert_eq!(searcher.num_docs(), 0); } Ok(())