From 545a7ec8ddfcf9d56f09a00c9a05c0aa1730e76d Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Tue, 18 Dec 2018 09:18:46 +0900 Subject: [PATCH] Closes #457 --- src/postings/segment_postings.rs | 48 ++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index 776844f2a..0358806c7 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -126,7 +126,6 @@ impl SegmentPostings { fn exponential_search(target: u32, arr: &[u32]) -> (usize, usize) { let mut start = 0; let end = arr.len(); - debug_assert!(target >= arr[start]); debug_assert!(target <= arr[end - 1]); let mut jump = 1; loop { @@ -216,11 +215,10 @@ impl DocSet for SegmentPostings { // we're in the right block now, start with an exponential search let block_docs = self.block_cursor.docs(); - - debug_assert!(target >= self.doc()); let new_cur = self .cur .wrapping_add(search_within_block(&block_docs[self.cur..], target)); + if need_positions { sum_freqs_skipped += self.block_cursor.freqs()[self.cur..new_cur] .iter() @@ -634,6 +632,7 @@ mod tests { use schema::Term; use schema::INT_INDEXED; use DocId; + use SkipResult; #[test] fn test_empty_segment_postings() { @@ -692,7 +691,7 @@ mod tests { #[test] fn test_block_segment_postings() { - let mut block_segments = build_block_postings((0..100_000).collect::>()); + let mut block_segments = build_block_postings(&(0..100_000).collect::>()); let mut offset: u32 = 0u32; // checking that the block before calling advance is empty assert!(block_segments.docs().is_empty()); @@ -706,14 +705,45 @@ mod tests { } } - fn build_block_postings(docs: Vec) -> BlockSegmentPostings { + + #[test] + fn test_skip_right_at_new_block() { + let mut doc_ids = (0..128).collect::>(); + doc_ids.push(129); + doc_ids.push(130); + { + let block_segments = build_block_postings(&doc_ids); + let mut docset = SegmentPostings::from_block_postings(block_segments, None); + assert_eq!(docset.skip_next(128), SkipResult::OverStep); + assert_eq!(docset.doc(), 129); + assert!(docset.advance()); + assert_eq!(docset.doc(), 130); + assert!(!docset.advance()); + } + { + let block_segments = build_block_postings(&doc_ids); + let mut docset = SegmentPostings::from_block_postings(block_segments, None); + assert_eq!(docset.skip_next(129), SkipResult::Reached); + assert_eq!(docset.doc(), 129); + assert!(docset.advance()); + assert_eq!(docset.doc(), 130); + assert!(!docset.advance()); + } + { + let block_segments = build_block_postings(&doc_ids); + let mut docset = SegmentPostings::from_block_postings(block_segments, None); + assert_eq!(docset.skip_next(131), SkipResult::End); + } + } + + fn build_block_postings(docs: &[DocId]) -> BlockSegmentPostings { let mut schema_builder = SchemaBuilder::default(); let int_field = schema_builder.add_u64_field("id", INT_INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); let mut last_doc = 0u32; - for doc in docs { + for &doc in docs { for _ in last_doc..doc { index_writer.add_document(doc!(int_field=>1u64)); } @@ -733,7 +763,7 @@ mod tests { #[test] fn test_block_segment_postings_skip() { for i in 0..4 { - let mut block_postings = build_block_postings(vec![3]); + let mut block_postings = build_block_postings(&[3]); assert_eq!( block_postings.skip_to(i), BlockSegmentPostingsSkipResult::Success(0u32) @@ -743,7 +773,7 @@ mod tests { BlockSegmentPostingsSkipResult::Terminated ); } - let mut block_postings = build_block_postings(vec![3]); + let mut block_postings = build_block_postings(&[3]); assert_eq!( block_postings.skip_to(4u32), BlockSegmentPostingsSkipResult::Terminated @@ -756,7 +786,7 @@ mod tests { for i in 0..1300 { docs.push((i * i / 100) + i); } - let mut block_postings = build_block_postings(docs.clone()); + let mut block_postings = build_block_postings(&docs[..]); for i in vec![0, 424, 10000] { assert_eq!( block_postings.skip_to(i),