From 59639cd31180375cc3f0e94ffe12817435c687be Mon Sep 17 00:00:00 2001
From: Paul Masurel <paul.masurel@gmail.com>
Date: Mon, 19 Mar 2018 12:58:42 +0900
Subject: [PATCH] Sync with master; fix merging of term positions

---
 src/core/inverted_index_reader.rs       |  2 +-
 src/indexer/merger.rs                   | 22 +++++++++--------
 src/postings/mod.rs                     | 33 ++++++++++++++++---------
 src/postings/postings.rs                |  4 +++
 src/postings/segment_postings.rs        | 12 +++------
 src/query/phrase_query/phrase_scorer.rs | 25 +++++--------------
 6 files changed, 49 insertions(+), 49 deletions(-)
diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs
index aff30704c..c6f730f19 100644
--- a/src/core/inverted_index_reader.rs
+++ b/src/core/inverted_index_reader.rs
@@ -58,7 +58,7 @@ impl InvertedIndexReader {
             TermDictionaryImpl::empty(field_type),
             ReadOnlySource::empty(),
             ReadOnlySource::empty(),
-            DeleteBitSet::empty(),
+            None,
             record_option,
         )
     }
diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs
index bbdf7c2f7..e47943c45 100644
--- a/src/indexer/merger.rs
+++ b/src/indexer/merger.rs
@@ -6,7 +6,6 @@ use core::SerializableSegment;
 use indexer::SegmentSerializer;
 use postings::InvertedIndexSerializer;
 use itertools::Itertools;
-use postings::Postings;
 use docset::DocSet;
 use fastfield::DeleteBitSet;
 use schema::{Field, Schema};
@@ -18,6 +17,7 @@ use std::cmp::{max, min};
 use termdict::TermDictionary;
 use termdict::TermStreamer;
 use postings::DeleteSet;
+use postings::Postings;
 
 pub struct IndexMerger {
     schema: Schema,
@@ -206,6 +206,8 @@ impl IndexMerger {
     }
 
     fn write_postings(&self, serializer: &mut InvertedIndexSerializer) -> Result<()> {
+
+        let mut positions_buffer: Vec<u32> = Vec::with_capacity(1_000);
         let mut delta_computer = DeltaComputer::new();
 
         let mut indexed_fields = vec![];
@@ -314,15 +316,15 @@ impl IndexMerger {
                             {
                                 // we make sure to only write the term iff
                                 // there is at least one document.
-                                unreachable!();
-//                                let positions: &[u32] = segment_postings.positions();
-//                                let term_freq = segment_postings.term_freq();
-//                                let delta_positions = delta_computer.compute_delta(positions);
-//                                field_serializer.write_doc(
-//                                    remapped_doc_id,
-//                                    term_freq,
-//                                    delta_positions,
-//                                )?;
+                                let term_freq = segment_postings.term_freq();
+                                segment_postings.positions(&mut positions_buffer);
+
+                                let delta_positions = delta_computer.compute_delta(&positions_buffer);
+                                field_serializer.write_doc(
+                                    remapped_doc_id,
+                                    term_freq,
+                                    delta_positions,
+                                )?;
                             }
                             if !segment_postings.advance() {
                                 break;
diff --git a/src/postings/mod.rs b/src/postings/mod.rs
index 7734d1d6e..1c30e00a2 100644
--- a/src/postings/mod.rs
+++ b/src/postings/mod.rs
@@ -103,15 +103,18 @@ pub mod tests {
         let inverted_index = searcher.segment_reader(0u32).inverted_index(title);
         let term = Term::from_field_text(title, "abc");
 
+        let mut positions = Vec::new();
 
         {
             let mut postings = inverted_index
                 .read_postings(&term, IndexRecordOption::WithFreqsAndPositions)
                 .unwrap();
             postings.advance();
-            assert_eq!(&[0, 1, 2], postings.positions());
+            postings.positions(&mut positions);
+            assert_eq!(&[0, 1, 2], &positions[..]);
             postings.advance();
-            assert_eq!(&[0, 5], postings.positions());
+            postings.positions(&mut positions);
+            assert_eq!(&[0, 5], &positions[..]);
         }
         {
             let mut postings = inverted_index
@@ -119,7 +122,8 @@ pub mod tests {
                 .unwrap();
             postings.advance();
             postings.advance();
-            assert_eq!(&[0, 5], postings.positions());
+            postings.positions(&mut positions);
+            assert_eq!(&[0, 5], &positions[..]);
         }
         {
 
@@ -128,7 +132,8 @@ pub mod tests {
                 .unwrap();
             assert_eq!(postings.skip_next(1), SkipResult::Reached);
             assert_eq!(postings.doc(), 1);
-            assert_eq!(&[0, 5], postings.positions());
+            postings.positions(&mut positions);
+            assert_eq!(&[0, 5], &positions[..]);
         }
         {
             let mut postings = inverted_index
@@ -136,7 +141,8 @@ pub mod tests {
                 .unwrap();
             assert_eq!(postings.skip_next(1002), SkipResult::Reached);
             assert_eq!(postings.doc(), 1002);
-            assert_eq!(&[0, 5], postings.positions());
+            postings.positions(&mut positions);
+            assert_eq!(&[0, 5], &positions[..]);
         }
         {
             let mut postings = inverted_index
@@ -145,12 +151,14 @@ pub mod tests {
             assert_eq!(postings.skip_next(100), SkipResult::Reached);
             assert_eq!(postings.skip_next(1002), SkipResult::Reached);
             assert_eq!(postings.doc(), 1002);
-            assert_eq!(&[0, 5], postings.positions());
+            postings.positions(&mut positions);
+            assert_eq!(&[0, 5], &positions[..]);
         }
     }
 
     #[test]
     pub fn test_position_and_fieldnorm1() {
+        let mut positions = Vec::new();
         let mut schema_builder = SchemaBuilder::default();
         let text_field = schema_builder.add_text_field("text", TEXT);
         let schema = schema_builder.build();
@@ -223,15 +231,16 @@ pub mod tests {
                 assert!(postings_a.advance());
                 assert_eq!(postings_a.doc(), 0);
                 assert_eq!(postings_a.term_freq(), 6);
-                assert_eq!(postings_a.positions(), [0, 2, 4, 6, 7, 13]);
-                assert_eq!(postings_a.positions(), [0, 2, 4, 6, 7, 13]);
+                postings_a.positions(&mut positions);
+                assert_eq!(&positions[..], [0, 2, 4, 6, 7, 13]);
                 assert!(postings_a.advance());
                 assert_eq!(postings_a.doc(), 1u32);
                 assert_eq!(postings_a.term_freq(), 1);
                 for i in 2u32..1000u32 {
                     assert!(postings_a.advance());
                     assert_eq!(postings_a.term_freq(), 1);
-                    assert_eq!(postings_a.positions(), [i]);
+                    postings_a.positions(&mut positions);
+                    assert_eq!(&positions[..], [i]);
                     assert_eq!(postings_a.doc(), i);
                 }
                 assert!(!postings_a.advance());
@@ -246,7 +255,7 @@ pub mod tests {
                 for i in 2u32..1000u32 {
                     assert!(postings_e.advance());
                     assert_eq!(postings_e.term_freq(), i);
-                    let positions = postings_e.positions();
+                    postings_e.positions(&mut positions);
                     assert_eq!(positions.len(), i as usize);
                     for j in 0..positions.len() {
                         assert_eq!(positions[j], (j as u32));
@@ -260,6 +269,7 @@ pub mod tests {
 
     #[test]
     pub fn test_position_and_fieldnorm2() {
+        let mut positions: Vec<u32> = Vec::new();
         let mut schema_builder = SchemaBuilder::default();
         let text_field = schema_builder.add_text_field("text", TEXT);
         let schema = schema_builder.build();
@@ -288,7 +298,8 @@ pub mod tests {
             .unwrap();
         assert!(postings.advance());
         assert_eq!(postings.doc(), 1u32);
-        assert_eq!(postings.positions(), &[1u32, 4]);
+        postings.positions(&mut positions);
+        assert_eq!(&positions[..], &[1u32, 4]);
     }
 
     #[test]
diff --git a/src/postings/postings.rs b/src/postings/postings.rs
index f66c6434d..b415860d5 100644
--- a/src/postings/postings.rs
+++ b/src/postings/postings.rs
@@ -17,4 +17,8 @@ pub trait Postings: DocSet + 'static {
     /// Returns the list of positions of the term, expressed as a list of
     /// token ordinals.
     fn positions_with_offset(&mut self, offset: u32, output: &mut Vec<u32>);
+
+    fn positions(&mut self, output: &mut Vec<u32>) {
+        self.positions_with_offset(0u32, output);
+    }
 }
diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs
index 292239f56..154381fbd 100644
--- a/src/postings/segment_postings.rs
+++ b/src/postings/segment_postings.rs
@@ -9,13 +9,10 @@ use std::cmp;
 use fst::Streamer;
 use compression::compressed_block_size;
 use postings::{NoDelete, DeleteSet};
-use std::cell::UnsafeCell;
 use directory::{ReadOnlySource, SourceRead};
 use postings::FreqReadingOption;
 use postings::serializer::PostingsSerializer;
 
-const EMPTY_POSITIONS: [u32; 0] = [0u32; 0];
-
 struct PositionComputer {
     // store the amount of position int
     // before reading positions.
@@ -41,8 +38,7 @@ impl PositionComputer {
     }
 
     // Positions can only be read once.
-    pub fn positions(&mut self, offset: u32, output: &mut [u32]) {
-        let term_freq = output.len();
+    pub fn positions_with_offset(&mut self, offset: u32, output: &mut [u32]) {
         if let Some(num_skip) = self.position_to_skip {
             self.positions_stream.skip(num_skip);
             self.positions_stream.read(output);
@@ -183,7 +179,7 @@ impl<TDeleteSet: DeleteSet> DocSet for SegmentPostings<TDeleteSet> {
                 // add the term freq.
                 if self.position_computer.is_some() {
                     let freqs_skipped = &self.block_cursor.freqs()[self.cur..];
-                    let sum_freq: u32 = freqs_skipped.iter().sum()
+                    let sum_freq: u32 = freqs_skipped.iter().sum();
                     self.position_computer.as_mut()
                         .unwrap()
                         .add_skip(sum_freq as usize);
@@ -319,10 +315,10 @@ impl<TDeleteSet: DeleteSet> Postings for SegmentPostings<TDeleteSet> {
             }
             unsafe {
                 output.set_len(term_freq);
-                self.position_computer.as_mut().unwrap().positions(offset, &mut output[..])
+                self.position_computer.as_mut().unwrap().positions_with_offset(offset, &mut output[..])
             }
         } else {
-            unimplemented!("You may not read positions twice!");
+            output.clear();
         }
     }
 }
diff --git a/src/query/phrase_query/phrase_scorer.rs b/src/query/phrase_query/phrase_scorer.rs
index f77c63d68..f31b4238e 100644
--- a/src/query/phrase_query/phrase_scorer.rs
+++ b/src/query/phrase_query/phrase_scorer.rs
@@ -2,7 +2,6 @@ use DocId;
 use docset::{DocSet, SkipResult};
 use postings::Postings;
 use query::{Intersection, Scorer};
-use std::mem;
 
 
 struct PostingsWithOffset<TPostings> {
@@ -48,7 +47,7 @@ pub struct PhraseScorer<TPostings: Postings> {
     right: Vec<u32>
 }
 
-fn intersection_arr(left: &mut [u32], right: &[u32]) -> usize {
+fn intersection_count(left: &[u32], right: &[u32]) -> usize {
     let mut left_i = 0;
     let mut right_i = 0;
     let mut count = 0;
@@ -58,7 +57,6 @@ fn intersection_arr(left: &mut [u32], right: &[u32]) -> usize {
         } else if right[right_i] < left[left_i] {
             right_i += 1;
         } else {
-            left[count] = left[left_i];
             count+=1;
             left_i += 1;
             right_i += 1;
@@ -95,7 +93,7 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
             {
                 self.intersection_docset.docset_mut_specialized(i).positions(&mut self.right);
             }
-            intersection_len = intersection_arr(&mut self.left[..intersection_len], &self.right[..]);
+            intersection_len = intersection_count(&mut self.left[..intersection_len], &self.right[..]);
             if intersection_len == 0 {
                 return false;
             }
@@ -152,25 +150,14 @@ mod tests {
 
     use tests;
     use test::Bencher;
-    use super::{intersection_arr, intersection_avx};
+    use super::intersection_count;
 
     #[bench]
     fn bench_intersection(b: &mut Bencher) {
-        let left = tests::sample_with_seed(100_000, 0.1, 1);
-        let right = tests::sample_with_seed(200_000, 0.05, 2);
-        let mut output = vec![0u32; 200_000];
+        let left = tests::sample_with_seed(10, 0.1, 1);
+        let right = tests::sample_with_seed(2, 0.05, 2);
         b.iter(|| {
-            intersection_arr(&left, &right, &mut output);
-        });
-    }
-
-    #[bench]
-    fn bench_intersection_avx(b: &mut Bencher) {
-        let left = tests::sample_with_seed(100_000, 0.1, 1);
-        let right = tests::sample_with_seed(200_000, 0.05, 2);
-        let mut output = vec![0u32; 200_000];
-        b.iter(|| {
-            intersection_avx(&left, &right, &mut output);
+            intersection_count(&left, &right);
         });
     }
 }
\ No newline at end of file