From 236fa74767c8dd0feb3bb2a4d01c346eb6dafdbb Mon Sep 17 00:00:00 2001
From: Paul Masurel <paul.masurel@gmail.com>
Date: Sat, 5 Aug 2017 23:17:35 +0900
Subject: [PATCH] Positions almost working.

---
 src/core/segment_reader.rs          |   5 +-
 src/postings/mod.rs                 |  13 ++--
 src/postings/segment_postings.rs    | 101 +++++++++++++++++++++++++---
 src/query/term_query/term_weight.rs |   6 +-
 4 files changed, 103 insertions(+), 22 deletions(-)
diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs
index ff66273e2..56247e942 100644
--- a/src/core/segment_reader.rs
+++ b/src/core/segment_reader.rs
@@ -24,7 +24,6 @@ use postings::SegmentPostingsOption;
 use postings::{SegmentPostings, BlockSegmentPostings};
 use fastfield::{FastFieldsReader, FastFieldReader, U64FastFieldReader};
 use schema::Schema;
-use postings::FreqHandler;
 
 
 
@@ -198,10 +197,10 @@ impl SegmentReader {
     /// For instance, requesting `SegmentPostingsOption::FreqAndPositions` for a
     /// `TextIndexingOptions` that does not index position will return a `SegmentPostings`
     /// with `DocId`s and frequencies.
-    pub fn read_postings(&self,
+    pub fn read_postings<'a>(&'a self,
                          term: &Term,
                          option: SegmentPostingsOption)
-                         -> Option<SegmentPostings> {
+                         -> Option<SegmentPostings<'a>> {
         let field = term.field();
         let field_entry = self.schema.get_field_entry(field);
         let term_info = get!(self.get_term_info(term));
diff --git a/src/postings/mod.rs b/src/postings/mod.rs
index ed8a6998f..21cfa6777 100644
--- a/src/postings/mod.rs
+++ b/src/postings/mod.rs
@@ -12,7 +12,6 @@ mod term_info;
 mod vec_postings;
 mod segment_postings;
 mod intersection;
-mod freq_handler;
 mod docset;
 mod segment_postings_option;
 
@@ -28,7 +27,6 @@ pub use self::vec_postings::VecPostings;
 
 pub use self::segment_postings::{SegmentPostings, BlockSegmentPostings};
 pub use self::intersection::IntersectionDocSet;
-pub use self::freq_handler::FreqHandler;
 pub use self::segment_postings_option::SegmentPostingsOption;
 pub use common::HasLen;
 
@@ -63,18 +61,18 @@ mod tests {
         let mut posting_serializer = PostingsSerializer::open(&mut segment).unwrap();
         posting_serializer.new_field(text_field);
         posting_serializer.new_term("abc".as_bytes()).unwrap();
-        for doc_id in 0u32..3u32 {
-            let positions = vec![1, 2, 3, 2];
-            posting_serializer.write_doc(doc_id, 2, &positions).unwrap();
+        for doc_id in 0u32..120u32 {
+            let delta_positions = vec![1, 2, 3, 2];
+            posting_serializer.write_doc(doc_id, 2, &delta_positions).unwrap();
         }
         posting_serializer.close_term().unwrap();
         posting_serializer.close().unwrap();
         let read = segment.open_read(SegmentComponent::POSITIONS).unwrap();
-        assert!(read.len() <= 16);
+        assert!(read.len() <= 140);
     }
 
     #[test]
-    pub fn test_position_and_fieldnorm() {
+    pub fn test_position_and_fieldnorm1() {
         let mut schema_builder = SchemaBuilder::default();
         let text_field = schema_builder.add_text_field("text", TEXT);
         let schema = schema_builder.build();
@@ -144,6 +142,7 @@ mod tests {
                 assert_eq!(postings_a.doc(), 0);
                 assert_eq!(postings_a.term_freq(), 6);
                 assert_eq!(postings_a.positions(), [0, 2, 4, 6, 7, 13]);
+                assert_eq!(postings_a.positions(), [0, 2, 4, 6, 7, 13]);
                 assert!(postings_a.advance());
                 assert_eq!(postings_a.doc(), 1u32);
                 assert_eq!(postings_a.term_freq(), 1);
diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs
index 553c50f70..ab4805d5e 100644
--- a/src/postings/segment_postings.rs
+++ b/src/postings/segment_postings.rs
@@ -4,9 +4,65 @@ use postings::{Postings, DocSet, HasLen, SkipResult};
 use std::cmp;
 use fst::Streamer;
 use fastfield::DeleteBitSet;
-
+use std::cell::UnsafeCell;
 
 const EMPTY_DATA: [u8; 0] = [0u8; 0];
+const EMPTY_POSITIONS: [u32; 0] = [0u32; 0];
+
+struct PositionComputer<'a> {
+    // store the amount of position int
+    // before reading positions.
+    //
+    // if none, position are already loaded in
+    // the positions vec.
+    position_to_skip: Option<usize>,
+
+    delta_positions: Vec<u32>,
+    positions: Vec<u32>,
+    positions_stream: CompressedIntStream<'a>,
+}
+
+impl<'a> PositionComputer<'a> {
+
+    pub fn new(positions_stream: CompressedIntStream<'a>) -> PositionComputer<'a> {
+        PositionComputer {
+            position_to_skip: None,
+            positions: vec!(),
+            delta_positions: vec!(),
+            positions_stream: positions_stream,
+        }
+    }
+
+    pub fn add_skip(&mut self, num_skip: usize) {
+        self.position_to_skip = Some(
+            self.position_to_skip
+                .map(|prev_skip| prev_skip + num_skip)
+                .unwrap_or(0)
+            );
+        }
+
+    pub fn positions(&mut self, term_freq: usize) -> &[u32] {
+        self.delta_positions(term_freq);
+        &self.positions[..term_freq]
+    }
+
+    pub fn delta_positions(&mut self, term_freq: usize) -> &[u32] {
+        if let Some(num_skip) = self.position_to_skip {
+            self.delta_positions.resize(term_freq, 0u32);
+            self.positions_stream.skip(num_skip);
+            self.positions_stream.read(&mut self.delta_positions[..term_freq]);
+            self.positions.resize(term_freq, 0u32);
+            let mut cum = 0u32;
+            for i in 0..term_freq as usize {
+                cum += self.delta_positions[i];
+                self.positions[i] = cum;
+            }
+            self.position_to_skip = None;
+        }
+        &self.delta_positions[..term_freq]
+    }
+}
+
 
 
 /// `SegmentPostings` represents the inverted list or postings associated to
@@ -18,9 +74,11 @@ pub struct SegmentPostings<'a> {
     block_cursor: BlockSegmentPostings<'a>,
     cur: usize,
     delete_bitset: DeleteBitSet,
-    positions_stream: Option<CompressedIntStream<'a>>,
+
+    position_computer: Option<UnsafeCell<PositionComputer<'a>>>,
 }
 
+
 impl<'a> SegmentPostings<'a> {
     /// Reads a Segment postings from an &[u8]
     ///
@@ -30,24 +88,27 @@ impl<'a> SegmentPostings<'a> {
     ///   frequencies and/or positions
     pub fn from_block_postings(segment_block_postings: BlockSegmentPostings<'a>,
                                delete_bitset: DeleteBitSet,
-                               positions_stream: Option<CompressedIntStream<'a>>)
+                               positions_stream_opt: Option<CompressedIntStream<'a>>)
                                -> SegmentPostings<'a> {
+        let position_computer = positions_stream_opt.map(|stream| {
+            UnsafeCell::new(PositionComputer::new(stream))
+        });
         SegmentPostings {
             block_cursor: segment_block_postings,
             cur: NUM_DOCS_PER_BLOCK, // cursor within the block
             delete_bitset: delete_bitset,
-            positions_stream: positions_stream
+            position_computer: position_computer,
         }
     }
 
     /// Returns an empty segment postings object
-    pub fn empty() -> SegmentPostings<'static> {
+    pub fn empty() -> SegmentPostings<'a> {
         let empty_block_cursor = BlockSegmentPostings::empty();
         SegmentPostings {
             block_cursor: empty_block_cursor,
             delete_bitset: DeleteBitSet::empty(),
             cur: NUM_DOCS_PER_BLOCK,
-            positions_stream: None,
+            position_computer: None,
         }
     }
 }
@@ -58,7 +119,9 @@ impl<'a> DocSet for SegmentPostings<'a> {
     // next needs to be called a first time to point to the correct element.
     #[inline]
     fn advance(&mut self) -> bool {
+        let mut pos_to_skip = 0u32;
         loop {
+            pos_to_skip += self.term_freq();
             self.cur += 1;
             if self.cur >= self.block_cursor.block_len() {
                 self.cur = 0;
@@ -68,6 +131,11 @@ impl<'a> DocSet for SegmentPostings<'a> {
                 }
             }
             if !self.delete_bitset.is_deleted(self.doc()) {
+                if let Some(ref mut position_computer) = self.position_computer.as_mut() {
+                    unsafe {
+                        (*position_computer.get()).add_skip(pos_to_skip as usize);
+                    }
+                }
                 return true;
             }
         }
@@ -181,11 +249,26 @@ impl<'a> Postings for SegmentPostings<'a> {
     }
 
     fn positions(&self) -> &[u32] {
-        unimplemented!();
+        let term_freq = self.term_freq();
+        let position_computer_ptr: *mut PositionComputer = self.position_computer
+            .as_ref()
+            .expect("Segment reader does not have positions.")
+            .get();
+        unsafe {
+            (&mut *position_computer_ptr).positions(term_freq as usize)
+        }
     }
 
     fn delta_positions(&self) -> &[u32] {
-        unimplemented!();
+        let term_freq = self.term_freq();
+        self.position_computer
+            .as_ref()
+            .map(|position_computer| {
+                unsafe {
+                    (&mut *position_computer.get()).delta_positions(term_freq as usize)
+                }
+            })
+            .unwrap_or(&EMPTY_POSITIONS[..])
     }
 
 }
@@ -333,7 +416,7 @@ impl<'a> BlockSegmentPostings<'a> {
             num_vint_docs: 0,
 
             doc_decoder: BlockDecoder::new(),
-            freq_decoder: BlockDecoder::new(),
+            freq_decoder: BlockDecoder::with_val(1),
             has_freq: false,
 
             remaining_data: &EMPTY_DATA,
diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs
index e781ebdbd..99bfa7d47 100644
--- a/src/query/term_query/term_weight.rs
+++ b/src/query/term_query/term_weight.rs
@@ -27,13 +27,13 @@ impl TermWeight {
         1.0 + (self.num_docs as f32 / (self.doc_freq as f32 + 1.0)).ln()
     }
 
-    pub fn specialized_scorer<'a>(&'a self,
+    pub fn specialized_scorer<'a>(&self,
                                   reader: &'a SegmentReader)
                                   -> Result<TermScorer<SegmentPostings<'a>>> {
         let field = self.term.field();
         let fieldnorm_reader_opt = reader.get_fieldnorms_reader(field);
-        Ok(reader
-               .read_postings(&self.term, self.segment_postings_options)
+        let postings: Option<SegmentPostings<'a>> = reader.read_postings(&self.term, self.segment_postings_options);
+        Ok(postings
                .map(|segment_postings| {
                         TermScorer {
                             idf: self.idf(),