From 893932dff85789f9633dc7ec08e9e6ccaa334484 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Sun, 30 Oct 2016 17:03:37 +0900 Subject: [PATCH] issue/50 Implementation ooleanScorer. --- src/query/boolean_query/boolean_scorer.rs | 169 +++++++++++++++++++++- 1 file changed, 164 insertions(+), 5 deletions(-) diff --git a/src/query/boolean_query/boolean_scorer.rs b/src/query/boolean_query/boolean_scorer.rs index 38a624c60..b445cd75f 100644 --- a/src/query/boolean_query/boolean_scorer.rs +++ b/src/query/boolean_query/boolean_scorer.rs @@ -1,14 +1,172 @@ use query::Scorer; use DocId; +use Score; +use std::collections::BinaryHeap; +use std::cmp::Ordering; use postings::DocSet; +use query::OccurFilter; -pub struct BooleanScorer { + +struct ScoreCombiner { + coords: Vec, + num_fields: usize, + score: Score, +} + +impl ScoreCombiner { + + fn update(&mut self, score: Score) { + self.score += score; + self.num_fields += 1; + } + + fn clear(&mut self,) { + self.score = 0f32; + self.num_fields = 0; + } + + /// Returns the coord factor `coords[num_fields]` for the number of scores accumulated since the last `clear` + fn coord(&self,) -> f32 { + self.coords[self.num_fields] + } + + #[inline] + fn score(&self, ) -> Score { + self.score * self.coord() + } +} + +impl From> for ScoreCombiner { + fn from(coords: Vec) -> ScoreCombiner { + ScoreCombiner { + coords: coords, + num_fields: 0, + score: 0f32, + } + } } -impl DocSet for BooleanScorer { +/// Each `HeapItem` represents the head of +/// one of the postings being merged. +/// +/// * `doc` - the current doc id of the given postings +/// * `ord` - the ordinal (index into `BooleanScorer::postings`) identifying the postings +/// this heap item belongs to. 
+#[derive(Eq, PartialEq)] +struct HeapItem { + doc: DocId, + ord: u32, +} + +/// `HeapItem`s are ordered by `doc` in reverse, so that the max-heap `BinaryHeap` yields the lowest doc first +impl PartialOrd for HeapItem { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for HeapItem { + fn cmp(&self, other:&Self) -> Ordering { + (other.doc).cmp(&self.doc) + } +} + +pub struct BooleanScorer { + postings: Vec, + queue: BinaryHeap, + doc: DocId, + score_combiner: ScoreCombiner, + filter: OccurFilter, +} + +impl BooleanScorer { + + fn new(postings: Vec, filter: OccurFilter) -> BooleanScorer { + let num_postings = postings.len(); + let query_coords: Vec = (0..num_postings + 1) + .map(|i| (i as Score) / (num_postings as Score)) + .collect(); + let score_combiner = ScoreCombiner::from(query_coords); + let heap_items: Vec = postings + .iter() + .map(|posting| posting.doc()) + .enumerate() + .map(|(ord, doc)| { + HeapItem { + doc: doc, + ord: ord as u32 + } + }) + .collect(); + BooleanScorer { + postings: postings, + queue: BinaryHeap::from(heap_items), + doc: 0u32, + score_combiner: score_combiner, + filter: filter, + + } + } + + + /// Advances the postings at the head of our heap (the one with the lowest doc). + /// If it still has documents, the head item's `doc` is updated in place + /// to the postings' new current `DocId`, and the item stays in the heap. + /// + /// Otherwise the postings has been entirely consumed, and its item + /// is popped from the heap. + /// + /// # Panics + /// This method will panic if the heap is empty. 
+ fn advance_head(&mut self,) { + { + let mut mutable_head = self.queue.peek_mut().unwrap(); + let cur_postings = &mut self.postings[mutable_head.ord as usize]; + if cur_postings.advance() { + mutable_head.doc = cur_postings.doc(); + return; + } + + } + self.queue.pop(); + } +} + +impl DocSet for BooleanScorer { fn advance(&mut self,) -> bool { - panic!("a"); + loop { + self.score_combiner.clear(); + let mut ord_bitset = 0u64; + match self.queue.peek() { + Some(heap_item) => { + let ord = heap_item.ord as usize; + self.doc = heap_item.doc; + let score = self.postings[ord].score(); + self.score_combiner.update(score); + ord_bitset |= 1 << ord; + } + None => { + return false; + } + } + self.advance_head(); + while let Some(&HeapItem {doc, ord}) = self.queue.peek() { + if doc == self.doc { + let ord = ord as usize; + let score = self.postings[ord].score(); + self.score_combiner.update(score); + ord_bitset |= 1 << ord; + } + else { + break; + } + self.advance_head(); + } + if self.filter.accept(ord_bitset) { + return true; + } + } } fn doc(&self,) -> DocId { @@ -16,9 +174,10 @@ impl DocSet for BooleanScorer { } } -impl Scorer for BooleanScorer { +impl Scorer for BooleanScorer { fn score(&self,) -> f32 { panic!(""); } -} \ No newline at end of file +} +