diff --git a/src/collector/chained_collector.rs b/src/collector/chained_collector.rs index 1f419cd7f..89f872516 100644 --- a/src/collector/chained_collector.rs +++ b/src/collector/chained_collector.rs @@ -42,8 +42,8 @@ impl Collector for ChainedCollector Result<()> { - try!(self.left.set_segment(segment_local_id, segment)); - try!(self.right.set_segment(segment_local_id, segment)); + self.left.set_segment(segment_local_id, segment)?; + self.right.set_segment(segment_local_id, segment)?; Ok(()) } diff --git a/src/collector/multi_collector.rs b/src/collector/multi_collector.rs index 09b191927..fc5c98afb 100644 --- a/src/collector/multi_collector.rs +++ b/src/collector/multi_collector.rs @@ -29,7 +29,7 @@ impl<'a> Collector for MultiCollector<'a> { segment: &SegmentReader, ) -> Result<()> { for collector in &mut self.collectors { - try!(collector.set_segment(segment_local_id, segment)); + collector.set_segment(segment_local_id, segment)?; } Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index bb97f7040..78466f126 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,6 @@ #![allow(new_without_default)] #![warn(missing_docs)] - //! # `tantivy` //! //! Tantivy is a search engine library. diff --git a/src/postings/intersection.rs b/src/postings/intersection.rs index 85a6037d7..74b636f8e 100644 --- a/src/postings/intersection.rs +++ b/src/postings/intersection.rs @@ -4,7 +4,6 @@ use query::Scorer; use DocId; use Score; - /// Creates a `DocSet` that iterator through the intersection of two `DocSet`s. pub struct Intersection { docsets: Vec, @@ -77,7 +76,6 @@ impl DocSet for Intersection { // We optimize skipping by skipping every single member // of the intersection to target. - // TODO fix BUG... // what if we overstep on the second member of the intersection? // The first member is not necessarily correct. 
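The removed TODO above points at a real subtlety in intersection skipping: once one member of the intersection oversteps the target, the members visited earlier were only aligned to the original target, so the larger candidate has to be re-checked against all of them. A minimal standalone sketch of that re-alignment loop, using plain sorted vectors instead of tantivy's DocSet trait (the helper names here are illustrative, not part of the crate):

/// Position of the first element >= `target` in a sorted slice, if any.
fn seek(sorted: &[u32], target: u32) -> Option<usize> {
    sorted.iter().position(|&d| d >= target)
}

/// First doc >= `target` present in every sorted list, or `None`.
fn intersection_skip(lists: &[Vec<u32>], mut target: u32) -> Option<u32> {
    loop {
        let mut candidate = target;
        let mut all_aligned = true;
        for list in lists {
            let doc = list[seek(list, candidate)?]; // `None` => one list is exhausted
            if doc != candidate {
                // Overstep: earlier lists were only aligned to the old candidate,
                // so remember the larger doc and rescan every list against it.
                candidate = doc;
                all_aligned = false;
            }
        }
        if all_aligned {
            return Some(candidate);
        }
        target = candidate;
    }
}

fn main() {
    let lists = vec![vec![1, 4, 5, 6], vec![2, 5, 10]];
    assert_eq!(intersection_skip(&lists, 3), Some(5)); // oversteps 4, re-aligns on 5
    assert_eq!(intersection_skip(&lists, 6), None);    // no common doc >= 6
}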
@@ -112,8 +110,6 @@ impl DocSet for Intersection { return SkipResult::OverStep; } } - - } fn doc(&self) -> DocId { @@ -130,16 +126,14 @@ impl DocSet for Intersection { } impl Scorer for Intersection - where TScorer: Scorer { +where + TScorer: Scorer, +{ fn score(&mut self) -> Score { - self.docsets - .iter_mut() - .map(Scorer::score) - .sum() + self.docsets.iter_mut().map(Scorer::score).sum() } } - #[cfg(test)] mod tests { use postings::SkipResult; @@ -178,7 +172,6 @@ mod tests { assert_eq!(intersection.doc(), 0); } - #[test] fn test_intersection_skip() { let left = VecPostings::from(vec![0, 1, 2, 4]); @@ -188,30 +181,38 @@ mod tests { assert_eq!(intersection.doc(), 2); } - #[test] fn test_intersection_skip_against_unoptimized() { - test_skip_against_unoptimized(|| { - let left = VecPostings::from(vec![4]); - let right = VecPostings::from(vec![2, 5]); - box Intersection::from(vec![left, right]) - }, vec![0,2,4,5,6]); - test_skip_against_unoptimized(|| { - let mut left = VecPostings::from(vec![1, 4, 5, 6]); - let mut right = VecPostings::from(vec![2, 5, 10]); - left.advance(); - right.advance(); - box Intersection::from(vec![left, right]) - }, vec![0,1,2,3,4,5,6,7,10,11]); - test_skip_against_unoptimized(|| { - box Intersection::from(vec![ - VecPostings::from(vec![1, 4, 5, 6]), - VecPostings::from(vec![1, 2, 5, 6]), - VecPostings::from(vec![1, 4, 5, 6]), - VecPostings::from(vec![1, 5, 6]), - VecPostings::from(vec![2, 4, 5, 7, 8]) - ]) - }, vec![0,1,2,3,4,5,6,7,10,11]); + test_skip_against_unoptimized( + || { + let left = VecPostings::from(vec![4]); + let right = VecPostings::from(vec![2, 5]); + box Intersection::from(vec![left, right]) + }, + vec![0, 2, 4, 5, 6], + ); + test_skip_against_unoptimized( + || { + let mut left = VecPostings::from(vec![1, 4, 5, 6]); + let mut right = VecPostings::from(vec![2, 5, 10]); + left.advance(); + right.advance(); + box Intersection::from(vec![left, right]) + }, + vec![0, 1, 2, 3, 4, 5, 6, 7, 10, 11], + ); + test_skip_against_unoptimized( + || { + box Intersection::from(vec![ + VecPostings::from(vec![1, 4, 5, 6]), + VecPostings::from(vec![1, 2, 5, 6]), + VecPostings::from(vec![1, 4, 5, 6]), + VecPostings::from(vec![1, 5, 6]), + VecPostings::from(vec![2, 4, 5, 7, 8]), + ]) + }, + vec![0, 1, 2, 3, 4, 5, 6, 7, 10, 11], + ); } #[test] diff --git a/src/postings/mod.rs b/src/postings/mod.rs index c4d0162b7..d4570475f 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -633,7 +633,6 @@ pub mod tests { }); } - /// Wraps a given docset, and forward alls call but the /// `.skip_next(...)`. 
This is useful to test that a specialized /// implementation of `.skip_next(...)` is consistent @@ -660,25 +659,38 @@ pub mod tests { } } - pub fn test_skip_against_unoptimizedBox>(postings_factory: F, targets: Vec) { + pub fn test_skip_against_unoptimized Box>( + postings_factory: F, + targets: Vec, + ) { for target in targets { let mut postings_opt = postings_factory(); let mut postings_unopt = UnoptimizedDocSet::wrap(postings_factory()); let skip_result_opt = postings_opt.skip_next(target); let skip_result_unopt = postings_unopt.skip_next(target); - assert_eq!(skip_result_unopt, skip_result_opt, "Failed while skipping to {}", target); + assert_eq!( + skip_result_unopt, skip_result_opt, + "Failed while skipping to {}", + target + ); match skip_result_opt { SkipResult::Reached => assert_eq!(postings_opt.doc(), target), SkipResult::OverStep => assert!(postings_opt.doc() > target), - SkipResult::End => { return; }, + SkipResult::End => { + return; + } } while postings_opt.advance() { assert!(postings_unopt.advance()); - assert_eq!(postings_opt.doc(), postings_unopt.doc(), "Failed while skipping to {}", target); + assert_eq!( + postings_opt.doc(), + postings_unopt.doc(), + "Failed while skipping to {}", + target + ); } assert!(!postings_unopt.advance()); } } - } diff --git a/src/postings/union.rs b/src/postings/union.rs index 575a0f69b..853cae685 100644 --- a/src/postings/union.rs +++ b/src/postings/union.rs @@ -1,18 +1,18 @@ use postings::DocSet; +use query::Scorer; use postings::SkipResult; use common::TinySet; use std::cmp::Ordering; use DocId; use query::score_combiner::{DoNothingCombiner, ScoreCombiner}; - const HORIZON_NUM_TINYBITSETS: usize = 32; const HORIZON: u32 = 64u32 * HORIZON_NUM_TINYBITSETS as u32; /// Creates a `DocSet` that iterator through the intersection of two `DocSet`s. -pub struct Union - where TDocSet: DocSet, TScoreCombiner: ScoreCombiner { - docsets: Vec, +pub struct Union +{ + docsets: Vec, bitsets: Box<[TinySet; HORIZON_NUM_TINYBITSETS]>, scores: Box<[TScoreCombiner; HORIZON as usize]>, cursor: usize, @@ -20,60 +20,65 @@ pub struct Union doc: DocId, } -impl From> for Union { - fn from(docsets: Vec) -> Union { - let non_empty_docsets: Vec = - docsets - .into_iter() - .flat_map(|mut docset| { +impl From> + for Union + where TScoreCombiner: ScoreCombiner, TScorer: Scorer +{ + fn from(docsets: Vec) -> Union { + let non_empty_docsets: Vec = docsets + .into_iter() + .flat_map( + |mut docset| { if docset.advance() { Some(docset) } else { None } - }) - .collect(); + }, + ) + .collect(); Union { docsets: non_empty_docsets, bitsets: Box::new([TinySet::empty(); HORIZON_NUM_TINYBITSETS]), - scores: Box::new([TScoreCombiner::default(); HORIZON]), + scores: Box::new([TScoreCombiner::default(); HORIZON as usize]), cursor: HORIZON_NUM_TINYBITSETS, offset: 0, - doc: 0 + doc: 0, } } } - -fn refill(docsets: &mut Vec, bitsets: &mut [TinySet; HORIZON_NUM_TINYBITSETS], min_doc: DocId) { - docsets - .drain_filter(|docset| { - let horizon = min_doc + HORIZON as u32; - loop { - let doc = docset.doc(); - if doc >= horizon { - return false; - } - // add this document - let delta = doc - min_doc; - bitsets[(delta / 64) as usize].insert_mut(delta % 64u32); - if !docset.advance() { - // remove the docset, it has been entirely consumed. 
- return true; - } +fn refill( + scorers: &mut Vec, + bitsets: &mut [TinySet; HORIZON_NUM_TINYBITSETS], + score_combiner: &mut [TScoreCombiner; HORIZON as usize], + min_doc: DocId, +) { + scorers.drain_filter(|scorer| { + let horizon = min_doc + HORIZON as u32; + loop { + let doc = scorer.doc(); + if doc >= horizon { + return false; } - }); + // add this document + let delta = doc - min_doc; + bitsets[(delta / 64) as usize].insert_mut(delta % 64u32); + score_combiner[delta as usize].update(scorer); + if !scorer.advance() { + // remove the docset, it has been entirely consumed. + return true; + } + } + }); } -impl Union { +impl Union { fn refill(&mut self) -> bool { - if let Some(min_doc) = self.docsets - .iter_mut() - .map(|docset| docset.doc()) - .min() { + if let Some(min_doc) = self.docsets.iter_mut().map(|docset| docset.doc()).min() { self.offset = min_doc; self.cursor = 0; - refill(&mut self.docsets, &mut *self.bitsets, min_doc); + refill(&mut self.docsets, &mut *self.bitsets, &mut *self.scores, min_doc); self.advance(); true } else { @@ -94,8 +99,7 @@ impl Union DocSet for Union { - +impl DocSet for Union { fn advance(&mut self) -> bool { if self.advance_buffered() { return true; @@ -150,18 +154,12 @@ impl DocSet for Union= to the target. self.docsets - .drain_filter(|docset| { - match docset.doc().cmp(&target) { - Ordering::Less => { - match docset.skip_next(target) { - SkipResult::End => true, - SkipResult::Reached | SkipResult::OverStep => false - } - } - Ordering::Equal | Ordering::Greater => { - false - } - } + .drain_filter(|docset| match docset.doc().cmp(&target) { + Ordering::Less => match docset.skip_next(target) { + SkipResult::End => true, + SkipResult::Reached | SkipResult::OverStep => false, + }, + Ordering::Equal | Ordering::Greater => false, }); // at this point all of the docsets @@ -177,7 +175,6 @@ impl DocSet for Union DocId { @@ -189,13 +186,11 @@ impl DocSet for Union>) { use std::collections::BTreeSet; @@ -213,15 +209,14 @@ mod tests { val_set.insert(v); } } - let union_vals: Vec = val_set - .into_iter() - .collect(); + let union_vals: Vec = val_set.into_iter().collect(); let mut union_expected = VecPostings::from(union_vals); - let mut union = Union::from( + let mut union: Union<_, DoNothingCombiner> = Union::from( vals.into_iter() .map(VecPostings::from) - .collect::>() + .map(ConstScorer::new) + .collect::>>(), ); while union.advance() { assert!(union_expected.advance()); @@ -232,30 +227,25 @@ mod tests { #[test] fn test_union() { - aux_test_union( - vec![ - vec![1, 3333, 100000000u32], - vec![1,2, 100000000u32], - vec![1,2, 100000000u32], - vec![] - ] - ); - aux_test_union( - vec![ - vec![1, 3333, 100000000u32], - vec![1,2, 100000000u32], - vec![1,2, 100000000u32], - vec![] - ] - ); + aux_test_union(vec![ + vec![1, 3333, 100000000u32], + vec![1, 2, 100000000u32], + vec![1, 2, 100000000u32], + vec![], + ]); + aux_test_union(vec![ + vec![1, 3333, 100000000u32], + vec![1, 2, 100000000u32], + vec![1, 2, 100000000u32], + vec![], + ]); aux_test_union(vec![ tests::sample_with_seed(100_000, 0.01, 1), tests::sample_with_seed(100_000, 0.05, 2), - tests::sample_with_seed(100_000, 0.001, 3) + tests::sample_with_seed(100_000, 0.001, 3), ]); } - fn test_aux_union_skip(docs_list: &[Vec], skip_targets: Vec) { let mut btree_set = BTreeSet::new(); for docs in docs_list { @@ -264,12 +254,13 @@ mod tests { } } let docset_factory = || { - let res: Box = box Union::from( + let res: Box = box Union::<_, DoNothingCombiner>::from( docs_list .iter() .map(|docs| docs.clone()) 
.map(VecPostings::from) - .collect::>() + .map(ConstScorer::new) + .collect::>(), ); res }; @@ -282,29 +273,24 @@ mod tests { test_skip_against_unoptimized(docset_factory, skip_targets); } - #[test] fn test_union_skip_corner_case() { - test_aux_union_skip( - &[vec![165132, 167382], vec![25029, 25091]], - vec![25029], - ); + test_aux_union_skip(&[vec![165132, 167382], vec![25029, 25091]], vec![25029]); } #[test] fn test_union_skip_corner_case2() { test_aux_union_skip( - &[ - vec![1u32, 1u32 + HORIZON], - vec![2u32, 1000u32, 10_000u32] - ], vec![0u32, 1u32, 2u32, 3u32, 1u32 + HORIZON, 2u32 + HORIZON]); + &[vec![1u32, 1u32 + HORIZON], vec![2u32, 1000u32, 10_000u32]], + vec![0u32, 1u32, 2u32, 3u32, 1u32 + HORIZON, 2u32 + HORIZON], + ); } #[test] fn test_union_skip_corner_case3() { - let mut docset = Union::from(vec![ - VecPostings::from(vec![0u32, 5u32]), - VecPostings::from(vec![1u32, 4u32]), + let mut docset = Union::<_, DoNothingCombiner>::from(vec![ + ConstScorer::new(VecPostings::from(vec![0u32, 5u32])), + ConstScorer::new(VecPostings::from(vec![1u32, 4u32])) ]); assert!(docset.advance()); assert_eq!(docset.doc(), 0u32); @@ -314,53 +300,70 @@ mod tests { #[test] fn test_union_skip_random() { - test_aux_union_skip(&[ - vec![1,2,3,7], - vec![1,3,9,10000], - vec![1,3,8,9,100] - ], vec![1,2,3,5,6,7,8,100]); - test_aux_union_skip(&[ - tests::sample_with_seed(100_000, 0.001, 1), - tests::sample_with_seed(100_000, 0.002, 2), - tests::sample_with_seed(100_000, 0.005, 3) - ], tests::sample_with_seed(100_000, 0.01, 4)); + test_aux_union_skip( + &[ + vec![1, 2, 3, 7], + vec![1, 3, 9, 10000], + vec![1, 3, 8, 9, 100], + ], + vec![1, 2, 3, 5, 6, 7, 8, 100], + ); + test_aux_union_skip( + &[ + tests::sample_with_seed(100_000, 0.001, 1), + tests::sample_with_seed(100_000, 0.002, 2), + tests::sample_with_seed(100_000, 0.005, 3), + ], + tests::sample_with_seed(100_000, 0.01, 4), + ); } #[test] fn test_union_skip_specific() { - test_aux_union_skip(&[ - vec![1,2,3,7], - vec![1,3,9,10000], - vec![1,3,8,9,100] - ], vec![1,2,3,7,8,9,99,100,101,500,20000]); + test_aux_union_skip( + &[ + vec![1, 2, 3, 7], + vec![1, 3, 9, 10000], + vec![1, 3, 8, 9, 100], + ], + vec![1, 2, 3, 7, 8, 9, 99, 100, 101, 500, 20000], + ); } #[bench] fn bench_union_3_high(bench: &mut Bencher) { - let union_docset: Vec> = vec![ + let union_docset: Vec> = vec![ tests::sample_with_seed(100_000, 0.1, 0), tests::sample_with_seed(100_000, 0.2, 1), ]; bench.iter(|| { - let mut v = Union::from(union_docset.iter() - .map(|doc_ids| VecPostings::from(doc_ids.clone())) - .collect::>()); - while v.advance() {}; + let mut v = Union::<_, DoNothingCombiner>::from( + union_docset + .iter() + .map(|doc_ids| VecPostings::from(doc_ids.clone())) + .map(ConstScorer::new) + .collect::>(), + ); + while v.advance() {} }); } #[bench] fn bench_union_3_low(bench: &mut Bencher) { - let union_docset: Vec> = vec![ + let union_docset: Vec> = vec![ tests::sample_with_seed(100_000, 0.01, 0), tests::sample_with_seed(100_000, 0.05, 1), - tests::sample_with_seed(100_000, 0.001, 2) + tests::sample_with_seed(100_000, 0.001, 2), ]; bench.iter(|| { - let mut v = Union::from(union_docset.iter() - .map(|doc_ids| VecPostings::from(doc_ids.clone())) - .collect::>()); - while v.advance() {}; + let mut v = Union::<_, DoNothingCombiner>::from( + union_docset + .iter() + .map(|doc_ids| VecPostings::from(doc_ids.clone())) + .map(ConstScorer::new) + .collect::>(), + ); + while v.advance() {} }); } -} \ No newline at end of file +} diff --git a/src/postings/vec_postings.rs 
b/src/postings/vec_postings.rs index 54de71861..51c402cd6 100644 --- a/src/postings/vec_postings.rs +++ b/src/postings/vec_postings.rs @@ -56,7 +56,6 @@ impl Postings for VecPostings { } } - #[cfg(test)] pub mod tests { diff --git a/src/query/boolean_query/boolean_query.rs b/src/query/boolean_query/boolean_query.rs index 481efb55e..18ceff404 100644 --- a/src/query/boolean_query/boolean_query.rs +++ b/src/query/boolean_query/boolean_query.rs @@ -22,14 +22,14 @@ use query::Occur; #[derive(Debug)] pub struct BooleanQuery { subqueries: Vec<(Occur, Box)>, - scoring_disabled: bool + scoring_disabled: bool, } impl From)>> for BooleanQuery { fn from(subqueries: Vec<(Occur, Box)>) -> BooleanQuery { BooleanQuery { subqueries, - scoring_disabled: false + scoring_disabled: false, } } } @@ -49,9 +49,7 @@ impl Query for BooleanQuery { fn weight(&self, searcher: &Searcher) -> Result> { let sub_weights = self.subqueries .iter() - .map(|&(ref occur, ref subquery)| { - Ok((*occur, subquery.weight(searcher)?)) - }) + .map(|&(ref occur, ref subquery)| Ok((*occur, subquery.weight(searcher)?))) .collect::>()?; Ok(box BooleanWeight::new(sub_weights, self.scoring_disabled)) } diff --git a/src/query/boolean_query/boolean_scorer.rs b/src/query/boolean_query/boolean_scorer.rs deleted file mode 100644 index 0b28c58e0..000000000 --- a/src/query/boolean_query/boolean_scorer.rs +++ /dev/null @@ -1,147 +0,0 @@ -use query::Scorer; -use DocId; -use std::collections::BinaryHeap; -use std::cmp::Ordering; -use postings::DocSet; -use query::OccurFilter; -use query::score_combiner::{ScoreCombiner, SumWithCoordsCombiner}; - -/// Each `HeapItem` represents the head of -/// one of scorer being merged. -/// -/// * `doc` - is the current doc id for the given segment postings -/// * `ord` - is the ordinal used to identify to which segment postings -/// this heap item belong to. -#[derive(Eq, PartialEq)] -struct HeapItem { - doc: DocId, - ord: u32, -} - -/// `HeapItem` are ordered by the document -impl PartialOrd for HeapItem { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for HeapItem { - fn cmp(&self, other: &Self) -> Ordering { - (other.doc).cmp(&self.doc) - } -} - -pub struct BooleanScorer { - scorers: Vec, - queue: BinaryHeap, - doc: DocId, - score_combiner: ScoreCombiner, - occur_filter: OccurFilter, -} - -impl BooleanScorer { - pub fn new(scorers: Vec, occur_filter: OccurFilter) -> BooleanScorer { - let score_combiner = ScoreCombiner::default_for_num_scorers(scorers.len()); - let mut non_empty_scorers: Vec = Vec::new(); - for mut posting in scorers { - let non_empty = posting.advance(); - if non_empty { - non_empty_scorers.push(posting); - } - } - let heap_items: Vec = non_empty_scorers - .iter() - .map(|posting| posting.doc()) - .enumerate() - .map(|(ord, doc)| HeapItem { - doc, - ord: ord as u32, - }) - .collect(); - BooleanScorer { - scorers: non_empty_scorers, - queue: BinaryHeap::from(heap_items), - doc: 0u32, - score_combiner, - occur_filter, - } - } - - /// Advances the head of our heap (the segment posting with the lowest doc) - /// It will also update the new current `DocId` as well as the term frequency - /// associated with the segment postings. - /// - /// After advancing the `SegmentPosting`, the postings is removed from the heap - /// if it has been entirely consumed, or pushed back into the heap. - /// - /// # Panics - /// This method will panic if the head `SegmentPostings` is not empty. 
- fn advance_head(&mut self) { - { - let mut mutable_head = self.queue.peek_mut().unwrap(); - let cur_scorers = &mut self.scorers[mutable_head.ord as usize]; - if cur_scorers.advance() { - mutable_head.doc = cur_scorers.doc(); - return; - } - } - self.queue.pop(); - } -} - -impl DocSet for BooleanScorer { - fn advance(&mut self) -> bool { - loop { - self.score_combiner.clear(); - let mut ord_bitset = 0u64; - match self.queue.peek() { - Some(heap_item) => { - let ord = heap_item.ord as usize; - self.doc = heap_item.doc; - let score = self.scorers[ord].score(); - self.score_combiner.update(score); - ord_bitset |= 1 << ord; - } - None => { - return false; - } - } - self.advance_head(); - while let Some(&HeapItem { doc, ord }) = self.queue.peek() { - if doc == self.doc { - let ord = ord as usize; - let score = self.scorers[ord].score(); - self.score_combiner.update(score); - ord_bitset |= 1 << ord; - } else { - break; - } - self.advance_head(); - } - if self.occur_filter.accept(ord_bitset) { - return true; - } - } - } - - fn doc(&self) -> DocId { - self.doc - } - - fn size_hint(&self) -> u32 { - // TODO fix this. it should be the min - // of the MUST scorer - // and the max of the SHOULD scorers. - self.scorers - .iter() - .map(|scorer| scorer.size_hint()) - .max() - .unwrap() - } -} - -impl Scorer for BooleanScorer { - fn score(&mut self) -> f32 { - self.score_combiner.score() - } -} diff --git a/src/query/boolean_query/boolean_weight.rs b/src/query/boolean_query/boolean_weight.rs index e6e9cfd0a..e88ed5168 100644 --- a/src/query/boolean_query/boolean_weight.rs +++ b/src/query/boolean_query/boolean_weight.rs @@ -5,41 +5,39 @@ use std::collections::HashMap; use query::EmptyScorer; use query::Scorer; use query::Exclude; -use super::BooleanScorer; -use query::OccurFilter; use query::ConstScorer; use query::Occur; use query::RequiredOptionalScorer; +use query::score_combiner::{SumWithCoordsCombiner, DoNothingCombiner, ScoreCombiner}; use Result; - -fn scorer_union<'a>(docsets: Vec>) -> Box { +fn scorer_union<'a, TScoreCombiner: ScoreCombiner + 'static>(docsets: Vec>) -> Box { assert!(!docsets.is_empty()); if docsets.len() == 1 { - docsets - .into_iter() - .next() - .unwrap() //< we checked the size beforehands + docsets.into_iter().next().unwrap() //< we checked the size beforehands } else { // TODO have a UnionScorer instead. 
- box ConstScorer::new(Union::from(docsets)) + box ConstScorer::new(Union::<_, TScoreCombiner>::from(docsets)) } } pub struct BooleanWeight { weights: Vec<(Occur, Box)>, - scoring_disabled: bool + scoring_disabled: bool, } impl BooleanWeight { pub fn new(weights: Vec<(Occur, Box)>, scoring_disabled: bool) -> BooleanWeight { BooleanWeight { weights, - scoring_disabled + scoring_disabled, } } - fn scorer_if_scoring_disabled<'a>(&'a self, reader: &'a SegmentReader) -> Result> { + fn complex_scorer<'a, TScoreCombiner: ScoreCombiner + 'static>( + &'a self, + reader: &'a SegmentReader, + ) -> Result> { let mut per_occur_scorers: HashMap>> = HashMap::new(); for &(ref occur, ref subweight) in self.weights.iter() { let sub_scorer: Box = subweight.scorer(reader)?; @@ -49,28 +47,32 @@ impl BooleanWeight { .push(sub_scorer); } - let should_scorer_opt: Option> = per_occur_scorers - .remove(&Occur::Should) - .map(scorer_union); + let should_scorer_opt: Option> = + per_occur_scorers.remove(&Occur::Should).map(scorer_union::); - let exclude_scorer_opt: Option> = per_occur_scorers - .remove(&Occur::MustNot) - .map(scorer_union); + let exclude_scorer_opt: Option> = + per_occur_scorers.remove(&Occur::MustNot).map(scorer_union::); - let must_scorer_opt: Option> = per_occur_scorers - .remove(&Occur::Must) - .map(|scorers| { - let scorer: Box = box ConstScorer::new(Intersection::from(scorers)); - scorer + let must_scorer_opt: Option> = + per_occur_scorers.remove(&Occur::Must).map(|scorers| { + if scorers.len() == 1 { + scorers.into_iter().next().unwrap() + } else { + let scorer: Box = box Intersection::from(scorers); + scorer + } }); let positive_scorer: Box = match (should_scorer_opt, must_scorer_opt) { - (Some(should_scorer), Some(must_scorer)) => - box RequiredOptionalScorer::new(must_scorer, should_scorer), - (None, Some(must_scorer)) => - must_scorer, - (Some(should_scorer), None) => - should_scorer, + (Some(should_scorer), Some(must_scorer)) => { + if self.scoring_disabled { + must_scorer + } else { + box RequiredOptionalScorer::<_,_,TScoreCombiner>::new(must_scorer, should_scorer) + } + } + (None, Some(must_scorer)) => must_scorer, + (Some(should_scorer), None) => should_scorer, (None, None) => { return Ok(box EmptyScorer); } @@ -83,38 +85,37 @@ impl BooleanWeight { } } - fn scorer_if_scoring_enabled<'a>(&'a self, reader: &'a SegmentReader) -> Result> { - let sub_scorers: Vec> = self.weights - .iter() - .map(|&(_, ref weight)| weight) - .map(|weight| weight.scorer(reader)) - .collect::>()?; - let occurs: Vec = self.weights - .iter() - .map(|&(ref occur, _)| *occur) - .collect(); - let occur_filter = OccurFilter::new(&occurs); - let boolean_scorer = BooleanScorer::new(sub_scorers, occur_filter); - Ok(box boolean_scorer) - } +// fn scorer_if_scoring_enabled<'a>( +// &'a self, +// reader: &'a SegmentReader, +// ) -> Result> { +// let sub_scorers: Vec> = self.weights +// .iter() +// .map(|&(_, ref weight)| weight) +// .map(|weight| weight.scorer(reader)) +// .collect::>()?; +// let occurs: Vec = self.weights.iter().map(|&(ref occur, _)| *occur).collect(); +// let occur_filter = OccurFilter::new(&occurs); +// let boolean_scorer = BooleanScorer::new(sub_scorers, occur_filter); +// Ok(box boolean_scorer) +// } } - impl Weight for BooleanWeight { fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result> { if self.weights.is_empty() { Ok(box EmptyScorer) } else if self.weights.len() == 1 { let &(occur, ref weight) = &self.weights[0]; - if occur == Occur::MustNot { + if occur == Occur::MustNot { Ok(box 
EmptyScorer) } else { weight.scorer(reader) } } else if self.scoring_disabled { - self.scorer_if_scoring_disabled(reader) + self.complex_scorer::(reader) } else { - self.scorer_if_scoring_enabled(reader) + self.complex_scorer::(reader) } - } + } } diff --git a/src/query/boolean_query/mod.rs b/src/query/boolean_query/mod.rs index 524da536f..e7c3b48ac 100644 --- a/src/query/boolean_query/mod.rs +++ b/src/query/boolean_query/mod.rs @@ -1,18 +1,15 @@ mod boolean_query; -mod boolean_scorer; +//mod boolean_scorer; mod boolean_weight; pub use self::boolean_query::BooleanQuery; -pub use self::boolean_scorer::BooleanScorer; +//pub use self::boolean_scorer::BooleanScorer; #[cfg(test)] mod tests { use super::*; - use postings::{DocSet, VecPostings}; use query::Scorer; - use query::OccurFilter; - use query::term_query::TermScorer; use query::Occur; use query::Query; use query::TermQuery; @@ -111,40 +108,40 @@ mod tests { } } - #[test] - pub fn test_boolean_scorer() { - let occurs = vec![Occur::Should, Occur::Should]; - let occur_filter = OccurFilter::new(&occurs); - - let left_fieldnorms = - U64FastFieldReader::from((0u64..9u64).map(|doc| doc * 3).collect::>()); - - let left = VecPostings::from(vec![1, 2, 3]); - let left_scorer = TermScorer { - idf: 1f32, - fieldnorm_reader_opt: Some(left_fieldnorms), - postings: left, - }; - - let right_fieldnorms = - U64FastFieldReader::from((0u64..9u64).map(|doc| doc * 5).collect::>()); - let right = VecPostings::from(vec![1, 3, 8]); - - let right_scorer = TermScorer { - idf: 4f32, - fieldnorm_reader_opt: Some(right_fieldnorms), - postings: right, - }; - - let mut boolean_scorer = BooleanScorer::new(vec![left_scorer, right_scorer], occur_filter); - assert_eq!(boolean_scorer.next(), Some(1u32)); - assert!(abs_diff(boolean_scorer.score(), 2.3662047) < 0.001); - assert_eq!(boolean_scorer.next(), Some(2u32)); - assert!(abs_diff(boolean_scorer.score(), 0.20412415) < 0.001f32); - assert_eq!(boolean_scorer.next(), Some(3u32)); - assert_eq!(boolean_scorer.next(), Some(8u32)); - assert!(abs_diff(boolean_scorer.score(), 0.31622776) < 0.001f32); - assert!(!boolean_scorer.advance()); - } +// #[test] +// pub fn test_boolean_scorer() { +// let occurs = vec![Occur::Should, Occur::Should]; +// let occur_filter = OccurFilter::new(&occurs); +// +// let left_fieldnorms = +// U64FastFieldReader::from((0u64..9u64).map(|doc| doc * 3).collect::>()); +// +// let left = VecPostings::from(vec![1, 2, 3]); +// let left_scorer = TermScorer { +// idf: 1f32, +// fieldnorm_reader_opt: Some(left_fieldnorms), +// postings: left, +// }; +// +// let right_fieldnorms = +// U64FastFieldReader::from((0u64..9u64).map(|doc| doc * 5).collect::>()); +// let right = VecPostings::from(vec![1, 3, 8]); +// +// let right_scorer = TermScorer { +// idf: 4f32, +// fieldnorm_reader_opt: Some(right_fieldnorms), +// postings: right, +// }; +// +// let mut boolean_scorer = BooleanScorer::new(vec![left_scorer, right_scorer], occur_filter); +// assert_eq!(boolean_scorer.next(), Some(1u32)); +// assert!(abs_diff(boolean_scorer.score(), 2.3662047) < 0.001); +// assert_eq!(boolean_scorer.next(), Some(2u32)); +// assert!(abs_diff(boolean_scorer.score(), 0.20412415) < 0.001f32); +// assert_eq!(boolean_scorer.next(), Some(3u32)); +// assert_eq!(boolean_scorer.next(), Some(8u32)); +// assert!(abs_diff(boolean_scorer.score(), 0.31622776) < 0.001f32); +// assert!(!boolean_scorer.advance()); +// } } diff --git a/src/query/exclude.rs b/src/query/exclude.rs index 746c31bd2..c82d0e252 100644 --- a/src/query/exclude.rs +++ 
b/src/query/exclude.rs @@ -7,7 +7,7 @@ use DocId; #[derive(Clone, Copy, Debug)] enum State { ExcludeOne(DocId), - Finished + Finished, } /// Filters a given `DocSet` by removing the docs from a given `DocSet`. @@ -19,18 +19,20 @@ pub struct Exclude { excluding_state: State, } - impl Exclude - where TDocSetExclude: DocSet { - +where + TDocSetExclude: DocSet, +{ /// Creates a new `ExcludeScorer` - pub fn new(underlying_docset: TDocSet, mut excluding_docset: TDocSetExclude) -> Exclude { - let state = - if excluding_docset.advance() { - State::ExcludeOne(excluding_docset.doc()) - } else { - State::Finished - }; + pub fn new( + underlying_docset: TDocSet, + mut excluding_docset: TDocSetExclude, + ) -> Exclude { + let state = if excluding_docset.advance() { + State::ExcludeOne(excluding_docset.doc()) + } else { + State::Finished + }; Exclude { underlying_docset, excluding_docset, @@ -40,8 +42,10 @@ impl Exclude } impl Exclude - where TDocSet: DocSet, TDocSetExclude: DocSet { - +where + TDocSet: DocSet, + TDocSetExclude: DocSet, +{ /// Returns true iff the doc is not removed. /// /// The method has to be called with non strictly @@ -64,22 +68,20 @@ impl Exclude self.excluding_state = State::Finished; true } - SkipResult::Reached => { - false - } + SkipResult::Reached => false, } } } - State::Finished => { - true - } + State::Finished => true, } } } impl DocSet for Exclude - where TDocSet: DocSet, TDocSetExclude: DocSet { - +where + TDocSet: DocSet, + TDocSetExclude: DocSet, +{ fn advance(&mut self) -> bool { while self.underlying_docset.advance() { if self.accept() { @@ -101,7 +103,6 @@ impl DocSet for Exclude } else { SkipResult::End } - } fn doc(&self) -> DocId { @@ -116,9 +117,11 @@ impl DocSet for Exclude } } - impl Scorer for Exclude - where TScorer: Scorer, TDocSetExclude: DocSet { +where + TScorer: Scorer, + TDocSetExclude: DocSet, +{ fn score(&mut self) -> Score { self.underlying_docset.score() } @@ -135,24 +138,26 @@ mod tests { #[test] fn test_exclude() { let mut exclude_scorer = Exclude::new( - VecPostings::from(vec![1,2,5,8,10,15,24]), - VecPostings::from(vec![1,2,3,10,16,24]) + VecPostings::from(vec![1, 2, 5, 8, 10, 15, 24]), + VecPostings::from(vec![1, 2, 3, 10, 16, 24]), ); let mut els = vec![]; while exclude_scorer.advance() { els.push(exclude_scorer.doc()); } - assert_eq!(els, vec![5,8,15]); + assert_eq!(els, vec![5, 8, 15]); } #[test] fn test_exclude_skip() { test_skip_against_unoptimized( - || box Exclude::new( - VecPostings::from(vec![1, 2, 5, 8, 10, 15, 24]), - VecPostings::from(vec![1, 2, 3, 10, 16, 24]) - ), - vec![1, 2, 5, 8, 10, 15, 24] + || { + box Exclude::new( + VecPostings::from(vec![1, 2, 5, 8, 10, 15, 24]), + VecPostings::from(vec![1, 2, 3, 10, 16, 24]), + ) + }, + vec![1, 2, 5, 8, 10, 15, 24], ); } @@ -162,12 +167,14 @@ mod tests { let sample_exclude = sample_with_seed(10_000, 0.05, 2); let sample_skip = sample_with_seed(10_000, 0.005, 3); test_skip_against_unoptimized( - || box Exclude::new( - VecPostings::from(sample_include.clone()), - VecPostings::from(sample_exclude.clone()) - ), - sample_skip + || { + box Exclude::new( + VecPostings::from(sample_include.clone()), + VecPostings::from(sample_exclude.clone()), + ) + }, + sample_skip, ); } -} \ No newline at end of file +} diff --git a/src/query/mod.rs b/src/query/mod.rs index f77f48972..21a73f99d 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -7,7 +7,6 @@ mod boolean_query; mod scorer; mod occur; mod weight; -mod occur_filter; mod term_query; mod query_parser; mod phrase_query; @@ -22,7 +21,6 @@ pub use 
self::reqopt_scorer::RequiredOptionalScorer; pub use self::exclude::Exclude; pub use self::bitset::BitSetDocSet; pub use self::boolean_query::BooleanQuery; -pub use self::occur_filter::OccurFilter; pub use self::occur::Occur; pub use self::phrase_query::PhraseQuery; pub use self::query_parser::QueryParserError; @@ -35,4 +33,3 @@ pub use self::weight::Weight; pub use self::all_query::{AllQuery, AllScorer, AllWeight}; pub use self::range_query::RangeQuery; pub use self::scorer::ConstScorer; - diff --git a/src/query/occur_filter.rs b/src/query/occur_filter.rs deleted file mode 100644 index 8e4499d8f..000000000 --- a/src/query/occur_filter.rs +++ /dev/null @@ -1,39 +0,0 @@ -use query::Occur; - -/// An `OccurFilter` represents a filter over a bitset of -/// at most 64 elements. -/// -/// It wraps some simple bitmask to compute the filter -/// rapidly. -#[derive(Clone, Copy)] -pub struct OccurFilter { - and_mask: u64, - result: u64, -} - -impl OccurFilter { - /// Returns true if the bitset is matching the occur list. - pub fn accept(&self, ord_set: u64) -> bool { - (self.and_mask & ord_set) == self.result - } - - /// Builds an `OccurFilter` from a list of `Occur`. - pub fn new(occurs: &[Occur]) -> OccurFilter { - let mut and_mask = 0u64; - let mut result = 0u64; - for (i, occur) in occurs.iter().enumerate() { - let shift = 1 << i; - match *occur { - Occur::Must => { - and_mask |= shift; - result |= shift; - } - Occur::MustNot => { - and_mask |= shift; - } - Occur::Should => {} - } - } - OccurFilter { and_mask, result } - } -} diff --git a/src/query/query.rs b/src/query/query.rs index 9b0d70f99..bad1b970d 100644 --- a/src/query/query.rs +++ b/src/query/query.rs @@ -45,7 +45,6 @@ pub trait Query: fmt::Debug { /// into a specific type. This is mostly useful for unit tests. fn as_any(&self) -> &Any; - /// Disable scoring. /// /// For some query this may improve performance diff --git a/src/query/reqopt_scorer.rs b/src/query/reqopt_scorer.rs index 2d1fbb679..144cf9b38 100644 --- a/src/query/reqopt_scorer.rs +++ b/src/query/reqopt_scorer.rs @@ -1,9 +1,11 @@ use DocId; use DocSet; use query::Scorer; +use query::score_combiner::{ScoreCombiner, SumCombiner}; use Score; use postings::SkipResult; use std::cmp::Ordering; +use std::marker::PhantomData; /// Given a required scorer and an optional scorer /// matches all document from the required scorer @@ -12,30 +14,38 @@ use std::cmp::Ordering; /// This is useful for queries like `+somethingrequired somethingoptional`. /// /// Note that `somethingoptional` has no impact on the `DocSet`. -pub struct RequiredOptionalScorer { +pub struct RequiredOptionalScorer { req_scorer: TReqScorer, opt_scorer: TOptScorer, score_cache: Option, opt_finished: bool, + _phantom: PhantomData } -impl RequiredOptionalScorer - where TOptScorer: DocSet { - +impl RequiredOptionalScorer +where + TOptScorer: DocSet, +{ /// Creates a new `RequiredOptionalScorer`. 
- pub fn new(req_scorer: TReqScorer, mut opt_scorer: TOptScorer) -> RequiredOptionalScorer { + pub fn new( + req_scorer: TReqScorer, + mut opt_scorer: TOptScorer, + ) -> RequiredOptionalScorer { let opt_finished = !opt_scorer.advance(); RequiredOptionalScorer { req_scorer, opt_scorer, score_cache: None, - opt_finished + opt_finished, + _phantom: PhantomData } } } -impl DocSet for RequiredOptionalScorer - where TReqScorer: DocSet, TOptScorer: DocSet +impl DocSet for RequiredOptionalScorer +where + TReqScorer: DocSet, + TOptScorer: DocSet, { fn advance(&mut self) -> bool { self.score_cache = None; @@ -55,42 +65,42 @@ impl DocSet for RequiredOptionalScorer Scorer for RequiredOptionalScorer - where TReqScorer: Scorer, TOptScorer: Scorer { - +impl Scorer for RequiredOptionalScorer +where + TReqScorer: Scorer, + TOptScorer: Scorer, + TScoreCombiner: ScoreCombiner +{ fn score(&mut self) -> Score { if let Some(score) = self.score_cache { return score; } let doc = self.doc(); - let mut score = self.req_scorer.score(); - if self.opt_finished { - return score; - } - match self.opt_scorer.doc().cmp(&doc) { - Ordering::Greater => {} - Ordering::Equal => { - score += self.opt_scorer.score(); - } - Ordering::Less => { - match self.opt_scorer.skip_next(doc) { + let mut score_combiner = TScoreCombiner::default(); + score_combiner.update(&mut self.req_scorer); + if !self.opt_finished { + match self.opt_scorer.doc().cmp(&doc) { + Ordering::Greater => {} + Ordering::Equal => { + score_combiner.update(&mut self.opt_scorer); + } + Ordering::Less => match self.opt_scorer.skip_next(doc) { SkipResult::Reached => { - score += self.opt_scorer.score(); + score_combiner.update(&mut self.opt_scorer); } SkipResult::End => { self.opt_finished = true; } SkipResult::OverStep => {} - } + }, } } + let score = score_combiner.score(); self.score_cache = Some(score); score } } - #[cfg(test)] mod tests { use tests::sample_with_seed; @@ -100,14 +110,14 @@ mod tests { use DocSet; use postings::tests::test_skip_against_unoptimized; use query::Scorer; - + use query::score_combiner::{DoNothingCombiner, SumCombiner}; #[test] fn test_reqopt_scorer_empty() { let req = vec![1, 3, 7]; - let mut reqoptscorer = RequiredOptionalScorer::new( + let mut reqoptscorer: RequiredOptionalScorer<_, _, SumCombiner> = RequiredOptionalScorer::new( ConstScorer::new(VecPostings::from(req.clone())), - ConstScorer::new(VecPostings::from(vec![])) + ConstScorer::new(VecPostings::from(vec![])), ); let mut docs = vec![]; while reqoptscorer.advance() { @@ -118,9 +128,9 @@ mod tests { #[test] fn test_reqopt_scorer() { - let mut reqoptscorer = RequiredOptionalScorer::new( - ConstScorer::new(VecPostings::from(vec![1,3,7,8,9,10,13,15])), - ConstScorer::new(VecPostings::from(vec![1,2,7,11,12,15])) + let mut reqoptscorer: RequiredOptionalScorer<_,_,SumCombiner> = RequiredOptionalScorer::new( + ConstScorer::new(VecPostings::from(vec![1, 3, 7, 8, 9, 10, 13, 15])), + ConstScorer::new(VecPostings::from(vec![1, 2, 7, 11, 12, 15])), ); { assert!(reqoptscorer.advance()); @@ -170,12 +180,15 @@ mod tests { let req_docs = sample_with_seed(10_000, 0.02, 1); let opt_docs = sample_with_seed(10_000, 0.02, 2); let skip_docs = sample_with_seed(10_000, 0.001, 3); - test_skip_against_unoptimized(|| - box RequiredOptionalScorer::new( - ConstScorer::new(VecPostings::from(req_docs.clone())), - ConstScorer::new(VecPostings::from(opt_docs.clone())) - ), skip_docs); + test_skip_against_unoptimized( + || { + box RequiredOptionalScorer::<_,_,DoNothingCombiner>::new( + 
ConstScorer::new(VecPostings::from(req_docs.clone())), + ConstScorer::new(VecPostings::from(opt_docs.clone())), + ) + }, + skip_docs, + ); } - -} \ No newline at end of file +} diff --git a/src/query/score_combiner.rs b/src/query/score_combiner.rs index b20ea851a..0111d7b03 100644 --- a/src/query/score_combiner.rs +++ b/src/query/score_combiner.rs @@ -1,15 +1,17 @@ use Score; +use query::Scorer; -pub trait ScoreCombiner: Default + Copy { - fn update(&mut self, score: Score); +pub trait ScoreCombiner: Default + Clone + Copy { + fn update(&mut self, scorer: &mut TScorer); fn clear(&mut self); fn score(&self) -> Score; } - +#[derive(Default, Clone, Copy)] //< these should not be too much work :) pub struct DoNothingCombiner; + impl ScoreCombiner for DoNothingCombiner { - fn update(&mut self, score: Score) {} + fn update(&mut self, _scorer: &mut TScorer) {} fn clear(&mut self) {} @@ -18,15 +20,35 @@ impl ScoreCombiner for DoNothingCombiner { } } +#[derive(Default, Clone, Copy)] +pub struct SumCombiner { + score: Score +} + + +impl ScoreCombiner for SumCombiner { + fn update(&mut self, scorer: &mut TScorer) { + self.score += scorer.score(); + } + + fn clear(&mut self) { + self.score = 0f32; + } + + fn score(&self) -> Score { + self.score + } +} + +#[derive(Default, Clone, Copy)] pub struct SumWithCoordsCombiner { - coords: Vec, num_fields: usize, score: Score, } impl ScoreCombiner for SumWithCoordsCombiner { - fn update(&mut self, score: Score) { - self.score += score; + fn update(&mut self, scorer: &mut TScorer) { + self.score += scorer.score(); self.num_fields += 1; } @@ -36,32 +58,7 @@ impl ScoreCombiner for SumWithCoordsCombiner { } fn score(&self) -> Score { - self.score * self.coord() - } - -} - -impl SumWithCoordsCombiner { - /// Compute the coord term - fn coord(&self) -> f32 { - self.coords[self.num_fields] - } - - - pub fn default_for_num_scorers(num_scorers: usize) -> Self { - let query_coords: Vec = (0..num_scorers + 1) - .map(|i| (i as Score) / (num_scorers as Score)) - .collect(); - ScoreCombiner::from(query_coords) + self.score } } -impl From> for ScoreCombiner { - fn from(coords: Vec) -> SumWithCoordsCombiner { - SumWithCoordsCombiner { - coords, - num_fields: 0, - score: 0f32, - } - } -} diff --git a/src/query/term_query/term_query.rs b/src/query/term_query/term_query.rs index 95df496a5..1971fc660 100644 --- a/src/query/term_query/term_query.rs +++ b/src/query/term_query/term_query.rs @@ -41,7 +41,7 @@ impl TermQuery { num_docs: searcher.num_docs(), doc_freq: searcher.doc_freq(&self.term), term: self.term.clone(), - index_record_option: self.index_record_option + index_record_option: self.index_record_option, } } } diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs index 66c1c2029..c6cf7e8f6 100644 --- a/src/query/term_query/term_weight.rs +++ b/src/query/term_query/term_weight.rs @@ -11,7 +11,7 @@ pub struct TermWeight { pub(crate) num_docs: u32, pub(crate) doc_freq: u32, pub(crate) term: Term, - pub(crate) index_record_option: IndexRecordOption + pub(crate) index_record_option: IndexRecordOption, } impl Weight for TermWeight {
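For context, a standalone sketch of the score-combiner pattern this refactor moves toward: instead of being handed a precomputed Score, the combiner is handed the scorer itself and decides whether to call .score() at all, which is what lets DoNothingCombiner skip scoring entirely. The trait and type names follow the diff, but the exact bounds, the generic parameter on update, and the placeholder value returned by DoNothingCombiner are assumptions for illustration, not necessarily tantivy's exact definitions.

type Score = f32;

/// Minimal stand-in for tantivy's `Scorer` trait (illustrative only).
trait Scorer {
    fn score(&mut self) -> Score;
}

/// Combines the scores of the sub-scorers positioned on the current doc.
trait ScoreCombiner: Default + Copy {
    fn update<S: Scorer>(&mut self, scorer: &mut S);
    fn clear(&mut self);
    fn score(&self) -> Score;
}

/// Used when scoring is disabled: never calls `.score()` on the sub-scorers.
#[derive(Default, Clone, Copy)]
struct DoNothingCombiner;

impl ScoreCombiner for DoNothingCombiner {
    fn update<S: Scorer>(&mut self, _scorer: &mut S) {}
    fn clear(&mut self) {}
    fn score(&self) -> Score {
        1.0 // placeholder; assumed, since nothing meaningful is accumulated
    }
}

/// Sums the scores of every matching sub-scorer.
#[derive(Default, Clone, Copy)]
struct SumCombiner {
    score: Score,
}

impl ScoreCombiner for SumCombiner {
    fn update<S: Scorer>(&mut self, scorer: &mut S) {
        self.score += scorer.score();
    }
    fn clear(&mut self) {
        self.score = 0.0;
    }
    fn score(&self) -> Score {
        self.score
    }
}

/// Trivial scorer returning a fixed score, for the usage example below.
struct FixedScorer(Score);

impl Scorer for FixedScorer {
    fn score(&mut self) -> Score {
        self.0
    }
}

fn main() {
    let mut combiner = SumCombiner::default();
    combiner.update(&mut FixedScorer(0.5));
    combiner.update(&mut FixedScorer(1.5));
    assert_eq!(combiner.score(), 2.0);
}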