diff --git a/src/postings/mod.rs b/src/postings/mod.rs index efcdc811a..c4d0162b7 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -31,7 +31,7 @@ pub use self::vec_postings::VecPostings; pub use self::segment_postings::{BlockSegmentPostings, SegmentPostings}; pub use self::intersection::Intersection; -pub use self::union::UnionDocSet; +pub use self::union::Union; pub use common::HasLen; diff --git a/src/postings/union.rs b/src/postings/union.rs index 9efdcfcfb..575a0f69b 100644 --- a/src/postings/union.rs +++ b/src/postings/union.rs @@ -3,22 +3,25 @@ use postings::SkipResult; use common::TinySet; use std::cmp::Ordering; use DocId; +use query::score_combiner::{DoNothingCombiner, ScoreCombiner}; -const HORIZON_NUM_TINYBITSETS: usize = 2048; +const HORIZON_NUM_TINYBITSETS: usize = 32; const HORIZON: u32 = 64u32 * HORIZON_NUM_TINYBITSETS as u32; /// Creates a `DocSet` that iterator through the intersection of two `DocSet`s. -pub struct UnionDocSet { +pub struct Union + where TDocSet: DocSet, TScoreCombiner: ScoreCombiner { docsets: Vec, bitsets: Box<[TinySet; HORIZON_NUM_TINYBITSETS]>, + scores: Box<[TScoreCombiner; HORIZON as usize]>, cursor: usize, offset: DocId, doc: DocId, } -impl From> for UnionDocSet { - fn from(docsets: Vec) -> UnionDocSet { +impl From> for Union { + fn from(docsets: Vec) -> Union { let non_empty_docsets: Vec = docsets .into_iter() @@ -30,9 +33,10 @@ impl From> for UnionDocSet { } }) .collect(); - UnionDocSet { + Union { docsets: non_empty_docsets, bitsets: Box::new([TinySet::empty(); HORIZON_NUM_TINYBITSETS]), + scores: Box::new([TScoreCombiner::default(); HORIZON]), cursor: HORIZON_NUM_TINYBITSETS, offset: 0, doc: 0 @@ -61,7 +65,7 @@ fn refill(docsets: &mut Vec, bitsets: &mut [TinySet; H }); } -impl UnionDocSet { +impl Union { fn refill(&mut self) -> bool { if let Some(min_doc) = self.docsets .iter_mut() @@ -90,7 +94,7 @@ impl UnionDocSet { } } -impl DocSet for UnionDocSet { +impl DocSet for Union { fn advance(&mut self) -> bool { if self.advance_buffered() { @@ -190,7 +194,7 @@ impl DocSet for UnionDocSet { #[cfg(test)] mod tests { - use super::UnionDocSet; + use super::Union; use postings::{VecPostings, DocSet}; use tests; use test::Bencher; @@ -214,7 +218,7 @@ mod tests { .collect(); let mut union_expected = VecPostings::from(union_vals); - let mut union = UnionDocSet::from( + let mut union = Union::from( vals.into_iter() .map(VecPostings::from) .collect::>() @@ -260,7 +264,7 @@ mod tests { } } let docset_factory = || { - let res: Box = box UnionDocSet::from( + let res: Box = box Union::from( docs_list .iter() .map(|docs| docs.clone()) @@ -298,7 +302,7 @@ mod tests { #[test] fn test_union_skip_corner_case3() { - let mut docset = UnionDocSet::from(vec![ + let mut docset = Union::from(vec![ VecPostings::from(vec![0u32, 5u32]), VecPostings::from(vec![1u32, 4u32]), ]); @@ -338,7 +342,7 @@ mod tests { tests::sample_with_seed(100_000, 0.2, 1), ]; bench.iter(|| { - let mut v = UnionDocSet::from(union_docset.iter() + let mut v = Union::from(union_docset.iter() .map(|doc_ids| VecPostings::from(doc_ids.clone())) .collect::>()); while v.advance() {}; @@ -352,7 +356,7 @@ mod tests { tests::sample_with_seed(100_000, 0.001, 2) ]; bench.iter(|| { - let mut v = UnionDocSet::from(union_docset.iter() + let mut v = Union::from(union_docset.iter() .map(|doc_ids| VecPostings::from(doc_ids.clone())) .collect::>()); while v.advance() {}; diff --git a/src/query/boolean_query/boolean_scorer.rs b/src/query/boolean_query/boolean_scorer.rs index 779c908a6..0b28c58e0 100644 --- a/src/query/boolean_query/boolean_scorer.rs +++ b/src/query/boolean_query/boolean_scorer.rs @@ -4,7 +4,7 @@ use std::collections::BinaryHeap; use std::cmp::Ordering; use postings::DocSet; use query::OccurFilter; -use query::boolean_query::ScoreCombiner; +use query::score_combiner::{ScoreCombiner, SumWithCoordsCombiner}; /// Each `HeapItem` represents the head of /// one of scorer being merged. diff --git a/src/query/boolean_query/boolean_weight.rs b/src/query/boolean_query/boolean_weight.rs index fef4b870e..e6e9cfd0a 100644 --- a/src/query/boolean_query/boolean_weight.rs +++ b/src/query/boolean_query/boolean_weight.rs @@ -1,6 +1,6 @@ use query::Weight; use core::SegmentReader; -use postings::{Intersection, UnionDocSet}; +use postings::{Intersection, Union}; use std::collections::HashMap; use query::EmptyScorer; use query::Scorer; @@ -22,7 +22,7 @@ fn scorer_union<'a>(docsets: Vec>) -> Box { .unwrap() //< we checked the size beforehands } else { // TODO have a UnionScorer instead. - box ConstScorer::new(UnionDocSet::from(docsets)) + box ConstScorer::new(Union::from(docsets)) } } diff --git a/src/query/boolean_query/mod.rs b/src/query/boolean_query/mod.rs index e01e77e52..524da536f 100644 --- a/src/query/boolean_query/mod.rs +++ b/src/query/boolean_query/mod.rs @@ -1,11 +1,9 @@ mod boolean_query; mod boolean_scorer; mod boolean_weight; -mod score_combiner; pub use self::boolean_query::BooleanQuery; pub use self::boolean_scorer::BooleanScorer; -pub use self::score_combiner::ScoreCombiner; #[cfg(test)] mod tests { diff --git a/src/query/boolean_query/score_combiner.rs b/src/query/boolean_query/score_combiner.rs deleted file mode 100644 index f6d9c8944..000000000 --- a/src/query/boolean_query/score_combiner.rs +++ /dev/null @@ -1,45 +0,0 @@ -use Score; - -pub struct ScoreCombiner { - coords: Vec, - num_fields: usize, - score: Score, -} - -impl ScoreCombiner { - pub fn update(&mut self, score: Score) { - self.score += score; - self.num_fields += 1; - } - - pub fn clear(&mut self) { - self.score = 0f32; - self.num_fields = 0; - } - - /// Compute the coord term - fn coord(&self) -> f32 { - self.coords[self.num_fields] - } - - pub fn score(&self) -> Score { - self.score * self.coord() - } - - pub fn default_for_num_scorers(num_scorers: usize) -> ScoreCombiner { - let query_coords: Vec = (0..num_scorers + 1) - .map(|i| (i as Score) / (num_scorers as Score)) - .collect(); - ScoreCombiner::from(query_coords) - } -} - -impl From> for ScoreCombiner { - fn from(coords: Vec) -> ScoreCombiner { - ScoreCombiner { - coords, - num_fields: 0, - score: 0f32, - } - } -} diff --git a/src/query/mod.rs b/src/query/mod.rs index 85353af90..f77f48972 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -17,6 +17,7 @@ mod range_query; mod exclude; mod reqopt_scorer; +pub mod score_combiner; pub use self::reqopt_scorer::RequiredOptionalScorer; pub use self::exclude::Exclude; pub use self::bitset::BitSetDocSet; diff --git a/src/query/score_combiner.rs b/src/query/score_combiner.rs new file mode 100644 index 000000000..b20ea851a --- /dev/null +++ b/src/query/score_combiner.rs @@ -0,0 +1,67 @@ +use Score; + +pub trait ScoreCombiner: Default + Copy { + fn update(&mut self, score: Score); + fn clear(&mut self); + fn score(&self) -> Score; +} + + +pub struct DoNothingCombiner; +impl ScoreCombiner for DoNothingCombiner { + fn update(&mut self, score: Score) {} + + fn clear(&mut self) {} + + fn score(&self) -> Score { + 1f32 + } +} + +pub struct SumWithCoordsCombiner { + coords: Vec, + num_fields: usize, + score: Score, +} + +impl ScoreCombiner for SumWithCoordsCombiner { + fn update(&mut self, score: Score) { + self.score += score; + self.num_fields += 1; + } + + fn clear(&mut self) { + self.score = 0f32; + self.num_fields = 0; + } + + fn score(&self) -> Score { + self.score * self.coord() + } + +} + +impl SumWithCoordsCombiner { + /// Compute the coord term + fn coord(&self) -> f32 { + self.coords[self.num_fields] + } + + + pub fn default_for_num_scorers(num_scorers: usize) -> Self { + let query_coords: Vec = (0..num_scorers + 1) + .map(|i| (i as Score) / (num_scorers as Score)) + .collect(); + ScoreCombiner::from(query_coords) + } +} + +impl From> for ScoreCombiner { + fn from(coords: Vec) -> SumWithCoordsCombiner { + SumWithCoordsCombiner { + coords, + num_fields: 0, + score: 0f32, + } + } +}