This commit is contained in:
Paul Masurel
2018-02-16 14:57:08 +09:00
parent 31e5580bfa
commit 76e8db6ed3
8 changed files with 89 additions and 64 deletions

View File

@@ -31,7 +31,7 @@ pub use self::vec_postings::VecPostings;
pub use self::segment_postings::{BlockSegmentPostings, SegmentPostings};
pub use self::intersection::Intersection;
pub use self::union::UnionDocSet;
pub use self::union::Union;
pub use common::HasLen;

View File

@@ -3,22 +3,25 @@ use postings::SkipResult;
use common::TinySet;
use std::cmp::Ordering;
use DocId;
use query::score_combiner::{DoNothingCombiner, ScoreCombiner};
const HORIZON_NUM_TINYBITSETS: usize = 2048;
const HORIZON_NUM_TINYBITSETS: usize = 32;
const HORIZON: u32 = 64u32 * HORIZON_NUM_TINYBITSETS as u32;
/// Creates a `DocSet` that iterates through the union of several `DocSet`s.
pub struct UnionDocSet<TDocSet: DocSet> {
pub struct Union<TDocSet: DocSet, TScoreCombiner=DoNothingCombiner>
where TDocSet: DocSet, TScoreCombiner: ScoreCombiner {
docsets: Vec<TDocSet>,
bitsets: Box<[TinySet; HORIZON_NUM_TINYBITSETS]>,
scores: Box<[TScoreCombiner; HORIZON as usize]>,
cursor: usize,
offset: DocId,
doc: DocId,
}
impl<TDocSet: DocSet> From<Vec<TDocSet>> for UnionDocSet<TDocSet> {
fn from(docsets: Vec<TDocSet>) -> UnionDocSet<TDocSet> {
impl<TDocSet: DocSet, TScoreCombiner: ScoreCombiner> From<Vec<TDocSet>> for Union<TDocSet, TScoreCombiner> {
fn from(docsets: Vec<TDocSet>) -> Union<TDocSet> {
let non_empty_docsets: Vec<TDocSet> =
docsets
.into_iter()
@@ -30,9 +33,10 @@ impl<TDocSet: DocSet> From<Vec<TDocSet>> for UnionDocSet<TDocSet> {
}
})
.collect();
UnionDocSet {
Union {
docsets: non_empty_docsets,
bitsets: Box::new([TinySet::empty(); HORIZON_NUM_TINYBITSETS]),
scores: Box::new([TScoreCombiner::default(); HORIZON]),
cursor: HORIZON_NUM_TINYBITSETS,
offset: 0,
doc: 0
@@ -61,7 +65,7 @@ fn refill<TDocSet: DocSet>(docsets: &mut Vec<TDocSet>, bitsets: &mut [TinySet; H
});
}
impl<TDocSet: DocSet> UnionDocSet<TDocSet> {
impl<TDocSet: DocSet, TScoreCombiner: ScoreCombiner> Union<TDocSet, TScoreCombiner> {
fn refill(&mut self) -> bool {
if let Some(min_doc) = self.docsets
.iter_mut()
@@ -90,7 +94,7 @@ impl<TDocSet: DocSet> UnionDocSet<TDocSet> {
}
}
impl<TDocSet: DocSet> DocSet for UnionDocSet<TDocSet> {
impl<TDocSet: DocSet, TScoreCombiner: ScoreCombiner> DocSet for Union<TDocSet, TScoreCombiner> {
fn advance(&mut self) -> bool {
if self.advance_buffered() {
@@ -190,7 +194,7 @@ impl<TDocSet: DocSet> DocSet for UnionDocSet<TDocSet> {
#[cfg(test)]
mod tests {
use super::UnionDocSet;
use super::Union;
use postings::{VecPostings, DocSet};
use tests;
use test::Bencher;
@@ -214,7 +218,7 @@ mod tests {
.collect();
let mut union_expected = VecPostings::from(union_vals);
let mut union = UnionDocSet::from(
let mut union = Union::from(
vals.into_iter()
.map(VecPostings::from)
.collect::<Vec<VecPostings>>()
@@ -260,7 +264,7 @@ mod tests {
}
}
let docset_factory = || {
let res: Box<DocSet> = box UnionDocSet::from(
let res: Box<DocSet> = box Union::from(
docs_list
.iter()
.map(|docs| docs.clone())
@@ -298,7 +302,7 @@ mod tests {
#[test]
fn test_union_skip_corner_case3() {
let mut docset = UnionDocSet::from(vec![
let mut docset = Union::from(vec![
VecPostings::from(vec![0u32, 5u32]),
VecPostings::from(vec![1u32, 4u32]),
]);
@@ -338,7 +342,7 @@ mod tests {
tests::sample_with_seed(100_000, 0.2, 1),
];
bench.iter(|| {
let mut v = UnionDocSet::from(union_docset.iter()
let mut v = Union::from(union_docset.iter()
.map(|doc_ids| VecPostings::from(doc_ids.clone()))
.collect::<Vec<VecPostings>>());
while v.advance() {};
@@ -352,7 +356,7 @@ mod tests {
tests::sample_with_seed(100_000, 0.001, 2)
];
bench.iter(|| {
let mut v = UnionDocSet::from(union_docset.iter()
let mut v = Union::from(union_docset.iter()
.map(|doc_ids| VecPostings::from(doc_ids.clone()))
.collect::<Vec<VecPostings>>());
while v.advance() {};

View File

@@ -4,7 +4,7 @@ use std::collections::BinaryHeap;
use std::cmp::Ordering;
use postings::DocSet;
use query::OccurFilter;
use query::boolean_query::ScoreCombiner;
use query::score_combiner::{ScoreCombiner, SumWithCoordsCombiner};
/// Each `HeapItem` represents the head of
/// one of scorer being merged.

View File

@@ -1,6 +1,6 @@
use query::Weight;
use core::SegmentReader;
use postings::{Intersection, UnionDocSet};
use postings::{Intersection, Union};
use std::collections::HashMap;
use query::EmptyScorer;
use query::Scorer;
@@ -22,7 +22,7 @@ fn scorer_union<'a>(docsets: Vec<Box<Scorer + 'a>>) -> Box<Scorer + 'a> {
.unwrap() //< we checked the size beforehands
} else {
// TODO have a UnionScorer instead.
box ConstScorer::new(UnionDocSet::from(docsets))
box ConstScorer::new(Union::from(docsets))
}
}

View File

@@ -1,11 +1,9 @@
mod boolean_query;
mod boolean_scorer;
mod boolean_weight;
mod score_combiner;
pub use self::boolean_query::BooleanQuery;
pub use self::boolean_scorer::BooleanScorer;
pub use self::score_combiner::ScoreCombiner;
#[cfg(test)]
mod tests {

View File

@@ -1,45 +0,0 @@
use Score;
/// Sums the scores it is fed and scales the sum by a coord term that
/// depends on how many scores were accumulated.
pub struct ScoreCombiner {
    /// Coord terms, indexed by the number of accumulated scores.
    coords: Vec<Score>,
    /// Number of scores accumulated since construction or the last `clear`.
    num_fields: usize,
    /// Running sum of the accumulated scores.
    score: Score,
}

impl ScoreCombiner {
    /// Adds `score` to the running sum and counts one more contribution.
    pub fn update(&mut self, score: Score) {
        self.score += score;
        self.num_fields += 1;
    }

    /// Resets the running sum and the contribution count to zero.
    pub fn clear(&mut self) {
        self.num_fields = 0;
        self.score = 0f32;
    }

    /// Compute the coord term.
    ///
    /// Indexes `coords` with the current contribution count, so it panics
    /// if more scores were accumulated than the table has entries for.
    fn coord(&self) -> f32 {
        self.coords[self.num_fields]
    }

    /// Returns the running sum multiplied by the current coord term.
    pub fn score(&self) -> Score {
        let coord_term = self.coord();
        self.score * coord_term
    }

    /// Builds a combiner whose coord table holds `i / num_scorers`
    /// for every `i` from `0` to `num_scorers` inclusive.
    pub fn default_for_num_scorers(num_scorers: usize) -> ScoreCombiner {
        let table_len = num_scorers + 1;
        let denominator = num_scorers as Score;
        let mut query_coords: Vec<Score> = Vec::with_capacity(table_len);
        for matched in 0..table_len {
            query_coords.push((matched as Score) / denominator);
        }
        ScoreCombiner::from(query_coords)
    }
}

impl From<Vec<Score>> for ScoreCombiner {
    /// Wraps an explicit coord table in a combiner with nothing accumulated yet.
    fn from(coords: Vec<Score>) -> ScoreCombiner {
        ScoreCombiner {
            score: 0f32,
            num_fields: 0,
            coords,
        }
    }
}

View File

@@ -17,6 +17,7 @@ mod range_query;
mod exclude;
mod reqopt_scorer;
pub mod score_combiner;
pub use self::reqopt_scorer::RequiredOptionalScorer;
pub use self::exclude::Exclude;
pub use self::bitset::BitSetDocSet;

View File

@@ -0,0 +1,67 @@
use Score;
/// Accumulator that is fed scores one at a time and reports a single
/// combined score.
///
/// Implementors must be `Default` and `Copy`.
pub trait ScoreCombiner
where
    Self: Default + Copy,
{
    /// Feeds one more score to the combiner.
    fn update(&mut self, score: Score);

    /// Resets the combiner so that it can be reused.
    fn clear(&mut self);

    /// Returns the combined score for what has been fed so far.
    fn score(&self) -> Score;
}
/// Score combiner that ignores every score it is fed and always reports `1`.
///
/// `Default`, `Clone` and `Copy` are derived because `ScoreCombiner` lists
/// `Default + Copy` as supertraits; without them the impl below is rejected.
#[derive(Debug, Default, Clone, Copy)]
pub struct DoNothingCombiner;

impl ScoreCombiner for DoNothingCombiner {
    /// Discards `_score`; nothing is accumulated.
    fn update(&mut self, _score: Score) {}

    /// No-op: there is no state to reset.
    fn clear(&mut self) {}

    /// Always returns `1`.
    fn score(&self) -> Score {
        1f32
    }
}
/// Score combiner that sums the scores it is fed and scales the sum by a
/// coord term looked up from the number of accumulated scores.
///
/// NOTE(review): `ScoreCombiner` is declared with `Default + Copy` supertraits,
/// but this struct owns a `Vec` (so it cannot be `Copy`) and no `Default` impl
/// is visible in this file; confirm how the bound is meant to be satisfied.
pub struct SumWithCoordsCombiner {
/// Coord terms, indexed by the number of accumulated scores.
coords: Vec<Score>,
/// Number of scores accumulated since construction or the last `clear`.
num_fields: usize,
/// Running sum of the accumulated scores.
score: Score,
}
impl ScoreCombiner for SumWithCoordsCombiner {
    /// Adds `score` to the running sum and counts one more contribution.
    fn update(&mut self, score: Score) {
        self.score += score;
        self.num_fields += 1;
    }

    /// Resets the running sum and the contribution count to zero.
    fn clear(&mut self) {
        self.num_fields = 0;
        self.score = 0f32;
    }

    /// Returns the running sum multiplied by the current coord term.
    fn score(&self) -> Score {
        let coord_term = self.coord();
        self.score * coord_term
    }
}
impl SumWithCoordsCombiner {
/// Compute the coord term
fn coord(&self) -> f32 {
self.coords[self.num_fields]
}
pub fn default_for_num_scorers(num_scorers: usize) -> Self {
let query_coords: Vec<Score> = (0..num_scorers + 1)
.map(|i| (i as Score) / (num_scorers as Score))
.collect();
ScoreCombiner::from(query_coords)
}
}
impl From<Vec<Score>> for ScoreCombiner {
fn from(coords: Vec<Score>) -> SumWithCoordsCombiner {
SumWithCoordsCombiner {
coords,
num_fields: 0,
score: 0f32,
}
}
}