mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-29 22:50:41 +00:00
blop
This commit is contained in:
@@ -31,7 +31,7 @@ pub use self::vec_postings::VecPostings;
|
||||
|
||||
pub use self::segment_postings::{BlockSegmentPostings, SegmentPostings};
|
||||
pub use self::intersection::Intersection;
|
||||
pub use self::union::UnionDocSet;
|
||||
pub use self::union::Union;
|
||||
|
||||
pub use common::HasLen;
|
||||
|
||||
|
||||
@@ -3,22 +3,25 @@ use postings::SkipResult;
|
||||
use common::TinySet;
|
||||
use std::cmp::Ordering;
|
||||
use DocId;
|
||||
use query::score_combiner::{DoNothingCombiner, ScoreCombiner};
|
||||
|
||||
|
||||
const HORIZON_NUM_TINYBITSETS: usize = 2048;
|
||||
const HORIZON_NUM_TINYBITSETS: usize = 32;
|
||||
const HORIZON: u32 = 64u32 * HORIZON_NUM_TINYBITSETS as u32;
|
||||
|
||||
/// Creates a `DocSet` that iterates through the union of several `DocSet`s.
|
||||
pub struct UnionDocSet<TDocSet: DocSet> {
|
||||
pub struct Union<TDocSet: DocSet, TScoreCombiner=DoNothingCombiner>
|
||||
where TDocSet: DocSet, TScoreCombiner: ScoreCombiner {
|
||||
docsets: Vec<TDocSet>,
|
||||
bitsets: Box<[TinySet; HORIZON_NUM_TINYBITSETS]>,
|
||||
scores: Box<[TScoreCombiner; HORIZON as usize]>,
|
||||
cursor: usize,
|
||||
offset: DocId,
|
||||
doc: DocId,
|
||||
}
|
||||
|
||||
impl<TDocSet: DocSet> From<Vec<TDocSet>> for UnionDocSet<TDocSet> {
|
||||
fn from(docsets: Vec<TDocSet>) -> UnionDocSet<TDocSet> {
|
||||
impl<TDocSet: DocSet, TScoreCombiner: ScoreCombiner> From<Vec<TDocSet>> for Union<TDocSet, TScoreCombiner> {
|
||||
fn from(docsets: Vec<TDocSet>) -> Union<TDocSet> {
|
||||
let non_empty_docsets: Vec<TDocSet> =
|
||||
docsets
|
||||
.into_iter()
|
||||
@@ -30,9 +33,10 @@ impl<TDocSet: DocSet> From<Vec<TDocSet>> for UnionDocSet<TDocSet> {
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
UnionDocSet {
|
||||
Union {
|
||||
docsets: non_empty_docsets,
|
||||
bitsets: Box::new([TinySet::empty(); HORIZON_NUM_TINYBITSETS]),
|
||||
scores: Box::new([TScoreCombiner::default(); HORIZON]),
|
||||
cursor: HORIZON_NUM_TINYBITSETS,
|
||||
offset: 0,
|
||||
doc: 0
|
||||
@@ -61,7 +65,7 @@ fn refill<TDocSet: DocSet>(docsets: &mut Vec<TDocSet>, bitsets: &mut [TinySet; H
|
||||
});
|
||||
}
|
||||
|
||||
impl<TDocSet: DocSet> UnionDocSet<TDocSet> {
|
||||
impl<TDocSet: DocSet, TScoreCombiner: ScoreCombiner> Union<TDocSet, TScoreCombiner> {
|
||||
fn refill(&mut self) -> bool {
|
||||
if let Some(min_doc) = self.docsets
|
||||
.iter_mut()
|
||||
@@ -90,7 +94,7 @@ impl<TDocSet: DocSet> UnionDocSet<TDocSet> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<TDocSet: DocSet> DocSet for UnionDocSet<TDocSet> {
|
||||
impl<TDocSet: DocSet, TScoreCombiner: ScoreCombiner> DocSet for Union<TDocSet, TScoreCombiner> {
|
||||
|
||||
fn advance(&mut self) -> bool {
|
||||
if self.advance_buffered() {
|
||||
@@ -190,7 +194,7 @@ impl<TDocSet: DocSet> DocSet for UnionDocSet<TDocSet> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use super::UnionDocSet;
|
||||
use super::Union;
|
||||
use postings::{VecPostings, DocSet};
|
||||
use tests;
|
||||
use test::Bencher;
|
||||
@@ -214,7 +218,7 @@ mod tests {
|
||||
.collect();
|
||||
let mut union_expected = VecPostings::from(union_vals);
|
||||
|
||||
let mut union = UnionDocSet::from(
|
||||
let mut union = Union::from(
|
||||
vals.into_iter()
|
||||
.map(VecPostings::from)
|
||||
.collect::<Vec<VecPostings>>()
|
||||
@@ -260,7 +264,7 @@ mod tests {
|
||||
}
|
||||
}
|
||||
let docset_factory = || {
|
||||
let res: Box<DocSet> = box UnionDocSet::from(
|
||||
let res: Box<DocSet> = box Union::from(
|
||||
docs_list
|
||||
.iter()
|
||||
.map(|docs| docs.clone())
|
||||
@@ -298,7 +302,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_union_skip_corner_case3() {
|
||||
let mut docset = UnionDocSet::from(vec![
|
||||
let mut docset = Union::from(vec![
|
||||
VecPostings::from(vec![0u32, 5u32]),
|
||||
VecPostings::from(vec![1u32, 4u32]),
|
||||
]);
|
||||
@@ -338,7 +342,7 @@ mod tests {
|
||||
tests::sample_with_seed(100_000, 0.2, 1),
|
||||
];
|
||||
bench.iter(|| {
|
||||
let mut v = UnionDocSet::from(union_docset.iter()
|
||||
let mut v = Union::from(union_docset.iter()
|
||||
.map(|doc_ids| VecPostings::from(doc_ids.clone()))
|
||||
.collect::<Vec<VecPostings>>());
|
||||
while v.advance() {};
|
||||
@@ -352,7 +356,7 @@ mod tests {
|
||||
tests::sample_with_seed(100_000, 0.001, 2)
|
||||
];
|
||||
bench.iter(|| {
|
||||
let mut v = UnionDocSet::from(union_docset.iter()
|
||||
let mut v = Union::from(union_docset.iter()
|
||||
.map(|doc_ids| VecPostings::from(doc_ids.clone()))
|
||||
.collect::<Vec<VecPostings>>());
|
||||
while v.advance() {};
|
||||
|
||||
@@ -4,7 +4,7 @@ use std::collections::BinaryHeap;
|
||||
use std::cmp::Ordering;
|
||||
use postings::DocSet;
|
||||
use query::OccurFilter;
|
||||
use query::boolean_query::ScoreCombiner;
|
||||
use query::score_combiner::{ScoreCombiner, SumWithCoordsCombiner};
|
||||
|
||||
/// Each `HeapItem` represents the head of
|
||||
/// one of scorer being merged.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use query::Weight;
|
||||
use core::SegmentReader;
|
||||
use postings::{Intersection, UnionDocSet};
|
||||
use postings::{Intersection, Union};
|
||||
use std::collections::HashMap;
|
||||
use query::EmptyScorer;
|
||||
use query::Scorer;
|
||||
@@ -22,7 +22,7 @@ fn scorer_union<'a>(docsets: Vec<Box<Scorer + 'a>>) -> Box<Scorer + 'a> {
|
||||
.unwrap() //< we checked the size beforehands
|
||||
} else {
|
||||
// TODO have a UnionScorer instead.
|
||||
box ConstScorer::new(UnionDocSet::from(docsets))
|
||||
box ConstScorer::new(Union::from(docsets))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
mod boolean_query;
|
||||
mod boolean_scorer;
|
||||
mod boolean_weight;
|
||||
mod score_combiner;
|
||||
|
||||
pub use self::boolean_query::BooleanQuery;
|
||||
pub use self::boolean_scorer::BooleanScorer;
|
||||
pub use self::score_combiner::ScoreCombiner;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
@@ -1,45 +0,0 @@
|
||||
use Score;
|
||||
|
||||
pub struct ScoreCombiner {
|
||||
coords: Vec<Score>,
|
||||
num_fields: usize,
|
||||
score: Score,
|
||||
}
|
||||
|
||||
impl ScoreCombiner {
|
||||
pub fn update(&mut self, score: Score) {
|
||||
self.score += score;
|
||||
self.num_fields += 1;
|
||||
}
|
||||
|
||||
pub fn clear(&mut self) {
|
||||
self.score = 0f32;
|
||||
self.num_fields = 0;
|
||||
}
|
||||
|
||||
/// Compute the coord term
|
||||
fn coord(&self) -> f32 {
|
||||
self.coords[self.num_fields]
|
||||
}
|
||||
|
||||
pub fn score(&self) -> Score {
|
||||
self.score * self.coord()
|
||||
}
|
||||
|
||||
pub fn default_for_num_scorers(num_scorers: usize) -> ScoreCombiner {
|
||||
let query_coords: Vec<Score> = (0..num_scorers + 1)
|
||||
.map(|i| (i as Score) / (num_scorers as Score))
|
||||
.collect();
|
||||
ScoreCombiner::from(query_coords)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Score>> for ScoreCombiner {
|
||||
fn from(coords: Vec<Score>) -> ScoreCombiner {
|
||||
ScoreCombiner {
|
||||
coords,
|
||||
num_fields: 0,
|
||||
score: 0f32,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -17,6 +17,7 @@ mod range_query;
|
||||
mod exclude;
|
||||
mod reqopt_scorer;
|
||||
|
||||
pub mod score_combiner;
|
||||
pub use self::reqopt_scorer::RequiredOptionalScorer;
|
||||
pub use self::exclude::Exclude;
|
||||
pub use self::bitset::BitSetDocSet;
|
||||
|
||||
67
src/query/score_combiner.rs
Normal file
67
src/query/score_combiner.rs
Normal file
@@ -0,0 +1,67 @@
|
||||
use Score;
|
||||
|
||||
pub trait ScoreCombiner: Default + Copy {
|
||||
fn update(&mut self, score: Score);
|
||||
fn clear(&mut self);
|
||||
fn score(&self) -> Score;
|
||||
}
|
||||
|
||||
|
||||
pub struct DoNothingCombiner;
|
||||
impl ScoreCombiner for DoNothingCombiner {
|
||||
fn update(&mut self, score: Score) {}
|
||||
|
||||
fn clear(&mut self) {}
|
||||
|
||||
fn score(&self) -> Score {
|
||||
1f32
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SumWithCoordsCombiner {
|
||||
coords: Vec<Score>,
|
||||
num_fields: usize,
|
||||
score: Score,
|
||||
}
|
||||
|
||||
impl ScoreCombiner for SumWithCoordsCombiner {
|
||||
fn update(&mut self, score: Score) {
|
||||
self.score += score;
|
||||
self.num_fields += 1;
|
||||
}
|
||||
|
||||
fn clear(&mut self) {
|
||||
self.score = 0f32;
|
||||
self.num_fields = 0;
|
||||
}
|
||||
|
||||
fn score(&self) -> Score {
|
||||
self.score * self.coord()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl SumWithCoordsCombiner {
|
||||
/// Compute the coord term
|
||||
fn coord(&self) -> f32 {
|
||||
self.coords[self.num_fields]
|
||||
}
|
||||
|
||||
|
||||
pub fn default_for_num_scorers(num_scorers: usize) -> Self {
|
||||
let query_coords: Vec<Score> = (0..num_scorers + 1)
|
||||
.map(|i| (i as Score) / (num_scorers as Score))
|
||||
.collect();
|
||||
ScoreCombiner::from(query_coords)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Score>> for ScoreCombiner {
|
||||
fn from(coords: Vec<Score>) -> SumWithCoordsCombiner {
|
||||
SumWithCoordsCombiner {
|
||||
coords,
|
||||
num_fields: 0,
|
||||
score: 0f32,
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user