Refactoring

This commit is contained in:
Paul Masurel
2016-08-14 10:26:56 +09:00
parent 32aad98c15
commit 29abea07d4
12 changed files with 144 additions and 127 deletions

View File

@@ -9,13 +9,11 @@ mod segment_postings;
mod intersection;
mod offset_postings;
mod freq_handler;
mod union_postings;
mod docset;
mod scored_docset;
mod segment_postings_option;
pub use self::docset::{SkipResult, DocSet};
pub use self::union_postings::UnionPostings;
pub use self::offset_postings::OffsetPostings;
pub use self::recorder::{Recorder, NothingRecorder, TermFrequencyRecorder, TFAndPositionRecorder};
pub use self::serializer::PostingsSerializer;

View File

@@ -2,10 +2,13 @@ use DocId;
use postings::{Postings, DocSet};
use std::cmp::Ordering;
use std::collections::BinaryHeap;
use query::MultiTermAccumulator;
use query::MultiTermAccumulator;
use query::Similarity;
use fastfield::U32FastFieldReader;
use query::Occur;
use std::iter;
use super::Scorer;
use Score;
/// Entry stored in the `BinaryHeap` that drives the multi-term scorer.
///
/// Field `0` is a `DocId`. Field `1` is the ordinal of the postings list the
/// entry was created for (it is built as `HeapItem(doc, ord as u32)` and the
/// ordinal is later used to index per-term data such as `term_frequencies`).
#[derive(Eq, PartialEq)]
struct HeapItem(DocId, u32);
@@ -55,27 +58,27 @@ impl Filter {
}
}
pub struct UnionPostings<TPostings: Postings, TAccumulator: MultiTermAccumulator> {
pub struct DAATMultiTermScorer<TPostings: Postings, TAccumulator: MultiTermAccumulator> {
fieldnorm_readers: Vec<U32FastFieldReader>,
postings: Vec<TPostings>,
term_frequencies: Vec<u32>,
queue: BinaryHeap<HeapItem>,
doc: DocId,
scorer: TAccumulator,
similarity: TAccumulator,
filter: Filter,
}
impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> UnionPostings<TPostings, TAccumulator> {
impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> DAATMultiTermScorer<TPostings, TAccumulator> {
fn new_non_empty(
fieldnorm_readers: Vec<U32FastFieldReader>,
postings: Vec<TPostings>,
scorer: TAccumulator,
similarity: TAccumulator,
filter: Filter
) -> UnionPostings<TPostings, TAccumulator> {
) -> DAATMultiTermScorer<TPostings, TAccumulator> {
let mut term_frequencies: Vec<u32> = iter::repeat(0u32).take(postings.len()).collect();
let heap_items: Vec<HeapItem> = postings
.iter()
@@ -88,18 +91,18 @@ impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> UnionPostings<TPos
HeapItem(doc, ord as u32)
})
.collect();
UnionPostings {
DAATMultiTermScorer {
fieldnorm_readers: fieldnorm_readers,
postings: postings,
term_frequencies: term_frequencies,
queue: BinaryHeap::from(heap_items),
doc: 0,
scorer: scorer,
similarity: similarity,
filter: filter
}
}
pub fn new(postings_and_fieldnorms: Vec<(Occur, TPostings, U32FastFieldReader)>, scorer: TAccumulator) -> UnionPostings<TPostings, TAccumulator> {
pub fn new(postings_and_fieldnorms: Vec<(Occur, TPostings, U32FastFieldReader)>, similarity: TAccumulator) -> DAATMultiTermScorer<TPostings, TAccumulator> {
let mut postings = Vec::new();
let mut fieldnorm_readers = Vec::new();
let mut occurs = Vec::new();
@@ -111,12 +114,12 @@ impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> UnionPostings<TPos
}
}
let filter = Filter::new(&occurs);
UnionPostings::new_non_empty(fieldnorm_readers, postings, scorer, filter)
DAATMultiTermScorer::new_non_empty(fieldnorm_readers, postings, similarity, filter)
}
pub fn scorer(&self,) -> &TAccumulator {
&self.scorer
&self.similarity
}
fn advance_head(&mut self,) {
@@ -138,11 +141,17 @@ impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> UnionPostings<TPos
}
impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> DocSet for UnionPostings<TPostings, TAccumulator> {
/// `Scorer` implementation that reports whatever score the wrapped
/// similarity currently holds.
impl<TPostings: Postings, TSimilarity: Similarity> Scorer for DAATMultiTermScorer<TPostings, TSimilarity> {
    /// Returns the value of `score()` on the `similarity` field.
    fn score(&self) -> Score {
        let similarity = &self.similarity;
        similarity.score()
    }
}
impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> DocSet for DAATMultiTermScorer<TPostings, TAccumulator> {
fn advance(&mut self,) -> bool {
loop {
self.scorer.clear();
self.similarity.clear();
let mut ord_bitset = 0u64;
match self.queue.peek() {
Some(&HeapItem(doc, ord)) => {
@@ -150,7 +159,7 @@ impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> DocSet for UnionPo
let ord: usize = ord as usize;
let fieldnorm = self.get_field_norm(ord, doc);
let tf = self.term_frequencies[ord];
self.scorer.update(ord, tf, fieldnorm);
self.similarity.update(ord, tf, fieldnorm);
ord_bitset |= 1 << ord;
}
None => {
@@ -168,7 +177,7 @@ impl<TPostings: Postings, TAccumulator: MultiTermAccumulator> DocSet for UnionPo
let peek_ord: usize = peek_ord as usize;
let peek_tf = self.term_frequencies[peek_ord];
let peek_fieldnorm = self.get_field_norm(peek_ord, peek_doc);
self.scorer.update(peek_ord, peek_tf, peek_fieldnorm);
self.similarity.update(peek_ord, peek_tf, peek_fieldnorm);
ord_bitset |= 1 << peek_ord;
}
}
@@ -193,7 +202,7 @@ mod tests {
use super::*;
use postings::{DocSet, VecPostings};
use query::TfIdfScorer;
use query::TfIdf;
use query::Scorer;
use directory::ReadOnlySource;
use directory::SharedVec;
@@ -225,21 +234,21 @@ mod tests {
let right_fieldnorms = create_u32_fastfieldreader(Field(2), vec!(15,25,35));
let left = VecPostings::from(vec!(1, 2, 3));
let right = VecPostings::from(vec!(1, 3, 8));
let multi_term_scorer = TfIdfScorer::new(vec!(0f32, 1f32, 2f32), vec!(1f32, 4f32));
let mut union = UnionPostings::new(
let tfidf = TfIdf::new(vec!(0f32, 1f32, 2f32), vec!(1f32, 4f32));
let mut union = DAATMultiTermScorer::new(
vec!(
(Occur::Should, left, left_fieldnorms),
(Occur::Should, right, right_fieldnorms),
),
multi_term_scorer
tfidf
);
assert_eq!(union.next(), Some(1u32));
assert!(abs_diff(union.scorer().score(), 2.182179f32) < 0.001);
assert!(abs_diff(union.score(), 2.182179f32) < 0.001);
assert_eq!(union.next(), Some(2u32));
assert!(abs_diff(union.scorer().score(), 0.2236068) < 0.001f32);
assert!(abs_diff(union.score(), 0.2236068) < 0.001f32);
assert_eq!(union.next(), Some(3u32));
assert_eq!(union.next(), Some(8u32));
assert!(abs_diff(union.scorer().score(), 0.8944272f32) < 0.001f32);
assert!(abs_diff(union.score(), 0.8944272f32) < 0.001f32);
assert!(!union.advance());
}

View File

@@ -1,21 +1,26 @@
mod query;
mod multi_term_query;
mod multi_term_scorer;
mod multi_term_explainer;
mod multi_term_accumulator;
mod similarity_explainer;
mod scorer;
mod query_parser;
mod explanation;
mod tfidf;
mod occur;
mod daat_multiterm_scorer;
mod similarity;
pub use self::similarity::Similarity;
pub use self::daat_multiterm_scorer::DAATMultiTermScorer;
pub use self::occur::Occur;
pub use self::query::Query;
pub use self::multi_term_query::MultiTermQuery;
pub use self::multi_term_scorer::MultiTermScorer;
pub use self::multi_term_explainer::MultiTermExplainer;
pub use self::tfidf::TfIdfScorer;
pub use self::similarity_explainer::SimilarityExplainer;
pub use self::tfidf::TfIdf;
pub use self::scorer::Scorer;
pub use self::query_parser::QueryParser;
pub use self::explanation::Explanation;
pub use self::multi_term_scorer::MultiTermAccumulator;
pub use self::multi_term_accumulator::MultiTermAccumulator;

View File

@@ -0,0 +1,4 @@
/// Accumulates per-term statistics, one term at a time.
///
/// The multi-term scorer in this code base calls `clear` at the start of each
/// round of its `advance` loop and then calls `update` with the ordinal,
/// term frequency and field norm of a term.
pub trait MultiTermAccumulator {
/// Feeds one term into the accumulator.
///
/// * `term_ord` - ordinal identifying the term.
/// * `term_freq` - term frequency value supplied by the caller.
/// * `fieldnorm` - field norm value supplied by the caller.
fn update(&mut self, term_ord: usize, term_freq: u32, fieldnorm: u32);
/// Called before a new series of `update` calls.
/// NOTE(review): presumably resets any accumulated state — confirm against implementors.
fn clear(&mut self,);
}

View File

@@ -1,36 +0,0 @@
use super::MultiTermAccumulator;
use super::MultiTermScorer;
use super::Explanation;
/// Wraps a `MultiTermScorer` and remembers every
/// `(term_ord, term_freq, fieldnorm)` triple passed to `update`, so that the
/// score can be explained afterwards.
///
/// The explicit `Sized` bound was dropped: type parameters are `Sized` by
/// default, so it added nothing.
pub struct MultiTermExplainer<TScorer: MultiTermScorer> {
    /// Wrapped scorer; every `update` / `clear` call is forwarded to it.
    scorer: TScorer,
    /// Triples recorded since the last `clear`.
    vals: Vec<(usize, u32, u32)>,
}
impl<TScorer: MultiTermScorer> MultiTermExplainer<TScorer> {
    /// Builds an `Explanation` by handing the recorded triples to the wrapped
    /// scorer's `explain` method.
    pub fn explain_score(&self) -> Explanation {
        self.scorer.explain(&self.vals)
    }
}
impl<TScorer: MultiTermScorer> From<TScorer> for MultiTermExplainer<TScorer> {
    /// Wraps `multi_term_scorer`, starting with an empty list of recorded
    /// triples.
    fn from(multi_term_scorer: TScorer) -> MultiTermExplainer<TScorer> {
        MultiTermExplainer {
            scorer: multi_term_scorer,
            vals: Vec::new(),
        }
    }
}
impl<TScorer: MultiTermScorer> MultiTermAccumulator for MultiTermExplainer<TScorer> {
    /// Records the triple, then forwards it to the wrapped scorer.
    fn update(&mut self, term_ord: usize, term_freq: u32, fieldnorm: u32) {
        self.vals.push((term_ord, term_freq, fieldnorm));
        self.scorer.update(term_ord, term_freq, fieldnorm);
    }

    /// Forgets the recorded triples and clears the wrapped scorer.
    fn clear(&mut self) {
        self.vals.clear();
        self.scorer.clear();
    }
}

View File

@@ -8,11 +8,10 @@ use core::searcher::Searcher;
use collector::Collector;
use SegmentLocalId;
use core::SegmentReader;
use query::MultiTermExplainer;
use query::SimilarityExplainer;
use postings::SegmentPostings;
use postings::UnionPostings;
use postings::DocSet;
use query::TfIdfScorer;
use query::TfIdf;
use postings::SkipResult;
use ScoredDoc;
use query::Scorer;
@@ -21,6 +20,7 @@ use DocAddress;
use query::Explanation;
use query::occur::Occur;
use postings::SegmentPostingsOption;
use query::DAATMultiTermScorer;
#[derive(Eq, PartialEq, Debug)]
@@ -30,13 +30,12 @@ pub struct MultiTermQuery {
impl MultiTermQuery {
/// Returns the number of entries held in `occur_terms`.
pub fn num_terms(&self) -> usize {
    let occur_terms = &self.occur_terms;
    occur_terms.len()
}
fn scorer(&self, searcher: &Searcher) -> TfIdfScorer {
fn scorer(&self, searcher: &Searcher) -> TfIdf {
let num_terms = self.num_terms();
let num_docs = searcher.num_docs() as f32;
let idfs: Vec<f32> = self.occur_terms
@@ -59,7 +58,7 @@ impl MultiTermQuery {
.iter()
.map(|&(_, ref term)| format!("{:?}", &term))
.collect();
let mut tfidf_scorer = TfIdfScorer::new(query_coords, idfs);
let mut tfidf_scorer = TfIdf::new(query_coords, idfs);
tfidf_scorer.set_term_names(term_names);
tfidf_scorer
}
@@ -68,7 +67,7 @@ impl MultiTermQuery {
&'b self,
reader: &'b SegmentReader,
multi_term_scorer: TScorer,
mut timer: OpenTimer<'a>) -> Result<UnionPostings<SegmentPostings, TScorer>> {
mut timer: OpenTimer<'a>) -> Result<DAATMultiTermScorer<SegmentPostings, TScorer>> {
let mut postings_and_fieldnorms = Vec::with_capacity(self.num_terms());
{
let mut decode_timer = timer.open("decode_all");
@@ -88,7 +87,7 @@ impl MultiTermQuery {
// TODO putting the SHOULD at the end of the list should push the limit.
return Err(Error::InvalidArgument(String::from("Limit of 64 terms was exceeded.")));
}
Ok(UnionPostings::new(postings_and_fieldnorms, multi_term_scorer))
Ok(DAATMultiTermScorer::new(postings_and_fieldnorms, multi_term_scorer))
}
}
@@ -120,7 +119,7 @@ impl Query for MultiTermQuery {
searcher: &Searcher,
doc_address: &DocAddress) -> Result<Explanation> {
let segment_reader = &searcher.segments()[doc_address.segment_ord() as usize];
let multi_term_scorer = MultiTermExplainer::from(self.scorer(searcher));
let multi_term_scorer = SimilarityExplainer::from(self.scorer(searcher));
let mut timer_tree = TimerTree::new();
let mut postings = try!(
self.search_segment(
@@ -164,7 +163,7 @@ impl Query for MultiTermQuery {
{
let _collection_timer = segment_search_timer.open("collection");
while postings.advance() {
let scored_doc = ScoredDoc(postings.scorer().score(), postings.doc());
let scored_doc = ScoredDoc(postings.score(), postings.doc());
collector.collect(scored_doc);
}
}

View File

@@ -1,11 +0,0 @@
use query::Scorer;
use query::Explanation;
/// Accumulates per-term statistics, one term at a time.
pub trait MultiTermAccumulator {
/// Feeds one term into the accumulator.
///
/// * `term_ord` - ordinal identifying the term.
/// * `term_freq` - term frequency value supplied by the caller.
/// * `fieldnorm` - field norm value supplied by the caller.
fn update(&mut self, term_ord: usize, term_freq: u32, fieldnorm: u32);
/// Called before a new series of `update` calls.
/// NOTE(review): presumably resets any accumulated state — confirm against implementors.
fn clear(&mut self,);
}
/// A `Scorer` that is also a `MultiTermAccumulator` and can produce an
/// `Explanation`.
pub trait MultiTermScorer: Scorer + MultiTermAccumulator {
/// Builds an `Explanation` from `vals`.
///
/// `vals` holds `(usize, u32, u32)` triples; `MultiTermExplainer` fills it
/// with the `(term_ord, term_freq, fieldnorm)` arguments it received through
/// `update`.
fn explain(&self, vals: &Vec<(usize, u32, u32)>) -> Explanation;
}

View File

@@ -1,3 +1,7 @@
pub trait Scorer {
use DocSet;
pub trait Scorer: DocSet {
fn score(&self,) -> f32;
}

8
src/query/similarity.rs Normal file
View File

@@ -0,0 +1,8 @@
use Score;
use query::Explanation;
use query::MultiTermAccumulator;
/// A `MultiTermAccumulator` that can turn what it accumulated into a `Score`
/// and produce an `Explanation`.
pub trait Similarity: MultiTermAccumulator {
/// Returns the score for the current state of the accumulator.
fn score(&self, ) -> Score;
/// Builds an `Explanation` from `vals`.
///
/// `vals` holds `(usize, u32, u32)` triples; `SimilarityExplainer` fills it
/// with the `(term_ord, term_freq, fieldnorm)` arguments it received through
/// `update`.
fn explain(&self, vals: &Vec<(usize, u32, u32)>) -> Explanation;
}

View File

@@ -0,0 +1,48 @@
use Score;
use super::MultiTermAccumulator;
use super::Similarity;
use super::Explanation;
/// Wraps a `Similarity` and remembers every
/// `(term_ord, term_freq, fieldnorm)` triple passed to `update`, so that the
/// score can be explained afterwards.
///
/// The explicit `Sized` bound was dropped: type parameters are `Sized` by
/// default, so it added nothing.
pub struct SimilarityExplainer<TSimilarity: Similarity> {
    /// Wrapped similarity; every `update` / `clear` call is forwarded to it.
    scorer: TSimilarity,
    /// Triples recorded since the last `clear`.
    vals: Vec<(usize, u32, u32)>,
}
impl<TSimilarity: Similarity> SimilarityExplainer<TSimilarity> {
    /// Builds an `Explanation` by handing the recorded triples to the wrapped
    /// similarity's `explain` method.
    pub fn explain_score(&self) -> Explanation {
        self.scorer.explain(&self.vals)
    }
}
impl<TSimilarity: Similarity> From<TSimilarity> for SimilarityExplainer<TSimilarity> {
    /// Wraps `similarity`, starting with an empty list of recorded triples.
    fn from(similarity: TSimilarity) -> SimilarityExplainer<TSimilarity> {
        SimilarityExplainer {
            scorer: similarity,
            vals: Vec::new(),
        }
    }
}
impl<TSimilarity: Similarity> MultiTermAccumulator for SimilarityExplainer<TSimilarity> {
    /// Records the triple, then forwards it to the wrapped similarity.
    fn update(&mut self, term_ord: usize, term_freq: u32, fieldnorm: u32) {
        self.vals.push((term_ord, term_freq, fieldnorm));
        self.scorer.update(term_ord, term_freq, fieldnorm);
    }

    /// Forgets the recorded triples and clears the wrapped similarity.
    fn clear(&mut self) {
        self.vals.clear();
        self.scorer.clear();
    }
}
impl<TSimilarity: Similarity> Similarity for SimilarityExplainer<TSimilarity> {
    /// Returns the score reported by the wrapped similarity.
    fn score(&self) -> Score {
        self.scorer.score()
    }

    /// Forwards `vals` unchanged to the wrapped similarity's `explain`.
    ///
    /// The triples recorded by this explainer are not consulted here; they
    /// are only used by `explain_score`.
    fn explain(&self, vals: &Vec<(usize, u32, u32)>) -> Explanation {
        self.scorer.explain(vals)
    }
}

View File

@@ -1,10 +1,10 @@
use Score;
use super::MultiTermAccumulator;
use super::Scorer;
use super::MultiTermScorer;
use super::Explanation;
use super::Similarity;
#[derive(Clone)]
pub struct TfIdfScorer {
pub struct TfIdf {
coords: Vec<f32>,
idf: Vec<f32>,
score: f32,
@@ -12,7 +12,7 @@ pub struct TfIdfScorer {
term_names: Option<Vec<String>>, //< only here for explain
}
impl MultiTermAccumulator for TfIdfScorer {
impl MultiTermAccumulator for TfIdf {
#[inline(always)]
fn update(&mut self, term_ord: usize, term_freq: u32, fieldnorm: u32) {
@@ -28,9 +28,9 @@ impl MultiTermAccumulator for TfIdfScorer {
}
}
impl TfIdfScorer {
pub fn new(coords: Vec<f32>, idf: Vec<f32>) -> TfIdfScorer {
TfIdfScorer {
impl TfIdf {
pub fn new(coords: Vec<f32>, idf: Vec<f32>) -> TfIdf {
TfIdf {
coords: coords,
idf: idf,
score: 0f32,
@@ -61,14 +61,12 @@ impl TfIdfScorer {
}
}
impl Scorer for TfIdfScorer {
impl Similarity for TfIdf {
#[inline(always)]
fn score(&self, ) -> f32 {
fn score(&self, ) -> Score {
self.score * self.coord()
}
}
impl MultiTermScorer for TfIdfScorer {
fn explain(&self, vals: &Vec<(usize, u32, u32)>) -> Explanation {
let score = self.score();
let mut explanation = Explanation::with_val(score);
@@ -89,41 +87,41 @@ impl MultiTermScorer for TfIdfScorer {
#[cfg(test)]
mod tests {
use super::*;
use query::Scorer;
use query::MultiTermAccumulator;
use query::Similarity;
/// Returns the absolute difference `|right - left|` of two floats.
fn abs_diff(left: f32, right: f32) -> f32 {
    let delta = right - left;
    delta.abs()
}
#[test]
pub fn test_multiterm_scorer() {
let mut tfidf_scorer = TfIdfScorer::new(vec!(0f32, 1f32, 2f32), vec!(1f32, 4f32));
pub fn test_tfidf() {
let mut tfidf = TfIdf::new(vec!(0f32, 1f32, 2f32), vec!(1f32, 4f32));
{
tfidf_scorer.update(0, 1, 1);
assert!(abs_diff(tfidf_scorer.score(), 1f32) < 0.001f32);
tfidf_scorer.clear();
tfidf.update(0, 1, 1);
assert!(abs_diff(tfidf.score(), 1f32) < 0.001f32);
tfidf.clear();
}
{
tfidf_scorer.update(1, 1, 1);
assert_eq!(tfidf_scorer.score(), 4f32);
tfidf_scorer.clear();
tfidf.update(1, 1, 1);
assert_eq!(tfidf.score(), 4f32);
tfidf.clear();
}
{
tfidf_scorer.update(0, 2, 1);
assert!(abs_diff(tfidf_scorer.score(), 1.4142135) < 0.001f32);
tfidf_scorer.clear();
tfidf.update(0, 2, 1);
assert!(abs_diff(tfidf.score(), 1.4142135) < 0.001f32);
tfidf.clear();
}
{
tfidf_scorer.update(0, 1, 1);
tfidf_scorer.update(1, 1, 1);
assert_eq!(tfidf_scorer.score(), 10f32);
tfidf_scorer.clear();
tfidf.update(0, 1, 1);
tfidf.update(1, 1, 1);
assert_eq!(tfidf.score(), 10f32);
tfidf.clear();
}

View File

@@ -22,20 +22,11 @@ use super::*;
/// ```
/// use tantivy::schema::*;
///
/// fn create_schema() -> Schema {
/// let mut schema = Schema::new();
/// let str_fieldtype = TextOptions::new();
/// let id_field = schema.add_text_field("id", STRING);
/// let url_field = schema.add_text_field("url", STRING);
/// let body_field = schema.add_text_field("body", TEXT);
/// let id_field = schema.add_text_field("id", STRING);
/// let url_field = schema.add_text_field("url", STRING);
/// let title_field = schema.add_text_field("title", TEXT);
/// let body_field = schema.add_text_field("body", TEXT);
/// schema
/// }
///
/// let schema = create_schema();
/// ```
#[derive(Clone, Debug)]
pub struct Schema {