* Added explanation

* Explain

* Splitting weight and idf

* Added comments

Closes #36
This commit is contained in:
Paul Masurel
2019-06-06 10:03:54 +09:00
committed by GitHub
parent d590f4c6b0
commit 4822940b19
17 changed files with 493 additions and 86 deletions

View File

@@ -5,6 +5,7 @@ Tantivy 0.10.0
- Added an ASCII folding filter (@drusellers)
- Bugfix in `query.count` in presence of deletes (@pmasurel)
- Added `.explain(...)` in `Query` and `Weight` to explain the score of a document (@pmasurel)
Minor
---------

View File

@@ -1,7 +1,8 @@
use core::Searcher;
use core::SegmentReader;
use docset::DocSet;
use query::{Query, Scorer, Weight};
use query::explanation::does_not_match;
use query::{Explanation, Query, Scorer, Weight};
use DocId;
use Result;
use Score;
@@ -29,6 +30,13 @@ impl Weight for AllWeight {
max_doc: reader.max_doc(),
}))
}
/// Explains the score of `doc` for the match-all query.
///
/// Doc ids strictly below `reader.max_doc()` get a constant score of `1`;
/// any other doc id yields the "does not match" error.
fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result<Explanation> {
    if doc < reader.max_doc() {
        Ok(Explanation::new("AllQuery", 1f32))
    } else {
        Err(does_not_match(doc))
    }
}
}
enum State {

View File

@@ -1,12 +1,14 @@
use common::BitSet;
use core::SegmentReader;
use query::BitSetDocSet;
use query::ConstScorer;
use query::{BitSetDocSet, Explanation};
use query::{Scorer, Weight};
use schema::{Field, IndexRecordOption};
use tantivy_fst::Automaton;
use termdict::{TermDictionary, TermStreamer};
use Result;
use DocId;
use TantivyError;
use {Result, SkipResult};
/// A weight struct for Fuzzy Term and Regex Queries
pub struct AutomatonWeight<A>
@@ -56,4 +58,15 @@ where
let doc_bitset = BitSetDocSet::from(doc_bitset);
Ok(Box::new(ConstScorer::new(doc_bitset)))
}
/// Explains the score of `doc` for this automaton-based weight.
///
/// A scorer is built for the segment and moved to `doc` with `skip_next`.
/// When the scorer lands exactly on `doc`, the document matches and gets
/// the constant score `1.0`.
///
/// # Errors
///
/// Returns `TantivyError::InvalidArgument` when `doc` is not matched, and
/// forwards any error raised while building the scorer.
fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result<Explanation> {
    let mut scorer = self.scorer(reader)?;
    if scorer.skip_next(doc) != SkipResult::Reached {
        // The document may well exist; it simply is not matched by the
        // automaton. The wording mirrors the message built by
        // `query::explanation::does_not_match` and includes the doc id.
        return Err(TantivyError::InvalidArgument(format!(
            "Document #({}) does not match",
            doc
        )));
    }
    Ok(Explanation::new("AutomatonScorer", 1.0f32))
}
}

View File

@@ -1,4 +1,5 @@
use fieldnorm::FieldNormReader;
use query::Explanation;
use Score;
use Searcher;
use Term;
@@ -26,18 +27,13 @@ fn compute_tf_cache(average_fieldnorm: f32) -> [f32; 256] {
/// BM25 scoring parameters computed once per query and cloned into scorers.
#[derive(Clone)]
pub struct BM25Weight {
// Explanation node describing the idf; its value is the idf used to derive `weight`.
idf_explain: Explanation,
// Precomputed as `idf * (1 + K1)` in `BM25Weight::new`.
weight: f32,
// Table built by `compute_tf_cache(average_fieldnorm)`, indexed by fieldnorm id.
cache: [f32; 256],
// Average field length; kept so that `explain` can report `avgdl`.
average_fieldnorm: f32,
}
impl BM25Weight {
pub fn null() -> BM25Weight {
BM25Weight {
weight: 0f32,
cache: [1f32; 256],
}
}
pub fn for_terms(searcher: &Searcher, terms: &[Term]) -> BM25Weight {
assert!(!terms.is_empty(), "BM25 requires at least one term");
let field = terms[0].field();
@@ -58,20 +54,37 @@ impl BM25Weight {
}
let average_fieldnorm = total_num_tokens as f32 / total_num_docs as f32;
let idf = terms
.iter()
.map(|term| {
let term_doc_freq = searcher.doc_freq(term);
idf(term_doc_freq, total_num_docs)
})
.sum::<f32>();
BM25Weight::new(idf, average_fieldnorm)
let mut idf_explain: Explanation;
if terms.len() == 1 {
let term_doc_freq = searcher.doc_freq(&terms[0]);
let idf = idf(term_doc_freq, total_num_docs);
idf_explain =
Explanation::new("idf, computed as log(1 + (N - n + 0.5) / (n + 0.5))", idf);
idf_explain.add_const(
"n, number of docs containing this term",
term_doc_freq as f32,
);
idf_explain.add_const("N, total number of docs", total_num_docs as f32);
} else {
let idf = terms
.iter()
.map(|term| {
let term_doc_freq = searcher.doc_freq(term);
idf(term_doc_freq, total_num_docs)
})
.sum::<f32>();
idf_explain = Explanation::new("idf", idf);
}
BM25Weight::new(idf_explain, average_fieldnorm)
}
fn new(idf: f32, average_fieldnorm: f32) -> BM25Weight {
fn new(idf_explain: Explanation, average_fieldnorm: f32) -> BM25Weight {
let weight = idf_explain.value() * (1f32 + K1);
BM25Weight {
weight: idf * (1f32 + K1),
idf_explain,
weight,
cache: compute_tf_cache(average_fieldnorm),
average_fieldnorm,
}
}
@@ -81,6 +94,37 @@ impl BM25Weight {
let term_freq = term_freq as f32;
self.weight * term_freq / (term_freq + norm)
}
/// Builds the explanation tree for the BM25 score of one document.
///
/// The root node carries the value returned by `score(fieldnorm_id, term_freq)`
/// and has three children: the `(K1+1)` constant, the idf explanation stored in
/// this weight, and a term-frequency node listing the inputs of its formula.
pub fn explain(&self, fieldnorm_id: u8, term_freq: u32) -> Explanation {
    // The explain format is directly copied from Lucene's.
    // (So, Kudos to Lucene)
    let freq = term_freq as f32;
    // Cached per-fieldnorm denominator term, looked up by fieldnorm id.
    let cached_norm = self.cache[fieldnorm_id as usize];
    let field_length = FieldNormReader::id_to_fieldnorm(fieldnorm_id) as f32;

    let mut tf_node = Explanation::new(
        "freq / (freq + k1 * (1 - b + b * dl / avgdl))",
        freq / (freq + cached_norm),
    );
    tf_node.add_const("freq, occurrences of term within document", freq);
    tf_node.add_const("k1, term saturation parameter", K1);
    tf_node.add_const("b, length normalization parameter", B);
    tf_node.add_const("dl, length of field", field_length);
    tf_node.add_const("avgdl, average length of field", self.average_fieldnorm);

    let mut root = Explanation::new(
        "TermQuery, product of...",
        self.score(fieldnorm_id, term_freq),
    );
    root.add_const("(K1+1)", K1 + 1f32);
    root.add_detail(self.idf_explain.clone());
    root.add_detail(tf_node);
    root
}
}
#[cfg(test)]

View File

@@ -1,5 +1,5 @@
use core::SegmentReader;
use query::intersect_scorers;
use query::explanation::does_not_match;
use query::score_combiner::{DoNothingCombiner, ScoreCombiner, SumWithCoordsCombiner};
use query::term_query::TermScorer;
use query::EmptyScorer;
@@ -9,8 +9,10 @@ use query::RequiredOptionalScorer;
use query::Scorer;
use query::Union;
use query::Weight;
use query::{intersect_scorers, Explanation};
use std::collections::HashMap;
use Result;
use {DocId, SkipResult};
fn scorer_union<TScoreCombiner>(scorers: Vec<Box<Scorer>>) -> Box<Scorer>
where
@@ -50,10 +52,10 @@ impl BooleanWeight {
}
}
fn complex_scorer<TScoreCombiner: ScoreCombiner>(
fn per_occur_scorers(
&self,
reader: &SegmentReader,
) -> Result<Box<Scorer>> {
) -> Result<HashMap<Occur, Vec<Box<Scorer>>>> {
let mut per_occur_scorers: HashMap<Occur, Vec<Box<Scorer>>> = HashMap::new();
for &(ref occur, ref subweight) in &self.weights {
let sub_scorer: Box<Scorer> = subweight.scorer(reader)?;
@@ -62,6 +64,14 @@ impl BooleanWeight {
.or_insert_with(Vec::new)
.push(sub_scorer);
}
Ok(per_occur_scorers)
}
fn complex_scorer<TScoreCombiner: ScoreCombiner>(
&self,
reader: &SegmentReader,
) -> Result<Box<Scorer>> {
let mut per_occur_scorers = self.per_occur_scorers(reader)?;
let should_scorer_opt: Option<Box<Scorer>> = per_occur_scorers
.remove(&Occur::Should)
@@ -118,4 +128,31 @@ impl Weight for BooleanWeight {
self.complex_scorer::<DoNothingCombiner>(reader)
}
}
/// Explains the score of `doc` for this boolean weight.
///
/// Fails with the "does not match" error when the combined scorer cannot be
/// positioned on `doc`. Without scoring, a flat node with value `1` is
/// returned. Otherwise the root holds the combined score and one child per
/// `Must`/`Should` clause whose own `explain` succeeds; clauses that fail to
/// explain the document are left out on purpose.
fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result<Explanation> {
    let mut scorer = self.scorer(reader)?;
    if scorer.skip_next(doc) != SkipResult::Reached {
        return Err(does_not_match(doc));
    }
    if !self.scoring_enabled {
        return Ok(Explanation::new("BooleanQuery with no scoring", 1f32));
    }

    let mut explanation = Explanation::new("BooleanClause. Sum of ...", scorer.score());
    let child_explanations = self
        .weights
        .iter()
        .filter(|clause| is_positive_occur(clause.0))
        .filter_map(|clause| clause.1.explain(reader, doc).ok());
    for child_explanation in child_explanations {
        explanation.add_detail(child_explanation);
    }
    Ok(explanation)
}
}
/// Tells whether a clause with this `Occur` contributes matches.
///
/// `Must` and `Should` are positive; `MustNot` is not.
fn is_positive_occur(occur: Occur) -> bool {
    match occur {
        Occur::MustNot => false,
        Occur::Must | Occur::Should => true,
    }
}

View File

@@ -18,8 +18,8 @@ mod tests {
use query::Scorer;
use query::TermQuery;
use schema::*;
use DocId;
use Index;
use {DocAddress, DocId};
fn aux_test_helper() -> (Index, Field) {
let mut schema_builder = Schema::builder();
@@ -205,4 +205,167 @@ mod tests {
assert_eq!(score_docs(&boolean_query), vec![0.977973, 0.84699446]);
}
}
// motivated by #554
// Indexes three documents with Cyrillic titles/texts plus 1000 filler
// documents, parses a two-word query over both the `title` and `text`
// fields, and compares the pretty-printed explanation of one document
// against a fixed JSON tree.
#[test]
fn test_bm25_several_fields() {
let mut schema_builder = Schema::builder();
let title = schema_builder.add_text_field("title", TEXT);
let text = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
index_writer.add_document(doc!(
// tf = 1 0
title => "Законы притяжения Оксана Кулакова",
// tf = 1 0
text => "Законы притяжения Оксана Кулакова] \n\nТема: Сексуальное искусство, Женственность\nТип товара: Запись вебинара (аудио)\nПродолжительность: 1,5 часа\n\nСсылка на вебинар:\n ",
));
index_writer.add_document(doc!(
// tf = 1 0
title => "Любимые русские пироги (Оксана Путан)",
// tf = 2 0
text => "http://i95.fastpic.ru/big/2017/0628/9a/615b9c8504d94a3893d7f496ac53539a.jpg \n\nОт издателя\nОксана Путан профессиональный повар, автор кулинарных книг и известный кулинарный блогер. Ее рецепты отличаются практичностью, доступностью и пользуются огромной популярностью в русскоязычном интернете. Это третья книга автора о самом вкусном и ароматном настоящих русских пирогах и выпечке!\nДаже новички на кухне легко готовят по ее рецептам. Оксана описывает процесс приготовления настолько подробно и понятно, что вам остается только наслаждаться готовкой и не тратить время на лишние усилия. Готовьте легко и просто!\n\nhttps://www.ozon.ru/context/detail/id/139872462/"
));
index_writer.add_document(doc!(
// tf = 1 1
title => "PDF Мастер Класс \"Морячок\" (Оксана Лифенко)",
// tf = 0 0
text => "https://i.ibb.co/pzvHrDN/I3d U T6 Gg TM.jpg\nhttps://i.ibb.co/NFrb6v6/N0ls Z9nwjb U.jpg\nВ описание входит штаны, кофта, берет, матросский воротник. Описание продается в формате PDF, состоит из 12 страниц формата А4 и может быть напечатано на любом принтере.\nОписание предназначено для кукол BJD RealPuki от FairyLand, но может подойти и другим подобным куклам. Также вы можете вязать этот наряд из обычной пряжи, и он подойдет для куколок побольше.\nhttps://vk.com/market 95724412?w=product 95724412_2212"
));
// Filler documents: together with the three above they give the
// "N, total number of docs" value of 1003 expected below.
for _ in 0..1_000 {
index_writer.add_document(doc!(
title => "a b d e f g",
text => "maitre corbeau sur un arbre perche tenait dans son bec un fromage Maitre rnard par lodeur alleche lui tint a peu pres ce langage."
));
}
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let query_parser = QueryParser::for_index(&index, vec![title, text]);
let query = query_parser
.parse_query("Оксана Лифенко")
.unwrap();
// NOTE(review): this scorer is built and advanced once, but the assertion
// below only relies on `query.explain` — confirm whether it is still needed.
let weight = query.weight(&searcher, true).unwrap();
let mut scorer = weight.scorer(searcher.segment_reader(0u32)).unwrap();
scorer.advance();
// Presumably `DocAddress(0, 0)` is segment 0, doc 0 — verify against `DocAddress`.
let explanation = query.explain(&searcher, DocAddress(0u32, 0u32)).unwrap();
assert_eq!(
explanation.to_pretty_json(),
r#"{
"value": 12.997711,
"description": "BooleanClause. Sum of ...",
"details": [
{
"value": 12.997711,
"description": "BooleanClause. Sum of ...",
"details": [
{
"value": 6.551476,
"description": "TermQuery, product of...",
"details": [
{
"value": 2.2,
"description": "(K1+1)"
},
{
"value": 5.658984,
"description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5))",
"details": [
{
"value": 3.0,
"description": "n, number of docs containing this term"
},
{
"value": 1003.0,
"description": "N, total number of docs"
}
]
},
{
"value": 0.5262329,
"description": "freq / (freq + k1 * (1 - b + b * dl / avgdl))",
"details": [
{
"value": 1.0,
"description": "freq, occurrences of term within document"
},
{
"value": 1.2,
"description": "k1, term saturation parameter"
},
{
"value": 0.75,
"description": "b, length normalization parameter"
},
{
"value": 4.0,
"description": "dl, length of field"
},
{
"value": 5.997009,
"description": "avgdl, average length of field"
}
]
}
]
},
{
"value": 6.446235,
"description": "TermQuery, product of...",
"details": [
{
"value": 2.2,
"description": "(K1+1)"
},
{
"value": 5.9954567,
"description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5))",
"details": [
{
"value": 2.0,
"description": "n, number of docs containing this term"
},
{
"value": 1003.0,
"description": "N, total number of docs"
}
]
},
{
"value": 0.4887212,
"description": "freq / (freq + k1 * (1 - b + b * dl / avgdl))",
"details": [
{
"value": 1.0,
"description": "freq, occurrences of term within document"
},
{
"value": 1.2,
"description": "k1, term saturation parameter"
},
{
"value": 0.75,
"description": "b, length normalization parameter"
},
{
"value": 20.0,
"description": "dl, length of field"
},
{
"value": 24.123629,
"description": "avgdl, average length of field"
}
]
}
]
}
]
}
]
}"#
);
}
}

View File

@@ -1,6 +1,7 @@
use super::Scorer;
use query::Query;
use query::explanation::does_not_match;
use query::Weight;
use query::{Explanation, Query};
use DocId;
use DocSet;
use Result;
@@ -32,6 +33,10 @@ impl Weight for EmptyWeight {
fn scorer(&self, _reader: &SegmentReader) -> Result<Box<Scorer>> {
Ok(Box::new(EmptyScorer))
}
/// The empty weight's scorer is always an `EmptyScorer`, so there is never
/// a score to explain: every call fails with the "does not match" error
/// for `doc`.
fn explain(&self, _reader: &SegmentReader, doc: DocId) -> Result<Explanation> {
    let no_match = does_not_match(doc);
    Err(no_match)
}
}
/// `EmptyScorer` is a dummy `Scorer` in which no document matches.

51
src/query/explanation.rs Normal file
View File

@@ -0,0 +1,51 @@
use {DocId, TantivyError};
/// Builds the error returned by `explain` when a document is not matched.
///
/// The result is a `TantivyError::InvalidArgument` whose message embeds
/// the doc id.
pub(crate) fn does_not_match(doc: DocId) -> TantivyError {
    let message = format!("Document #({}) does not match", doc);
    TantivyError::InvalidArgument(message)
}
/// Object describing the score of a given document.
/// It is organized in trees.
///
/// `.to_pretty_json()` can be useful to print out a human readable
/// representation of this tree when debugging a given score.
///
/// `Debug` is derived so that an explanation can be printed with `{:?}`
/// and embedded in other `Debug` types.
#[derive(Clone, Debug, Serialize)]
pub struct Explanation {
    /// Numeric value attached to this node.
    value: f32,
    /// Human readable label of this node.
    description: String,
    /// Child nodes; left out of the serialized form when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    details: Vec<Explanation>,
}
impl Explanation {
    /// Creates a node without children from a description and a value.
    pub fn new<T: ToString>(description: T, value: f32) -> Explanation {
        Explanation {
            value,
            description: description.to_string(),
            details: Vec::new(),
        }
    }

    /// Returns the value attached to this node.
    pub fn value(&self) -> f32 {
        self.value
    }

    /// Appends `child_explanation` as the last child of this node.
    ///
    /// Children detail some part of the formula of their parent.
    pub fn add_detail(&mut self, child_explanation: Explanation) {
        self.details.push(child_explanation);
    }

    /// Appends a childless node built from `name` and `value`.
    ///
    /// Equivalent to `self.add_detail(Explanation::new(name, value))`.
    pub fn add_const<T: ToString>(&mut self, name: T, value: f32) {
        self.add_detail(Explanation::new(name, value));
    }

    /// Returns an indented json rendering of the whole tree, for debugging.
    ///
    /// # Panics
    ///
    /// Panics if serialization to json fails.
    pub fn to_pretty_json(&self) -> String {
        serde_json::to_string_pretty(self).unwrap()
    }
}

View File

@@ -9,6 +9,7 @@ mod bm25;
mod boolean_query;
mod empty_query;
mod exclude;
mod explanation;
mod fuzzy_query;
mod intersection;
mod occur;
@@ -39,6 +40,7 @@ pub use self::bitset::BitSetDocSet;
pub use self::boolean_query::BooleanQuery;
pub use self::empty_query::{EmptyQuery, EmptyScorer, EmptyWeight};
pub use self::exclude::Exclude;
pub use self::explanation::Explanation;
pub use self::fuzzy_query::FuzzyTermQuery;
pub use self::intersection::intersect_scorers;
pub use self::occur::Occur;

View File

@@ -93,21 +93,12 @@ impl Query for PhraseQuery {
field_name
)));
}
if scoring_enabled {
let terms = self.phrase_terms();
let bm25_weight = BM25Weight::for_terms(searcher, &terms);
Ok(Box::new(PhraseWeight::new(
self.phrase_terms.clone(),
bm25_weight,
true,
)))
} else {
Ok(Box::new(PhraseWeight::new(
self.phrase_terms.clone(),
BM25Weight::null(),
false,
)))
}
let terms = self.phrase_terms();
let bm25_weight = BM25Weight::for_terms(searcher, &terms);
let phrase_weight: PhraseWeight =
PhraseWeight::new(self.phrase_terms.clone(), bm25_weight, scoring_enabled);
Ok(Box::new(phrase_weight))
}
fn query_terms(&self, term_set: &mut BTreeSet<Term>) {

View File

@@ -148,9 +148,13 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
}
}
/// Returns the phrase count currently stored in the scorer.
///
/// NOTE(review): in the visible code the stored value is refreshed by
/// `phrase_match` only when `score_needed` is set — confirm what callers
/// get when scoring is disabled.
pub fn phrase_count(&self) -> u32 {
self.phrase_count
}
fn phrase_match(&mut self) -> bool {
if self.score_needed {
let count = self.phrase_count();
let count = self.compute_phrase_count();
self.phrase_count = count;
count > 0u32
} else {
@@ -183,7 +187,7 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
intersection_exists(&self.left[..intersection_len], &self.right[..])
}
fn phrase_count(&mut self) -> u32 {
fn compute_phrase_count(&mut self) -> u32 {
{
self.intersection_docset
.docset_mut_specialized(0)

View File

@@ -1,12 +1,16 @@
use super::PhraseScorer;
use core::SegmentReader;
use fieldnorm::FieldNormReader;
use postings::SegmentPostings;
use query::bm25::BM25Weight;
use query::EmptyScorer;
use query::explanation::does_not_match;
use query::Scorer;
use query::Weight;
use query::{EmptyScorer, Explanation};
use schema::IndexRecordOption;
use schema::Term;
use Result;
use {DocId, DocSet};
use {Result, SkipResult};
pub struct PhraseWeight {
phrase_terms: Vec<(usize, Term)>,
@@ -27,13 +31,18 @@ impl PhraseWeight {
score_needed,
}
}
}
impl Weight for PhraseWeight {
fn scorer(&self, reader: &SegmentReader) -> Result<Box<Scorer>> {
let similarity_weight = self.similarity_weight.clone();
fn fieldnorm_reader(&self, reader: &SegmentReader) -> FieldNormReader {
let field = self.phrase_terms[0].1.field();
let fieldnorm_reader = reader.get_fieldnorms_reader(field);
reader.get_fieldnorms_reader(field)
}
fn phrase_scorer(
&self,
reader: &SegmentReader,
) -> Result<Option<PhraseScorer<SegmentPostings>>> {
let similarity_weight = self.similarity_weight.clone();
let fieldnorm_reader = self.fieldnorm_reader(reader);
if reader.has_deletes() {
let mut term_postings_list = Vec::new();
for &(offset, ref term) in &self.phrase_terms {
@@ -43,10 +52,10 @@ impl Weight for PhraseWeight {
{
term_postings_list.push((offset, postings));
} else {
return Ok(Box::new(EmptyScorer));
return Ok(None);
}
}
Ok(Box::new(PhraseScorer::new(
Ok(Some(PhraseScorer::new(
term_postings_list,
similarity_weight,
fieldnorm_reader,
@@ -61,10 +70,10 @@ impl Weight for PhraseWeight {
{
term_postings_list.push((offset, postings));
} else {
return Ok(Box::new(EmptyScorer));
return Ok(None);
}
}
Ok(Box::new(PhraseScorer::new(
Ok(Some(PhraseScorer::new(
term_postings_list,
similarity_weight,
fieldnorm_reader,
@@ -73,3 +82,30 @@ impl Weight for PhraseWeight {
}
}
}
impl Weight for PhraseWeight {
/// Returns the phrase scorer for this segment, or an `EmptyScorer` when
/// `phrase_scorer` reports that no scorer can be built.
fn scorer(&self, reader: &SegmentReader) -> Result<Box<Scorer>> {
    match self.phrase_scorer(reader)? {
        Some(phrase_scorer) => Ok(Box::new(phrase_scorer)),
        None => Ok(Box::new(EmptyScorer)),
    }
}
/// Explains the score of `doc` for this phrase weight.
///
/// Fails with the "does not match" error when no phrase scorer can be
/// built for the segment or when the scorer cannot be positioned on `doc`.
/// Otherwise the root node holds the scorer's score and a single child:
/// the BM25 explanation computed from the document's fieldnorm id and the
/// scorer's phrase count.
fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result<Explanation> {
    let mut scorer = match self.phrase_scorer(reader)? {
        Some(phrase_scorer) => phrase_scorer,
        None => return Err(does_not_match(doc)),
    };
    if scorer.skip_next(doc) != SkipResult::Reached {
        return Err(does_not_match(doc));
    }
    let fieldnorm_id = self.fieldnorm_reader(reader).fieldnorm_id(doc);
    let phrase_count = scorer.phrase_count();
    let bm25_explanation = self.similarity_weight.explain(fieldnorm_id, phrase_count);
    let mut explanation = Explanation::new("Phrase Scorer", scorer.score());
    explanation.add_detail(bm25_explanation);
    Ok(explanation)
}
}

View File

@@ -1,10 +1,11 @@
use super::Weight;
use core::searcher::Searcher;
use downcast_rs;
use query::Explanation;
use std::collections::BTreeSet;
use std::fmt;
use Result;
use Term;
use {downcast_rs, DocAddress};
/// The `Query` trait defines a set of documents and a scoring method
/// for those documents.
@@ -48,6 +49,13 @@ pub trait Query: QueryClone + downcast_rs::Downcast + fmt::Debug {
/// See [`Weight`](./trait.Weight.html).
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> Result<Box<Weight>>;
/// Returns an `Explanation` for the score of the document.
///
/// The weight is created with scoring enabled and asked to explain the
/// document inside the segment designated by `doc_address`.
fn explain(&self, searcher: &Searcher, doc_address: DocAddress) -> Result<Explanation> {
    let segment_ord = doc_address.segment_ord();
    let segment_reader = searcher.segment_reader(segment_ord);
    let scoring_weight = self.weight(searcher, true)?;
    let doc_id = doc_address.doc();
    scoring_weight.explain(segment_reader, doc_id)
}
/// Returns the number of documents matching the query.
fn count(&self, searcher: &Searcher) -> Result<usize> {
let weight = self.weight(searcher, false)?;

View File

@@ -2,15 +2,17 @@ use common::BitSet;
use core::Searcher;
use core::SegmentReader;
use error::TantivyError;
use query::BitSetDocSet;
use query::explanation::does_not_match;
use query::ConstScorer;
use query::{BitSetDocSet, Explanation};
use query::{Query, Scorer, Weight};
use schema::Type;
use schema::{Field, IndexRecordOption, Term};
use std::collections::Bound;
use std::ops::Range;
use termdict::{TermDictionary, TermStreamer};
use Result;
use DocId;
use {Result, SkipResult};
fn map_bound<TFrom, TTo, Transform: Fn(&TFrom) -> TTo>(
bound: &Bound<TFrom>,
@@ -286,6 +288,14 @@ impl Weight for RangeWeight {
let doc_bitset = BitSetDocSet::from(doc_bitset);
Ok(Box::new(ConstScorer::new(doc_bitset)))
}
/// Explains the score of `doc` for this range weight.
///
/// A matched document gets the constant score `1.0`; a document the
/// scorer cannot be positioned on yields the "does not match" error.
fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result<Explanation> {
    let mut scorer = self.scorer(reader)?;
    if scorer.skip_next(doc) == SkipResult::Reached {
        Ok(Explanation::new("RangeQuery", 1.0f32))
    } else {
        Err(does_not_match(doc))
    }
}
}
#[cfg(test)]

View File

@@ -1,5 +1,5 @@
use docset::{DocSet, SkipResult};
use query::Scorer;
use query::{Explanation, Scorer};
use DocId;
use Score;
@@ -28,11 +28,31 @@ impl TermScorer {
}
}
impl TermScorer {
    /// Term frequency reported by the postings at their current position.
    pub fn term_freq(&self) -> u32 {
        self.postings.term_freq()
    }

    /// Fieldnorm id of the document the scorer is currently positioned on.
    pub fn fieldnorm_id(&self) -> u8 {
        let current_doc = self.doc();
        self.fieldnorm_reader.fieldnorm_id(current_doc)
    }

    /// Explanation of the score of the current document, delegated to the
    /// similarity weight.
    pub fn explain(&self) -> Explanation {
        self.similarity_weight
            .explain(self.fieldnorm_id(), self.term_freq())
    }
}
impl DocSet for TermScorer {
fn advance(&mut self) -> bool {
self.postings.advance()
}
fn skip_next(&mut self, target: DocId) -> SkipResult {
self.postings.skip_next(target)
}
fn doc(&self) -> DocId {
self.postings.doc()
}
@@ -40,17 +60,12 @@ impl DocSet for TermScorer {
fn size_hint(&self) -> u32 {
self.postings.size_hint()
}
fn skip_next(&mut self, target: DocId) -> SkipResult {
self.postings.skip_next(target)
}
}
impl Scorer for TermScorer {
fn score(&mut self) -> Score {
let doc = self.doc();
let fieldnorm_id = self.fieldnorm_reader.fieldnorm_id(doc);
self.similarity_weight
.score(fieldnorm_id, self.postings.term_freq())
let fieldnorm_id = self.fieldnorm_id();
let term_freq = self.term_freq();
self.similarity_weight.score(fieldnorm_id, term_freq)
}
}

View File

@@ -3,11 +3,13 @@ use core::SegmentReader;
use docset::DocSet;
use postings::SegmentPostings;
use query::bm25::BM25Weight;
use query::Scorer;
use query::explanation::does_not_match;
use query::Weight;
use query::{Explanation, Scorer};
use schema::IndexRecordOption;
use Result;
use DocId;
use Term;
use {Result, SkipResult};
pub struct TermWeight {
term: Term,
@@ -17,25 +19,16 @@ pub struct TermWeight {
impl Weight for TermWeight {
fn scorer(&self, reader: &SegmentReader) -> Result<Box<Scorer>> {
let field = self.term.field();
let inverted_index = reader.inverted_index(field);
let fieldnorm_reader = reader.get_fieldnorms_reader(field);
let similarity_weight = self.similarity_weight.clone();
let postings_opt: Option<SegmentPostings> =
inverted_index.read_postings(&self.term, self.index_record_option);
if let Some(segment_postings) = postings_opt {
Ok(Box::new(TermScorer::new(
segment_postings,
fieldnorm_reader,
similarity_weight,
)))
} else {
Ok(Box::new(TermScorer::new(
SegmentPostings::empty(),
fieldnorm_reader,
similarity_weight,
)))
let term_scorer = self.scorer_specialized(reader)?;
Ok(Box::new(term_scorer))
}
/// Explains the score of `doc` for this term weight.
///
/// The concrete `TermScorer` is positioned on `doc`; when that succeeds
/// its own explanation is returned, otherwise the "does not match" error.
fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result<Explanation> {
    let mut term_scorer = self.scorer_specialized(reader)?;
    if term_scorer.skip_next(doc) == SkipResult::Reached {
        Ok(term_scorer.explain())
    } else {
        Err(does_not_match(doc))
    }
}
fn count(&self, reader: &SegmentReader) -> Result<u32> {
@@ -64,4 +57,26 @@ impl TermWeight {
similarity_weight,
}
}
/// Builds the concrete `TermScorer` for this term in the given segment.
///
/// When the inverted index has no postings for the term, the scorer is
/// built over `SegmentPostings::empty()` instead.
fn scorer_specialized(&self, reader: &SegmentReader) -> Result<TermScorer> {
    let field = self.term.field();
    let inverted_index = reader.inverted_index(field);
    let fieldnorm_reader = reader.get_fieldnorms_reader(field);
    let similarity_weight = self.similarity_weight.clone();
    let segment_postings: SegmentPostings = inverted_index
        .read_postings(&self.term, self.index_record_option)
        .unwrap_or_else(SegmentPostings::empty);
    Ok(TermScorer::new(
        segment_postings,
        fieldnorm_reader,
        similarity_weight,
    ))
}
}

View File

@@ -1,6 +1,7 @@
use super::Scorer;
use core::SegmentReader;
use Result;
use query::Explanation;
use {DocId, Result};
/// A Weight is the specialization of a Query
/// for a given set of segments.
@@ -11,6 +12,9 @@ pub trait Weight: Send + Sync + 'static {
/// See [`Query`](./trait.Query.html).
fn scorer(&self, reader: &SegmentReader) -> Result<Box<Scorer>>;
/// Returns an `Explanation` for the given document.
fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result<Explanation>;
/// Returns the number of documents within the given `SegmentReader`.
fn count(&self, reader: &SegmentReader) -> Result<u32> {
let mut scorer = self.scorer(reader)?;