This commit is contained in:
Paul Masurel
2017-05-15 22:30:18 +09:00
parent a23b7a1815
commit 4c8f9742f8
111 changed files with 2791 additions and 2888 deletions

View File

@@ -9,7 +9,7 @@ pub use self::term_scorer::TermScorer;
#[cfg(test)]
mod tests {
use postings::{DocSet, VecPostings};
use query::Scorer;
use query::term_query::TermScorer;
@@ -23,7 +23,7 @@ mod tests {
fn abs_diff(left: f32, right: f32) -> f32 {
(right - left).abs()
}
}
#[test]
@@ -44,7 +44,8 @@ mod tests {
index.load_searchers().unwrap();
let searcher = index.searcher();
let term_query = TermQuery::new(Term::from_field_text(text_field, "a"), SegmentPostingsOption::NoFreq);
let term_query = TermQuery::new(Term::from_field_text(text_field, "a"),
SegmentPostingsOption::NoFreq);
let term_weight = term_query.weight(&searcher).unwrap();
let segment_reader = searcher.segment_reader(0);
let mut term_scorer = term_weight.scorer(segment_reader).unwrap();
@@ -53,13 +54,13 @@ mod tests {
assert_eq!(term_scorer.score(), 0.30685282);
}
#[test]
pub fn test_term_scorer() {
let left_fieldnorms = U64FastFieldReader::from(vec!(10, 4));
let left_fieldnorms = U64FastFieldReader::from(vec![10, 4]);
assert_eq!(left_fieldnorms.get(0), 10);
assert_eq!(left_fieldnorms.get(1), 4);
let left = VecPostings::from(vec!(1));
let left = VecPostings::from(vec![1]);
let mut left_scorer = TermScorer {
idf: 0.30685282,
fieldnorm_reader_opt: Some(left_fieldnorms),
@@ -69,4 +70,4 @@ mod tests {
assert!(abs_diff(left_scorer.score(), 0.15342641) < 0.001f32);
}
}
}

View File

@@ -13,7 +13,7 @@ use std::any::Any;
/// The associated score is defined as
/// `idf` * sqrt(`term_freq` / `field norm`)
/// in which:
/// * idf - inverse document frequency.
/// * idf - inverse document frequency.
/// * term_freq - number of occurrences of the term in the field
/// * field norm - number of tokens in the field.
#[derive(Debug)]
@@ -31,9 +31,9 @@ impl TermQuery {
}
}
/// Returns a weight object.
///
///
/// While `.weight(...)` returns a boxed trait object,
/// this method returns a specific implementation.
/// This is useful for optimization purposes.
@@ -55,5 +55,4 @@ impl Query for TermQuery {
fn weight(&self, searcher: &Searcher) -> Result<Box<Weight>> {
Ok(box self.specialized_weight(searcher))
}
}

View File

@@ -6,41 +6,46 @@ use query::Scorer;
use postings::Postings;
use fastfield::FastFieldReader;
pub struct TermScorer<TPostings> where TPostings: Postings {
pub struct TermScorer<TPostings>
where TPostings: Postings
{
pub idf: Score,
pub fieldnorm_reader_opt: Option<U64FastFieldReader>,
pub postings: TPostings,
}
impl<TPostings> TermScorer<TPostings> where TPostings: Postings {
impl<TPostings> TermScorer<TPostings>
where TPostings: Postings
{
pub fn postings(&self) -> &TPostings {
&self.postings
}
}
impl<TPostings> DocSet for TermScorer<TPostings> where TPostings: Postings {
fn advance(&mut self,) -> bool {
impl<TPostings> DocSet for TermScorer<TPostings>
where TPostings: Postings
{
fn advance(&mut self) -> bool {
self.postings.advance()
}
fn doc(&self,) -> DocId {
fn doc(&self) -> DocId {
self.postings.doc()
}
}
impl<TPostings> Scorer for TermScorer<TPostings> where TPostings: Postings {
fn score(&self,) -> Score {
impl<TPostings> Scorer for TermScorer<TPostings>
where TPostings: Postings
{
fn score(&self) -> Score {
let doc = self.postings.doc();
let tf = match self.fieldnorm_reader_opt {
Some(ref fieldnorm_reader) => {
let field_norm = fieldnorm_reader.get(doc);
(self.postings.term_freq() as f32 / field_norm as f32)
}
None => {
self.postings.term_freq() as f32
}
None => self.postings.term_freq() as f32,
};
self.idf * tf.sqrt()
}
}
}

View File

@@ -16,40 +16,35 @@ pub struct TermWeight {
impl Weight for TermWeight {
fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>> {
let specialized_scorer = try!(self.specialized_scorer(reader));
Ok(box specialized_scorer)
}
}
impl TermWeight {
fn idf(&self) -> f32 {
1.0 + (self.num_docs as f32 / (self.doc_freq as f32 + 1.0)).ln()
}
pub fn specialized_scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<TermScorer<SegmentPostings<'a>>> {
pub fn specialized_scorer<'a>(&'a self,
reader: &'a SegmentReader)
-> Result<TermScorer<SegmentPostings<'a>>> {
let field = self.term.field();
let fieldnorm_reader_opt = reader.get_fieldnorms_reader(field);
Ok(
reader
.read_postings(&self.term, self.segment_postings_options)
.map(|segment_postings|
TermScorer {
idf: self.idf(),
fieldnorm_reader_opt: fieldnorm_reader_opt,
postings: segment_postings,
}
)
.unwrap_or(
TermScorer {
idf: 1f32,
fieldnorm_reader_opt: None,
postings: SegmentPostings::empty()
Ok(reader
.read_postings(&self.term, self.segment_postings_options)
.map(|segment_postings| {
TermScorer {
idf: self.idf(),
fieldnorm_reader_opt: fieldnorm_reader_opt,
postings: segment_postings,
}
})
)
.unwrap_or(TermScorer {
idf: 1f32,
fieldnorm_reader_opt: None,
postings: SegmentPostings::empty(),
}))
}
}
}