From 6676fe5717f8e20683c8d38f68ee024267c4876b Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Sat, 17 Feb 2018 15:02:51 +0900 Subject: [PATCH] Added a count method --- src/query/boolean_query/mod.rs | 44 ----------------------------- src/query/query.rs | 10 +++++++ src/query/score_combiner.rs | 13 +++++++++ src/query/scorer.rs | 13 +++++++++ src/query/term_query/term_weight.rs | 9 ++++++ src/query/weight.rs | 4 +++ 6 files changed, 49 insertions(+), 44 deletions(-) diff --git a/src/query/boolean_query/mod.rs b/src/query/boolean_query/mod.rs index e7c3b48ac..7c611ebed 100644 --- a/src/query/boolean_query/mod.rs +++ b/src/query/boolean_query/mod.rs @@ -1,15 +1,12 @@ mod boolean_query; -//mod boolean_scorer; mod boolean_weight; pub use self::boolean_query::BooleanQuery; -//pub use self::boolean_scorer::BooleanScorer; #[cfg(test)] mod tests { use super::*; - use query::Scorer; use query::Occur; use query::Query; use query::TermQuery; @@ -19,10 +16,6 @@ mod tests { use fastfield::U64FastFieldReader; use schema::IndexRecordOption; - fn abs_diff(left: f32, right: f32) -> f32 { - (right - left).abs() - } - #[test] pub fn test_boolean_query() { let mut schema_builder = SchemaBuilder::default(); @@ -107,41 +100,4 @@ mod tests { assert_eq!(matching_docs(&boolean_query), Vec::::new()); } } - -// #[test] -// pub fn test_boolean_scorer() { -// let occurs = vec![Occur::Should, Occur::Should]; -// let occur_filter = OccurFilter::new(&occurs); -// -// let left_fieldnorms = -// U64FastFieldReader::from((0u64..9u64).map(|doc| doc * 3).collect::>()); -// -// let left = VecPostings::from(vec![1, 2, 3]); -// let left_scorer = TermScorer { -// idf: 1f32, -// fieldnorm_reader_opt: Some(left_fieldnorms), -// postings: left, -// }; -// -// let right_fieldnorms = -// U64FastFieldReader::from((0u64..9u64).map(|doc| doc * 5).collect::>()); -// let right = VecPostings::from(vec![1, 3, 8]); -// -// let right_scorer = TermScorer { -// idf: 4f32, -// fieldnorm_reader_opt: Some(right_fieldnorms), -// postings: right, -// }; -// -// let mut boolean_scorer = BooleanScorer::new(vec![left_scorer, right_scorer], occur_filter); -// assert_eq!(boolean_scorer.next(), Some(1u32)); -// assert!(abs_diff(boolean_scorer.score(), 2.3662047) < 0.001); -// assert_eq!(boolean_scorer.next(), Some(2u32)); -// assert!(abs_diff(boolean_scorer.score(), 0.20412415) < 0.001f32); -// assert_eq!(boolean_scorer.next(), Some(3u32)); -// assert_eq!(boolean_scorer.next(), Some(8u32)); -// assert!(abs_diff(boolean_scorer.score(), 0.31622776) < 0.001f32); -// assert!(!boolean_scorer.advance()); -// } - } diff --git a/src/query/query.rs b/src/query/query.rs index b0af34fde..e01e60409 100644 --- a/src/query/query.rs +++ b/src/query/query.rs @@ -53,6 +53,16 @@ pub trait Query: fmt::Debug { /// See [`Weight`](./trait.Weight.html). fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> Result>; + + fn count(&self, searcher: &Searcher) -> Result { + let weight = self.weight(searcher, false)?; + let mut result = 0; + for reader in searcher.segment_readers() { + result += weight.count(reader)? as usize; + } + Ok(result) + } + /// Search works as follows : /// /// First the weight object associated to the query is created. diff --git a/src/query/score_combiner.rs b/src/query/score_combiner.rs index 0111d7b03..eb218e97d 100644 --- a/src/query/score_combiner.rs +++ b/src/query/score_combiner.rs @@ -1,12 +1,20 @@ use Score; use query::Scorer; + +/// The `ScoreCombiner` trait defines how to compute +/// an overall score given a list of scores. pub trait ScoreCombiner: Default + Clone + Copy { fn update(&mut self, scorer: &mut TScorer); fn clear(&mut self); fn score(&self) -> Score; } +/// Just ignores scores. The `DoNothingCombiner` does not +/// even call the scorers `.score()` function. +/// +/// It is useful to optimize the case when scoring is disabled. +/// #[derive(Default, Clone, Copy)] //< these should not be too much work :) pub struct DoNothingCombiner; @@ -20,6 +28,8 @@ impl ScoreCombiner for DoNothingCombiner { } } + +/// Sums the score of different scorers. #[derive(Default, Clone, Copy)] pub struct SumCombiner { score: Score @@ -40,6 +50,9 @@ impl ScoreCombiner for SumCombiner { } } + +/// Sums the score of different scorers and keeps the count +/// of scorers which matched. #[derive(Default, Clone, Copy)] pub struct SumWithCoordsCombiner { num_fields: usize, diff --git a/src/query/scorer.rs b/src/query/scorer.rs index b76b6187d..58574e405 100644 --- a/src/query/scorer.rs +++ b/src/query/scorer.rs @@ -22,6 +22,14 @@ pub trait Scorer: DocSet { collector.collect(self.doc(), self.score()); } } + + fn count(&mut self) -> u32 { + let mut count = 0u32; + while self.advance() { + count += 1u32; + } + count + } } impl<'a> Scorer for Box { @@ -33,6 +41,11 @@ impl<'a> Scorer for Box { let scorer = self.deref_mut(); scorer.collect(collector); } + + fn count(&mut self) -> u32 { + let scorer = self.deref_mut(); + scorer.count() + } } /// `EmptyScorer` is a dummy `Scorer` in which no document matches. diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs index c6cf7e8f6..e212613a7 100644 --- a/src/query/term_query/term_weight.rs +++ b/src/query/term_query/term_weight.rs @@ -2,6 +2,7 @@ use Term; use query::Weight; use core::SegmentReader; use query::Scorer; +use DocSet; use postings::SegmentPostings; use schema::IndexRecordOption; use super::term_scorer::TermScorer; @@ -19,6 +20,14 @@ impl Weight for TermWeight { let specialized_scorer = self.specialized_scorer(reader)?; Ok(box specialized_scorer) } + + fn count(&self, reader: &SegmentReader) -> Result { + if reader.num_deleted_docs() == 0 { + Ok(self.doc_freq) + } else { + Ok(self.specialized_scorer(reader)?.count()) + } + } } impl TermWeight { diff --git a/src/query/weight.rs b/src/query/weight.rs index 8d9359119..0d81af56e 100644 --- a/src/query/weight.rs +++ b/src/query/weight.rs @@ -10,4 +10,8 @@ pub trait Weight { /// Returns the scorer for the given segment. /// See [`Query`](./trait.Query.html). fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result>; + + fn count(&self, reader: &SegmentReader) -> Result { + Ok(self.scorer(reader)?.count()) + } }