From e20acd4ef6fd038d8911066f0eaecf3d58846c5a Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Tue, 5 Jul 2016 21:37:35 +0900 Subject: [PATCH] Added doc_freq to searcher. Sums the value for all readers. --- src/core/searcher.rs | 8 +++++++ src/core/segment_reader.rs | 7 ++++++ src/lib.rs | 48 +++++++++++++++++++++++++++++++++++++- 3 files changed, 62 insertions(+), 1 deletion(-) diff --git a/src/core/searcher.rs b/src/core/searcher.rs index f3d04e65d..656386c60 100644 --- a/src/core/searcher.rs +++ b/src/core/searcher.rs @@ -8,6 +8,7 @@ use std::io; use common::TimerTree; use postings::Postings; use query::Query; +use schema::Term; #[derive(Debug)] pub struct Searcher { @@ -30,6 +31,13 @@ impl Searcher { segment_reader.doc(doc_id) } + pub fn doc_freq(&self, term: &Term) -> u32 { + self.segments + .iter() + .map(|segment_reader| segment_reader.doc_freq(term)) + .sum() + } + fn add_segment(&mut self, segment: Segment) -> io::Result<()> { let segment_reader = try!(SegmentReader::open(segment.clone())); self.segments.push(segment_reader); diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 3cd2a3a6b..91e800f1e 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -57,6 +57,13 @@ impl SegmentReader { } + pub fn doc_freq(&self, term: &Term) -> u32 { + match self.get_term_info(term) { + Some(term_info) => term_info.doc_freq, + None => 0, + } + } + pub fn get_store_reader(&self) -> &StoreReader { &self.store_reader } diff --git a/src/lib.rs b/src/lib.rs index a54361765..cf7e8bdee 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,7 @@ #![cfg_attr(test, feature(test))] #![cfg_attr(test, feature(step_by))] #![doc(test(attr(allow(unused_variables), deny(warnings))))] +#![feature(iter_arith)] #[macro_use] @@ -106,9 +107,54 @@ mod tests { } - #[test] fn test_docfreq() { + let mut schema = schema::Schema::new(); + let text_field = schema.add_text_field("text", schema::TEXT); + let index = Index::create_in_ram(schema); + { + let mut index_writer = index.writer_with_num_threads(1).unwrap(); + let mut doc = Document::new(); + doc.add_text(text_field, "a b c"); + index_writer.add_document(doc).unwrap(); + index_writer.wait().unwrap(); + } + { + let mut index_writer = index.writer_with_num_threads(1).unwrap(); + { + let mut doc = Document::new(); + doc.add_text(text_field, "a"); + index_writer.add_document(doc).unwrap(); + } + { + let mut doc = Document::new(); + doc.add_text(text_field, "a a"); + index_writer.add_document(doc).unwrap(); + } + index_writer.wait().unwrap(); + } + { + let mut index_writer = index.writer_with_num_threads(1).unwrap(); + let mut doc = Document::new(); + doc.add_text(text_field, "c"); + index_writer.add_document(doc).unwrap(); + index_writer.wait().unwrap(); + } + { + let searcher = index.searcher().unwrap(); + let term_a = Term::from_field_text(text_field, "a"); + assert_eq!(searcher.doc_freq(&term_a), 3); + let term_b = Term::from_field_text(text_field, "b"); + assert_eq!(searcher.doc_freq(&term_b), 1); + let term_c = Term::from_field_text(text_field, "c"); + assert_eq!(searcher.doc_freq(&term_c), 2); + let term_d = Term::from_field_text(text_field, "d"); + assert_eq!(searcher.doc_freq(&term_d), 0); + } + } + + #[test] + fn test_termfreq() { let mut schema = schema::Schema::new(); let text_field = schema.add_text_field("text", schema::TEXT); let index = Index::create_in_ram(schema);