Add Searcher::terms() and implement fst::Streamer for FstMerger.

Searcher::terms() builds an FstMerger from the searcher's segment readers.
FstMerger gains a Streamer impl whose items are the raw term bytes (&[u8]),
the previously commented-out test_term_iterator is re-enabled against the
advance()/key() API, and dead commented-out code is removed.

Review notes on this revision of the patch:
- Streamer::next now spells its return type as Option<Self::Item>, and the
  redundant `&` in front of `...streamer.key()` is dropped. This assumes
  FstMapStreamer::key() already returns &[u8] -- TODO confirm.
- Searcher::terms() spells its return type as FstMerger<TermInfo>, matching
  the From<&[SegmentReader]> impl it calls; its doc comment now names
  FstMerger instead of TermIterator.
- Context and removed lines are reproduced as received. Some of them appear
  to have lost generic parameters in transit (e.g. `-> Result {`), and the
  indentation is reconstructed, so re-sync against the tree (or apply with
  whitespace tolerance) before merging.

diff --git a/src/core/searcher.rs b/src/core/searcher.rs
index 76c318a2c..9fade4cc5 100644
--- a/src/core/searcher.rs
+++ b/src/core/searcher.rs
@@ -7,7 +7,9 @@ use query::Query;
 use DocId;
 use DocAddress;
 use schema::Term;
+use datastruct::fstmap::FstMerger;
 use std::fmt;
+use postings::TermInfo;
 
 
 /// Holds a list of `SegmentReader`s ready for search.
@@ -62,6 +64,18 @@ impl Searcher {
     pub fn search(&self, query: &Query, collector: &mut C) -> Result {
         query.search(self, collector)
     }
+
+    /// Returns a Stream over all of the sorted unique terms of
+    /// the searcher.
+    ///
+    /// This includes all of the fields from all of the segment_readers.
+    /// See `FstMerger`, the type this method returns.
+    ///
+    /// # Warning
+    /// This API is very likely to change in the future.
+    pub fn terms(&self) -> FstMerger<TermInfo> {
+        FstMerger::from(self.segment_readers())
+    }
 }
 
 
diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs
index 5a0a836e7..8ca96cbf8 100644
--- a/src/core/segment_reader.rs
+++ b/src/core/segment_reader.rs
@@ -171,7 +171,7 @@ impl SegmentReader {
     }
 
     /// Return the term dictionary datastructure.
-    pub fn term_infos<'b>(&'b self) -> &'b FstMap {
+    pub fn term_infos(&self) -> &FstMap {
         &self.term_infos
     }
 
@@ -227,11 +227,6 @@ impl SegmentReader {
             _ => { SegmentPostingsOption::NoFreq },
         };
         Some(self.read_postings_from_terminfo(&term_info, possible_option))
-
-        // SegmentPostings::from_data(term_info.doc_freq,
-        //                            postings_data,
-        //                            &self.delete_bitset,
-        //                            freq_handler))
     }
 
     pub fn read_postings_from_terminfo(&self,
diff --git a/src/datastruct/fstmap/fstmerger.rs b/src/datastruct/fstmap/fstmerger.rs
index 90f2264d7..687ba5127 100644
--- a/src/datastruct/fstmap/fstmerger.rs
+++ b/src/datastruct/fstmap/fstmerger.rs
@@ -4,7 +4,7 @@ use super::FstMapStreamer;
 use common::BinarySerializable;
 use postings::TermInfo;
 use std::cmp::Ordering;
-
+use fst::Streamer;
 
 pub struct HeapItem<'a, V> where V: 'a + BinarySerializable {
     pub streamer: FstMapStreamer<'a, V>,
@@ -117,35 +117,6 @@ impl<'a, V> FstMerger<'a, V> where V: 'a + BinarySerializable {
 
 
 
-
-    /*
-
-
-    /// Returns the sorted list of segment ordinals
-    /// that include the current term.
-    ///
-    /// This method may be called
-    /// iff advance() has been called before
-    /// and "true" was returned.
-    pub fn segment_ords(&self) -> &[usize] {
-        &self.current_segment_ords[..]
-    }
-    */
-
-/*
-impl<'a, V> Streamer<'a> for FstMerger<'a, V> {
-    type Item = &'a Term;
-
-    fn next(&'a mut self) -> Option {
-        if self.advance() {
-            Some(&self.current_term)
-        } else {
-            None
-        }
-    }
-}
-*/
-
 impl<'a> From<&'a [SegmentReader]> for FstMerger<'a, TermInfo> where TermInfo: BinarySerializable {
     fn from(segment_readers: &'a [SegmentReader]) -> FstMerger<'a, TermInfo> {
         FstMerger::new(segment_readers
@@ -155,52 +126,67 @@ impl<'a> From<&'a [SegmentReader]> for FstMerger<'a, TermInfo> where TermInfo: B
     }
 }
 
-// #[cfg(test)]
-// mod tests {
-//     use super::*;
-//     use schema::{SchemaBuilder, Document, TEXT};
-//     use core::Index;
+impl<'a, V> Streamer<'a> for FstMerger<'a, V> where V: BinarySerializable {
+    type Item = &'a [u8];
 
-//     #[test]
-//     fn test_term_iterator() {
-//         let mut schema_builder = SchemaBuilder::default();
-//         let text_field = schema_builder.add_text_field("text", TEXT);
-//         let index = Index::create_in_ram(schema_builder.build());
-//         {
-//             let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
-//             {
-//                 {
-//                     let mut doc = Document::default();
-//                     doc.add_text(text_field, "a b d f");
-//                     index_writer.add_document(doc);
-//                 }
-//                 index_writer.commit().unwrap();
-//             }
-//             {
-//                 {
-//                     let mut doc = Document::default();
-//                     doc.add_text(text_field, "a b c d f");
-//                     index_writer.add_document(doc);
-//                 }
-//                 index_writer.commit().unwrap();
-//             }
-//             {
-//                 {
-//                     let mut doc = Document::default();
-//                     doc.add_text(text_field, "e f");
-//                     index_writer.add_document(doc);
-//                 }
-//                 index_writer.commit().unwrap();
-//             }
-//         }
-//         index.load_searchers().unwrap();
-//         let searcher = index.searcher();
-//         let mut term_it = searcher.terms();
-//         let mut terms = String::new();
-//         while let Some(term) = term_it.next() {
-//             terms.push_str(term.text());
-//         }
-//         assert_eq!(terms, "abcdef");
-//     }
+    fn next(&'a mut self) -> Option<Self::Item> {
+        if self.advance() {
+            Some(self.current_streamers[0].streamer.key())
+        }
+        else {
+            None
+        }
+
+    }
+}
 
-// }
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use schema::{Term, SchemaBuilder, Document, TEXT};
+    use core::Index;
+
+    #[test]
+    fn test_term_iterator() {
+        let mut schema_builder = SchemaBuilder::default();
+        let text_field = schema_builder.add_text_field("text", TEXT);
+        let index = Index::create_in_ram(schema_builder.build());
+        {
+            let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
+            {
+                {
+                    let mut doc = Document::default();
+                    doc.add_text(text_field, "a b d f");
+                    index_writer.add_document(doc);
+                }
+                index_writer.commit().unwrap();
+            }
+            {
+                {
+                    let mut doc = Document::default();
+                    doc.add_text(text_field, "a b c d f");
+                    index_writer.add_document(doc);
+                }
+                index_writer.commit().unwrap();
+            }
+            {
+                {
+                    let mut doc = Document::default();
+                    doc.add_text(text_field, "e f");
+                    index_writer.add_document(doc);
+                }
+                index_writer.commit().unwrap();
+            }
+        }
+        index.load_searchers().unwrap();
+        let searcher = index.searcher();
+        let mut term_it = searcher.terms();
+        let mut term_string = String::new();
+        while term_it.advance() {
+            let term = Term::from_bytes(term_it.key());
+            term_string.push_str(term.text());
+        }
+        assert_eq!(&*term_string, "abcdef");
+    }
+
+}