diff --git a/src/core/collector.rs b/src/core/collector.rs index 1f2f6712e..b03c5a6a0 100644 --- a/src/core/collector.rs +++ b/src/core/collector.rs @@ -1,16 +1,16 @@ use core::schema::DocId; use core::reader::SegmentReader; -use core::index::SegmentId; +use core::searcher::SegmentLocalId; use core::searcher::DocAddress; pub trait Collector { - fn set_segment(&mut self, segment: &SegmentReader); + fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader); fn collect(&mut self, doc_id: DocId); } pub struct FirstNCollector { docs: Vec, - current_segment: Option, + current_segment: u32, limit: usize, } @@ -19,7 +19,7 @@ impl FirstNCollector { FirstNCollector { docs: Vec::new(), limit: limit, - current_segment: None, + current_segment: 0, } } @@ -30,13 +30,13 @@ impl FirstNCollector { impl Collector for FirstNCollector { - fn set_segment(&mut self, segment: &SegmentReader) { - self.current_segment = Some(segment.id()); + fn set_segment(&mut self, segment_local_id: SegmentLocalId, _: &SegmentReader) { + self.current_segment = segment_local_id; } fn collect(&mut self, doc_id: DocId) { if self.docs.len() < self.limit { - self.docs.push(DocAddress(self.current_segment.clone().unwrap(), doc_id)); + self.docs.push(DocAddress(self.current_segment.clone(), doc_id)); } } } @@ -59,7 +59,7 @@ impl CountCollector { impl Collector for CountCollector { - fn set_segment(&mut self, _: &SegmentReader) {} + fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) {} fn collect(&mut self, _: DocId) { self.count += 1; @@ -84,9 +84,9 @@ impl<'a> MultiCollector<'a> { impl<'a> Collector for MultiCollector<'a> { - fn set_segment(&mut self, segment: &SegmentReader) { + fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) { for collector in self.collectors.iter_mut() { - collector.set_segment(segment); + collector.set_segment(segment_local_id, segment); } } diff --git a/src/core/reader.rs b/src/core/reader.rs index f9a3f521f..a64c9b7a7 100644 --- a/src/core/reader.rs +++ b/src/core/reader.rs @@ -14,6 +14,7 @@ use std::io; use std::str; use core::codec::TermInfo; use core::fstmap::FstMap; +use std::fmt; use rustc_serialize::json; use core::serial::SegmentSerializer; use core::serial::SerializableSegment; @@ -24,12 +25,19 @@ use core::convert_to_ioerror; pub struct SegmentReader { segment_info: SegmentInfo, - segment: Segment, + segment_id: SegmentId, term_offsets: FstMap, postings_data: ReadOnlySource, store_reader: StoreReader, } + +impl fmt::Debug for SegmentReader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "SegmentReader({:?})", self.segment_id) + } +} + pub struct SegmentPostings { doc_id: usize, doc_ids: Vec, @@ -101,12 +109,6 @@ impl Iterator for SegmentPostings { impl SegmentReader { - - /// Returns the associated segment id. - pub fn id(&self,) -> SegmentId { - self.segment.id() - } - /// Returns the highest document id ever attributed in /// this segement + 1. /// Today, `tantivy` does not handle deletes so, it happens @@ -129,7 +131,7 @@ impl SegmentReader { segment_info: segment_info, postings_data: postings_shared_mmap, term_offsets: term_offsets, - segment: segment, + segment_id: segment.id(), store_reader: store_reader, }) } diff --git a/src/core/searcher.rs b/src/core/searcher.rs index 49b0ffb10..fa87dae72 100644 --- a/src/core/searcher.rs +++ b/src/core/searcher.rs @@ -1,46 +1,40 @@ use core::reader::SegmentReader; use core::index::Index; use core::index::Segment; -use core::index::SegmentId; use core::schema::DocId; use core::schema::Document; use core::collector::Collector; -use std::collections::HashMap; use std::io; use core::schema::Term; pub struct Searcher { segments: Vec, - segments_idx: HashMap, } +pub type SegmentLocalId = u32; + #[derive(Debug)] -pub struct DocAddress(pub SegmentId, pub DocId); +pub struct DocAddress(pub SegmentLocalId, pub DocId); impl Searcher { pub fn doc(&self, doc_address: &DocAddress) -> io::Result { // TODO err - let DocAddress(ref segment_id, ref doc_id) = *doc_address; - let segment_ord = self.segments_idx.get(&segment_id).unwrap(); - let segment_reader = &self.segments[segment_ord.clone()]; + let DocAddress(ref segment_local_id, ref doc_id) = *doc_address; + let segment_reader = &self.segments[*segment_local_id as usize]; segment_reader.doc(doc_id) } fn add_segment(&mut self, segment: Segment) -> io::Result<()> { - SegmentReader::open(segment.clone()) - .map(|segment_reader| { - let segment_ord = self.segments.len(); - self.segments.push(segment_reader); - self.segments_idx.insert(segment.id(), segment_ord); - }) + let segment_reader = try!(SegmentReader::open(segment.clone())); + self.segments.push(segment_reader); + Ok(()) } fn new() -> Searcher { Searcher { segments: Vec::new(), - segments_idx: HashMap::new(), } } @@ -53,8 +47,8 @@ impl Searcher { } pub fn search(&self, terms: &Vec, collector: &mut Collector) { - for segment in &self.segments { - collector.set_segment(&segment); + for (segment_ord, segment) in self.segments.iter().enumerate() { + collector.set_segment(segment_ord as SegmentLocalId, &segment); let postings = segment.search(terms); for doc_id in postings { collector.collect(doc_id); diff --git a/src/lib.rs b/src/lib.rs index 0db196060..ce1530a03 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -36,6 +36,7 @@ pub use core::schema::FieldOptions; pub use core::schema::Document; pub use core::collector; pub use core::reader::SegmentReader; +pub use core::searcher::SegmentLocalId; #[cfg(test)] @@ -64,7 +65,7 @@ mod tests { impl Collector for TestCollector { - fn set_segment(&mut self, _: &SegmentReader) {} + fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) {} fn collect(&mut self, doc_id: DocId) { self.docs.push(doc_id);