This commit is contained in:
Paul Masurel
2016-03-08 08:24:54 +09:00
parent 87a9e6a0be
commit 1caff6e63f
4 changed files with 32 additions and 35 deletions

View File

@@ -1,16 +1,16 @@
use core::schema::DocId;
use core::reader::SegmentReader;
use core::index::SegmentId;
use core::searcher::SegmentLocalId;
use core::searcher::DocAddress;
/// Receives the documents matched by a search, one segment at a time.
pub trait Collector {
// Announces the segment that the doc ids passed to the following
// `collect` calls belong to.
fn set_segment(&mut self, segment: &SegmentReader);
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader);
/// Called once for each matching document id of the segment most
/// recently announced through `set_segment`.
fn collect(&mut self, doc_id: DocId);
}
pub struct FirstNCollector {
docs: Vec<DocAddress>,
current_segment: Option<SegmentId>,
current_segment: u32,
limit: usize,
}
@@ -19,7 +19,7 @@ impl FirstNCollector {
FirstNCollector {
docs: Vec::new(),
limit: limit,
current_segment: None,
current_segment: 0,
}
}
@@ -30,13 +30,13 @@ impl FirstNCollector {
impl Collector for FirstNCollector {
fn set_segment(&mut self, segment: &SegmentReader) {
self.current_segment = Some(segment.id());
fn set_segment(&mut self, segment_local_id: SegmentLocalId, _: &SegmentReader) {
self.current_segment = segment_local_id;
}
fn collect(&mut self, doc_id: DocId) {
if self.docs.len() < self.limit {
self.docs.push(DocAddress(self.current_segment.clone().unwrap(), doc_id));
self.docs.push(DocAddress(self.current_segment.clone(), doc_id));
}
}
}
@@ -59,7 +59,7 @@ impl CountCollector {
impl Collector for CountCollector {
fn set_segment(&mut self, _: &SegmentReader) {}
fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) {}
fn collect(&mut self, _: DocId) {
self.count += 1;
@@ -84,9 +84,9 @@ impl<'a> MultiCollector<'a> {
impl<'a> Collector for MultiCollector<'a> {
fn set_segment(&mut self, segment: &SegmentReader) {
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) {
for collector in self.collectors.iter_mut() {
collector.set_segment(segment);
collector.set_segment(segment_local_id, segment);
}
}

View File

@@ -14,6 +14,7 @@ use std::io;
use std::str;
use core::codec::TermInfo;
use core::fstmap::FstMap;
use std::fmt;
use rustc_serialize::json;
use core::serial::SegmentSerializer;
use core::serial::SerializableSegment;
@@ -24,12 +25,19 @@ use core::convert_to_ioerror;
pub struct SegmentReader {
segment_info: SegmentInfo,
segment: Segment,
segment_id: SegmentId,
term_offsets: FstMap<TermInfo>,
postings_data: ReadOnlySource,
store_reader: StoreReader,
}
/// Debug representation of a `SegmentReader`.
///
/// Only the segment id is printed, rendered as `SegmentReader(<segment id>)`;
/// the other fields of the reader are left out.
impl fmt::Debug for SegmentReader {
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        let segment_id = &self.segment_id;
        formatter.write_fmt(format_args!("SegmentReader({:?})", segment_id))
    }
}
pub struct SegmentPostings {
doc_id: usize,
doc_ids: Vec<u32>,
@@ -101,12 +109,6 @@ impl Iterator for SegmentPostings {
impl SegmentReader {
/// Returns the associated segment id.
pub fn id(&self,) -> SegmentId {
self.segment.id()
}
/// Returns the highest document id ever attributed in
/// this segment + 1.
/// Today, `tantivy` does not handle deletes so, it happens
@@ -129,7 +131,7 @@ impl SegmentReader {
segment_info: segment_info,
postings_data: postings_shared_mmap,
term_offsets: term_offsets,
segment: segment,
segment_id: segment.id(),
store_reader: store_reader,
})
}

View File

@@ -1,46 +1,40 @@
use core::reader::SegmentReader;
use core::index::Index;
use core::index::Segment;
use core::index::SegmentId;
use core::schema::DocId;
use core::schema::Document;
use core::collector::Collector;
use std::collections::HashMap;
use std::io;
use core::schema::Term;
pub struct Searcher {
segments: Vec<SegmentReader>,
segments_idx: HashMap<SegmentId, usize>,
}
/// Position of a segment within a `Searcher`'s list of segment readers.
/// This is the index handed to `Collector::set_segment` during a search.
pub type SegmentLocalId = u32;
/// Address of a document as seen by a `Searcher`: the first component
/// designates the segment holding the document, the second one is the
/// document id within that segment.
#[derive(Debug)]
pub struct DocAddress(pub SegmentId, pub DocId);
pub struct DocAddress(pub SegmentLocalId, pub DocId);
impl Searcher {
pub fn doc(&self, doc_address: &DocAddress) -> io::Result<Document> {
// TODO: return an error instead of panicking when the doc address is invalid.
let DocAddress(ref segment_id, ref doc_id) = *doc_address;
let segment_ord = self.segments_idx.get(&segment_id).unwrap();
let segment_reader = &self.segments[segment_ord.clone()];
let DocAddress(ref segment_local_id, ref doc_id) = *doc_address;
let segment_reader = &self.segments[*segment_local_id as usize];
segment_reader.doc(doc_id)
}
fn add_segment(&mut self, segment: Segment) -> io::Result<()> {
SegmentReader::open(segment.clone())
.map(|segment_reader| {
let segment_ord = self.segments.len();
self.segments.push(segment_reader);
self.segments_idx.insert(segment.id(), segment_ord);
})
let segment_reader = try!(SegmentReader::open(segment.clone()));
self.segments.push(segment_reader);
Ok(())
}
fn new() -> Searcher {
Searcher {
segments: Vec::new(),
segments_idx: HashMap::new(),
}
}
@@ -53,8 +47,8 @@ impl Searcher {
}
pub fn search(&self, terms: &Vec<Term>, collector: &mut Collector) {
for segment in &self.segments {
collector.set_segment(&segment);
for (segment_ord, segment) in self.segments.iter().enumerate() {
collector.set_segment(segment_ord as SegmentLocalId, &segment);
let postings = segment.search(terms);
for doc_id in postings {
collector.collect(doc_id);

View File

@@ -36,6 +36,7 @@ pub use core::schema::FieldOptions;
pub use core::schema::Document;
pub use core::collector;
pub use core::reader::SegmentReader;
pub use core::searcher::SegmentLocalId;
#[cfg(test)]
@@ -64,7 +65,7 @@ mod tests {
impl Collector for TestCollector {
fn set_segment(&mut self, _: &SegmentReader) {}
fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) {}
fn collect(&mut self, doc_id: DocId) {
self.docs.push(doc_id);