mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-31 07:30:39 +00:00
blop
This commit is contained in:
@@ -1,16 +1,16 @@
|
||||
use core::schema::DocId;
|
||||
use core::reader::SegmentReader;
|
||||
use core::index::SegmentId;
|
||||
use core::searcher::SegmentLocalId;
|
||||
use core::searcher::DocAddress;
|
||||
|
||||
/// Receives the documents matched by a search, one segment at a time.
///
/// `Searcher::search` calls `set_segment` before walking a segment's postings
/// and then calls `collect` once for every doc id those postings yield.
pub trait Collector {
|
||||
// NOTE(review): `set_segment` is declared twice below. This text looks like a
// rendered diff, so presumably the first line is the signature being removed
// and the second one is its replacement -- confirm against the repository.
fn set_segment(&mut self, segment: &SegmentReader);
|
||||
/// Announces the segment whose documents are about to be collected.
/// In `Searcher::search`, `segment_local_id` is the index of the segment in
/// the searcher's `segments` vector.
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader);
|
||||
/// Handles one matching document of the segment announced last.
fn collect(&mut self, doc_id: DocId);
|
||||
}
|
||||
|
||||
/// Collector that keeps the addresses of at most `limit` documents,
/// in the order in which they are handed to `collect`.
pub struct FirstNCollector {
|
||||
/// Addresses of the documents kept so far.
docs: Vec<DocAddress>,
|
||||
// NOTE(review): `current_segment` appears twice. Presumably the `Option<SegmentId>`
// line is the removed version and the `u32` line the added one -- confirm
// against the repository.
current_segment: Option<SegmentId>,
|
||||
/// Value most recently received through `set_segment`.
current_segment: u32,
|
||||
/// Maximum number of documents to keep.
limit: usize,
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ impl FirstNCollector {
|
||||
FirstNCollector {
|
||||
docs: Vec::new(),
|
||||
limit: limit,
|
||||
current_segment: None,
|
||||
current_segment: 0,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,13 +30,13 @@ impl FirstNCollector {
|
||||
|
||||
/// `Collector` implementation that stops recording once `limit` documents are held.
impl Collector for FirstNCollector {
|
||||
|
||||
// NOTE(review): the next two `fn set_segment` headers and their bodies look
// like the removed/added halves of a diff; presumably only the second
// (taking `segment_local_id`) is current -- confirm against the repository.
fn set_segment(&mut self, segment: &SegmentReader) {
|
||||
self.current_segment = Some(segment.id());
|
||||
/// Remembers the local id of the segment whose documents come next.
/// The segment reader itself is not used.
fn set_segment(&mut self, segment_local_id: SegmentLocalId, _: &SegmentReader) {
|
||||
self.current_segment = segment_local_id;
|
||||
}
|
||||
|
||||
/// Records `doc_id` together with the current segment, unless `limit`
/// documents have already been recorded.
fn collect(&mut self, doc_id: DocId) {
|
||||
// Documents arriving after the limit is reached are ignored.
if self.docs.len() < self.limit {
|
||||
// NOTE(review): two `push` lines follow; presumably the `.unwrap()` variant is
// the removed one (it matches the `Option<SegmentId>` field) -- confirm.
self.docs.push(DocAddress(self.current_segment.clone().unwrap(), doc_id));
|
||||
self.docs.push(DocAddress(self.current_segment.clone(), doc_id));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -59,7 +59,7 @@ impl CountCollector {
|
||||
|
||||
impl Collector for CountCollector {
|
||||
|
||||
fn set_segment(&mut self, _: &SegmentReader) {}
|
||||
fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) {}
|
||||
|
||||
fn collect(&mut self, _: DocId) {
|
||||
self.count += 1;
|
||||
@@ -84,9 +84,9 @@ impl<'a> MultiCollector<'a> {
|
||||
|
||||
impl<'a> Collector for MultiCollector<'a> {
|
||||
|
||||
// NOTE(review): two `fn set_segment` headers and two forwarding calls appear
// below; presumably the one-argument forms are the removed diff lines and the
// two-argument forms their replacements -- confirm against the repository.
fn set_segment(&mut self, segment: &SegmentReader) {
|
||||
/// Forwards the segment change to every collector held in `self.collectors`.
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) {
|
||||
for collector in self.collectors.iter_mut() {
|
||||
collector.set_segment(segment);
|
||||
// Each wrapped collector receives the same local id and reader.
collector.set_segment(segment_local_id, segment);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ use std::io;
|
||||
use std::str;
|
||||
use core::codec::TermInfo;
|
||||
use core::fstmap::FstMap;
|
||||
use std::fmt;
|
||||
use rustc_serialize::json;
|
||||
use core::serial::SegmentSerializer;
|
||||
use core::serial::SerializableSegment;
|
||||
@@ -24,12 +25,19 @@ use core::convert_to_ioerror;
|
||||
|
||||
/// Read-side handle on a single segment.
pub struct SegmentReader {
|
||||
segment_info: SegmentInfo,
|
||||
// NOTE(review): `segment` and `segment_id` look like a removed/added pair from
// a diff (the constructor fragment in this text shows both `segment: segment`
// and `segment_id: segment.id()`) -- confirm which field is current.
segment: Segment,
|
||||
/// Id of the segment; this is what the `Debug` implementation prints.
segment_id: SegmentId,
|
||||
// Presumably maps each term to its `TermInfo` -- verify against `FstMap`.
term_offsets: FstMap<TermInfo>,
|
||||
// Filled from `postings_shared_mmap` in the constructor fragment visible in this text.
postings_data: ReadOnlySource,
|
||||
store_reader: StoreReader,
|
||||
}
|
||||
|
||||
|
||||
/// Debug output identifies the reader by its segment id only.
impl fmt::Debug for SegmentReader {
|
||||
/// Writes `SegmentReader(<id>)`, where `<id>` is the `Debug` form of `segment_id`.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "SegmentReader({:?})", self.segment_id)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SegmentPostings {
|
||||
doc_id: usize,
|
||||
doc_ids: Vec<u32>,
|
||||
@@ -101,12 +109,6 @@ impl Iterator for SegmentPostings {
|
||||
|
||||
impl SegmentReader {
|
||||
|
||||
|
||||
/// Returns the associated segment id.
// NOTE(review): the enclosing hunk header (`-101,12 +109,6`) suggests this
// accessor is being removed by the change shown here -- confirm against the
// repository before relying on it.
|
||||
pub fn id(&self,) -> SegmentId {
|
||||
// Delegates to the `segment` field.
self.segment.id()
|
||||
}
|
||||
|
||||
/// Returns the highest document id ever attributed in
|
||||
/// this segment + 1.
|
||||
/// Today, `tantivy` does not handle deletes so, it happens
|
||||
@@ -129,7 +131,7 @@ impl SegmentReader {
|
||||
segment_info: segment_info,
|
||||
postings_data: postings_shared_mmap,
|
||||
term_offsets: term_offsets,
|
||||
segment: segment,
|
||||
segment_id: segment.id(),
|
||||
store_reader: store_reader,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1,46 +1,40 @@
|
||||
use core::reader::SegmentReader;
|
||||
use core::index::Index;
|
||||
use core::index::Segment;
|
||||
use core::index::SegmentId;
|
||||
use core::schema::DocId;
|
||||
use core::schema::Document;
|
||||
use core::collector::Collector;
|
||||
use std::collections::HashMap;
|
||||
use std::io;
|
||||
use core::schema::Term;
|
||||
|
||||
|
||||
/// Runs searches over a list of segment readers and resolves document addresses.
pub struct Searcher {
|
||||
/// One reader per segment added through `add_segment`, in insertion order.
segments: Vec<SegmentReader>,
|
||||
// NOTE(review): the hunk header (`-1,46 +1,40`) and the surrounding lines
// suggest this `SegmentId` -> index map is being removed in favour of plain
// positional ids -- confirm against the repository.
segments_idx: HashMap<SegmentId, usize>,
|
||||
}
|
||||
|
||||
/// Identifier of a segment within a `Searcher`. `Searcher::search` produces it
/// by casting the segment's index in the `segments` vector.
pub type SegmentLocalId = u32;
|
||||
|
||||
#[derive(Debug)]
|
||||
// NOTE(review): `DocAddress` is declared twice below. Presumably the `SegmentId`
// form is the removed diff line and the `SegmentLocalId` form its replacement
// -- confirm against the repository.
pub struct DocAddress(pub SegmentId, pub DocId);
|
||||
/// Address of a document: the segment's local id, then the doc id inside that segment.
pub struct DocAddress(pub SegmentLocalId, pub DocId);
|
||||
|
||||
impl Searcher {
|
||||
|
||||
/// Looks up the document designated by `doc_address`.
///
/// The segment part of the address selects a reader from `self.segments`,
/// and the result of that reader's `doc` call is returned unchanged.
pub fn doc(&self, doc_address: &DocAddress) -> io::Result<Document> {
|
||||
// TODO err
|
||||
// NOTE(review): the next three statements (lookup through `segments_idx`)
// and the two after them (direct indexing) look like the removed and added
// halves of a diff -- confirm which is current.
let DocAddress(ref segment_id, ref doc_id) = *doc_address;
|
||||
let segment_ord = self.segments_idx.get(&segment_id).unwrap();
|
||||
let segment_reader = &self.segments[segment_ord.clone()];
|
||||
let DocAddress(ref segment_local_id, ref doc_id) = *doc_address;
|
||||
// Direct indexing: panics if the local id is not a valid index into `segments`.
let segment_reader = &self.segments[*segment_local_id as usize];
|
||||
segment_reader.doc(doc_id)
|
||||
}
|
||||
|
||||
/// Opens a reader on `segment` and appends it to `self.segments`.
///
/// Returns the `io::Error` from `SegmentReader::open` if opening fails.
fn add_segment(&mut self, segment: Segment) -> io::Result<()> {
|
||||
// NOTE(review): the `.map(...)` chain below and the `try!` sequence after it
// look like the removed and added versions of the same body; the `.map`
// version also registers the segment in `segments_idx` -- confirm which is current.
SegmentReader::open(segment.clone())
|
||||
.map(|segment_reader| {
|
||||
let segment_ord = self.segments.len();
|
||||
self.segments.push(segment_reader);
|
||||
self.segments_idx.insert(segment.id(), segment_ord);
|
||||
})
|
||||
// `try!` returns early with the error when the reader cannot be opened.
let segment_reader = try!(SegmentReader::open(segment.clone()));
|
||||
self.segments.push(segment_reader);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Creates a searcher with no segments.
fn new() -> Searcher {
|
||||
Searcher {
|
||||
segments: Vec::new(),
|
||||
// NOTE(review): presumably a removed diff line, matching the `segments_idx`
// field of `Searcher` -- confirm against the repository.
segments_idx: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,8 +47,8 @@ impl Searcher {
|
||||
}
|
||||
|
||||
pub fn search(&self, terms: &Vec<Term>, collector: &mut Collector) {
|
||||
for segment in &self.segments {
|
||||
collector.set_segment(&segment);
|
||||
for (segment_ord, segment) in self.segments.iter().enumerate() {
|
||||
collector.set_segment(segment_ord as SegmentLocalId, &segment);
|
||||
let postings = segment.search(terms);
|
||||
for doc_id in postings {
|
||||
collector.collect(doc_id);
|
||||
|
||||
@@ -36,6 +36,7 @@ pub use core::schema::FieldOptions;
|
||||
pub use core::schema::Document;
|
||||
pub use core::collector;
|
||||
pub use core::reader::SegmentReader;
|
||||
pub use core::searcher::SegmentLocalId;
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -64,7 +65,7 @@ mod tests {
|
||||
|
||||
impl Collector for TestCollector {
|
||||
|
||||
fn set_segment(&mut self, _: &SegmentReader) {}
|
||||
fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) {}
|
||||
|
||||
fn collect(&mut self, doc_id: DocId) {
|
||||
self.docs.push(doc_id);
|
||||
|
||||
Reference in New Issue
Block a user