mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-03 00:50:41 +00:00
111 lines
3.4 KiB
Rust
111 lines
3.4 KiB
Rust
use Result;
|
|
use core::SegmentReader;
|
|
use schema::Document;
|
|
use collector::Collector;
|
|
use common::TimerTree;
|
|
use query::Query;
|
|
use DocAddress;
|
|
use schema::{Field, Term};
|
|
use termdict::{TermDictionary, TermMerger};
|
|
use std::sync::Arc;
|
|
use std::fmt;
|
|
use core::InvertedIndexReader;
|
|
|
|
/// Holds a list of `SegmentReader`s ready for search.
|
|
///
|
|
/// It guarantees that the `Segment` will not be removed before
|
|
/// the destruction of the `Searcher`.
|
|
///
|
|
pub struct Searcher {
|
|
segment_readers: Vec<SegmentReader>,
|
|
}
|
|
|
|
impl Searcher {
|
|
/// Fetches a document from tantivy's store given a `DocAddress`.
|
|
///
|
|
/// The searcher uses the segment ordinal to route the
|
|
/// the request to the right `Segment`.
|
|
pub fn doc(&self, doc_address: &DocAddress) -> Result<Document> {
|
|
let DocAddress(segment_local_id, doc_id) = *doc_address;
|
|
let segment_reader = &self.segment_readers[segment_local_id as usize];
|
|
segment_reader.doc(doc_id)
|
|
}
|
|
|
|
/// Returns the overall number of documents in the index.
|
|
pub fn num_docs(&self) -> u64 {
|
|
self.segment_readers
|
|
.iter()
|
|
.map(|segment_reader| segment_reader.num_docs() as u64)
|
|
.sum::<u64>()
|
|
}
|
|
|
|
/// Return the overall number of documents containing
|
|
/// the given term.
|
|
pub fn doc_freq(&self, term: &Term) -> u64 {
|
|
self.segment_readers
|
|
.iter()
|
|
.map(|segment_reader| segment_reader.inverted_index(term.field()).doc_freq(term) as u64)
|
|
.sum::<u64>()
|
|
}
|
|
|
|
/// Return the list of segment readers
|
|
pub fn segment_readers(&self) -> &[SegmentReader] {
|
|
&self.segment_readers
|
|
}
|
|
|
|
/// Returns the segment_reader associated with the given segment_ordinal
|
|
pub fn segment_reader(&self, segment_ord: u32) -> &SegmentReader {
|
|
&self.segment_readers[segment_ord as usize]
|
|
}
|
|
|
|
/// Runs a query on the segment readers wrapped by the searcher
|
|
pub fn search<C: Collector>(&self, query: &Query, collector: &mut C) -> Result<TimerTree> {
|
|
query.search(self, collector)
|
|
}
|
|
|
|
/// Return the field searcher associated to a `Field`.
|
|
pub fn field(&self, field: Field) -> FieldSearcher {
|
|
let inv_index_readers = self.segment_readers
|
|
.iter()
|
|
.map(|segment_reader| segment_reader.inverted_index(field))
|
|
.collect::<Vec<_>>();
|
|
FieldSearcher::new(inv_index_readers)
|
|
}
|
|
}
|
|
|
|
pub struct FieldSearcher {
|
|
inv_index_readers: Vec<Arc<InvertedIndexReader>>,
|
|
}
|
|
|
|
impl FieldSearcher {
|
|
fn new(inv_index_readers: Vec<Arc<InvertedIndexReader>>) -> FieldSearcher {
|
|
FieldSearcher { inv_index_readers }
|
|
}
|
|
|
|
/// Returns a Stream over all of the sorted unique terms of
|
|
/// for the given field.
|
|
pub fn terms(&self) -> TermMerger {
|
|
let term_streamers: Vec<_> = self.inv_index_readers
|
|
.iter()
|
|
.map(|inverted_index| inverted_index.terms().stream())
|
|
.collect();
|
|
TermMerger::new(term_streamers)
|
|
}
|
|
}
|
|
|
|
impl From<Vec<SegmentReader>> for Searcher {
|
|
fn from(segment_readers: Vec<SegmentReader>) -> Searcher {
|
|
Searcher { segment_readers }
|
|
}
|
|
}
|
|
|
|
impl fmt::Debug for Searcher {
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
let segment_ids = self.segment_readers
|
|
.iter()
|
|
.map(|segment_reader| segment_reader.segment_id())
|
|
.collect::<Vec<_>>();
|
|
write!(f, "Searcher({:?})", segment_ids)
|
|
}
|
|
}
|