Mirror of https://github.com/quickwit-oss/tantivy.git, synced 2026-01-04 16:22:55 +00:00

Better API
@@ -43,7 +43,7 @@ use schema::TextIndexingOptions;
 pub struct SegmentReader {
     segment_id: SegmentId,
     segment_meta: SegmentMeta,
-    term_infos: Arc<FstMap<TermInfo>>,
+    terms: Arc<FstMap<TermInfo>>,
     postings_data: ReadOnlySource,
     store_reader: StoreReader,
     fast_fields_reader: Arc<FastFieldsReader>,
@@ -135,7 +135,7 @@ impl SegmentReader {
     pub fn open(segment: Segment) -> Result<SegmentReader> {

         let source = try!(segment.open_read(SegmentComponent::TERMS));
-        let term_infos = try!(FstMap::from_source(source));
+        let terms = try!(FstMap::from_source(source));
         let store_reader = StoreReader::from(try!(segment.open_read(SegmentComponent::STORE)));
         let postings_shared_mmap = try!(segment.open_read(SegmentComponent::POSTINGS));

@@ -160,7 +160,7 @@ impl SegmentReader {
         Ok(SegmentReader {
             segment_meta: segment.meta().clone(),
             postings_data: postings_shared_mmap,
-            term_infos: Arc::new(term_infos),
+            terms: Arc::new(terms),
             segment_id: segment.id(),
             store_reader: store_reader,
             fast_fields_reader: Arc::new(fast_fields_reader),
@@ -172,8 +172,8 @@ impl SegmentReader {
     }

     /// Returns the term dictionary data structure.
-    pub fn term_infos(&self) -> &FstMap<TermInfo> {
-        &self.term_infos
+    pub fn terms(&self) -> &FstMap<TermInfo> {
+        &self.terms
     }

     /// Returns the document (or to be accurate, its stored field)
@@ -259,7 +259,7 @@ impl SegmentReader {

     /// Returns the term info associated with the term.
     pub fn get_term_info(&self, term: &Term) -> Option<TermInfo> {
-        self.term_infos.get(term.as_slice())
+        self.terms.get(term.as_slice())
     }

     /// Returns the segment id
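
For illustration, a minimal sketch of a lookup through the renamed accessor, assuming crate-internal scope where SegmentReader, Term, TermInfo and FstMap are visible; the helper name `lookup_term` is hypothetical and not part of this commit:

    // Hypothetical helper, written against the signatures shown in the hunks above.
    fn lookup_term(reader: &SegmentReader, term: &Term) -> Option<TermInfo> {
        // `terms()` replaces the old `term_infos()` accessor; the convenience
        // method `reader.get_term_info(term)` performs the same lookup.
        reader.terms().get(term.as_slice())
    }
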
@@ -5,6 +5,7 @@ use super::{FstMapStreamerBuilder, FstMapStreamer};
 use directory::ReadOnlySource;
 use common::BinarySerializable;
 use std::marker::PhantomData;
+use schema::{Field, Term};


 fn convert_fst_error(e: fst::Error) -> io::Error {
@@ -104,22 +105,47 @@ impl<V> FstMap<V>
         })
     }

-    pub(crate) fn read_value(&self, offset: u64) -> V {
+    /// In the `FstMap`, the dictionary itself associates
+    /// each key `&[u8]` with a `u64` that is in fact the address
+    /// of the value object in a data array.
+    ///
+    /// This method deserializes this object and returns it.
+    pub(crate) fn read_value(&self, offset: u64) -> io::Result<V> {
         let buffer = self.values_mmap.as_slice();
         let mut cursor = &buffer[(offset as usize)..];
-        V::deserialize(&mut cursor).expect("Data in FST is corrupted")
+        V::deserialize(&mut cursor)
     }

     /// Returns, if present, the value associated with a given key.
     pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Option<V> {
         self.fst_index
             .get(key)
-            .map(|offset| self.read_value(offset))
+            .map(|offset| {
+                self.read_value(offset)
+                    .expect("The fst is corrupted. Failed to deserialize a value.")
+            })
     }

+
+    /// Returns a stream of all the sorted terms.
+    pub fn stream(&self) -> FstMapStreamer<V> {
+        self.range().into_stream()
+    }
+
+
+    /// Returns a stream of all the sorted terms in the given field.
+    pub fn stream_field(&self, field: Field) -> FstMapStreamer<V> {
+        let start_term = Term::from_field_text(field, "");
+        let stop_term = Term::from_field_text(Field(field.0 + 1), "");
+        self.range()
+            .ge(start_term.as_slice())
+            .lt(stop_term.as_slice())
+            .into_stream()
+    }
+
     /// Returns a range builder, to stream all of the terms
     /// within an interval.
     pub fn range(&self) -> FstMapStreamerBuilder<V> {
         FstMapStreamerBuilder::new(self, self.fst_index.range())
     }

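
The streaming helpers rely on the term dictionary keys being serialized terms prefixed by their field id, so all terms of one field sit in the half-open key range between Term(field, "") and Term(field + 1, ""). A minimal usage sketch, assuming crate-internal scope; the helper name `field_terms` is hypothetical:

    // Hypothetical helper showing the new entry points.
    fn field_terms<'a>(terms: &'a FstMap<TermInfo>, field: Field) -> FstMapStreamer<'a, TermInfo> {
        // `stream_field(field)` is shorthand for this explicit range:
        let start_term = Term::from_field_text(field, "");
        let stop_term = Term::from_field_text(Field(field.0 + 1), "");
        terms.range()
            .ge(start_term.as_slice())   // first possible key of `field`
            .lt(stop_term.as_slice())    // first key of the next field, excluded
            .into_stream()
        // Equivalently: terms.stream_field(field).
        // terms.stream() walks the whole dictionary in sorted order.
    }
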
@@ -136,7 +136,7 @@ impl<'a> From<&'a [SegmentReader]> for FstMerger<'a, TermInfo>
     fn from(segment_readers: &'a [SegmentReader]) -> FstMerger<'a, TermInfo> {
         FstMerger::new(segment_readers
             .iter()
-            .map(|reader| reader.term_infos().stream())
+            .map(|reader| reader.terms().stream())
             .collect())
     }
 }
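
As a usage note, the `From` impl above lets callers build a merger directly from a slice of segment readers. A hedged sketch, assuming crate-internal scope; the helper name is hypothetical:

    // Hypothetical crate-internal usage of the `From` impl above: one sorted
    // stream per segment, built over the renamed `terms()` dictionary.
    fn merge_term_dictionaries<'a>(readers: &'a [SegmentReader]) -> FstMerger<'a, TermInfo> {
        FstMerger::from(readers)
    }
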
@@ -99,6 +99,8 @@ impl<'a, V> FstMapStreamer<'a, V>
     }

     pub fn value(&self) -> V {
-        self.fst_map.read_value(self.offset)
+        self.fst_map
+            .read_value(self.offset)
+            .expect("Fst data is corrupted. Failed to deserialize a value.")
     }
 }
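
The error handling is now split in two: `read_value` reports a corrupted value through `io::Result`, while the public `get` and `value` entry points keep their infallible signatures and panic with `expect`. A sketch of a crate-internal caller that propagates the error instead; the helper is hypothetical, and the `BinarySerializable` bound is assumed from the `V::deserialize` call above:

    // Hypothetical crate-internal helper: surfaces corruption as an io::Error
    // instead of panicking, by delegating to the now-fallible read_value.
    fn value_at<V: BinarySerializable>(map: &FstMap<V>, offset: u64) -> io::Result<V> {
        map.read_value(offset)
    }
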
@@ -49,7 +49,7 @@ impl FieldType {
             FieldType::Str(ref text_options) => {
                 match text_options.get_indexing_options() {
                     TextIndexingOptions::Untokenized |
-                    TextIndexingOptions::TokenizedNoFreq => Some(SegmentPostingsOption::NoFreq),
+                    TextIndexingOptions::TokenizedNoFreq => Some(SegmentPostingsOption::NoFreq),
                     TextIndexingOptions::TokenizedWithFreq => Some(SegmentPostingsOption::Freq),
                     TextIndexingOptions::TokenizedWithFreqAndPosition => {
                         Some(SegmentPostingsOption::FreqAndPositions)