Better API

This commit is contained in:
Paul Masurel
2017-05-18 23:33:15 +09:00
parent 2a08c247af
commit b3f62b8acc
5 changed files with 40 additions and 12 deletions

View File

@@ -43,7 +43,7 @@ use schema::TextIndexingOptions;
pub struct SegmentReader {
segment_id: SegmentId,
segment_meta: SegmentMeta,
term_infos: Arc<FstMap<TermInfo>>,
terms: Arc<FstMap<TermInfo>>,
postings_data: ReadOnlySource,
store_reader: StoreReader,
fast_fields_reader: Arc<FastFieldsReader>,
@@ -135,7 +135,7 @@ impl SegmentReader {
pub fn open(segment: Segment) -> Result<SegmentReader> {
let source = try!(segment.open_read(SegmentComponent::TERMS));
let term_infos = try!(FstMap::from_source(source));
let terms = try!(FstMap::from_source(source));
let store_reader = StoreReader::from(try!(segment.open_read(SegmentComponent::STORE)));
let postings_shared_mmap = try!(segment.open_read(SegmentComponent::POSTINGS));
@@ -160,7 +160,7 @@ impl SegmentReader {
Ok(SegmentReader {
segment_meta: segment.meta().clone(),
postings_data: postings_shared_mmap,
term_infos: Arc::new(term_infos),
terms: Arc::new(terms),
segment_id: segment.id(),
store_reader: store_reader,
fast_fields_reader: Arc::new(fast_fields_reader),
@@ -172,8 +172,8 @@ impl SegmentReader {
}
/// Returns a reference to the segment's term dictionary.
///
/// The dictionary is the `FstMap<TermInfo>` held by this reader.
pub fn term_infos(&self) -> &FstMap<TermInfo> {
&self.term_infos
pub fn terms(&self) -> &FstMap<TermInfo> {
&self.terms
}
/// Returns the document (or to be accurate, its stored field)
@@ -259,7 +259,7 @@ impl SegmentReader {
/// Returns the term info associated with the term.
///
/// The term's byte representation (`term.as_slice()`) is used as the
/// lookup key in the term dictionary. The result is whatever the
/// dictionary's `get` returns: `Some(TermInfo)` when the key is present,
/// `None` otherwise.
pub fn get_term_info(&self, term: &Term) -> Option<TermInfo> {
self.term_infos.get(term.as_slice())
self.terms.get(term.as_slice())
}
/// Returns the segment id

View File

@@ -5,6 +5,7 @@ use super::{FstMapStreamerBuilder, FstMapStreamer};
use directory::ReadOnlySource;
use common::BinarySerializable;
use std::marker::PhantomData;
use schema::{Field, Term};
fn convert_fst_error(e: fst::Error) -> io::Error {
@@ -104,22 +105,47 @@ impl<V> FstMap<V>
})
}
pub(crate) fn read_value(&self, offset: u64) -> V {
/// Reads the value stored at `offset` in the values data array.
///
/// In the `FstMap`, the dictionary itself associates
/// each key `&[u8]` with a `u64` that is in fact the address
/// of the value object in a data array.
///
/// This method deserializes this object and returns it.
/// Any error reported by `V::deserialize` is handed back to the caller.
pub(crate) fn read_value(&self, offset: u64) -> io::Result<V> {
let buffer = self.values_mmap.as_slice();
// The cursor starts at `offset` and runs to the end of the buffer.
// NOTE(review): this slicing panics if `offset` is past the end of the
// buffer; presumably offsets coming out of the fst are always valid.
let mut cursor = &buffer[(offset as usize)..];
V::deserialize(&mut cursor).expect("Data in FST is corrupted")
V::deserialize(&mut cursor)
}
/// Returns, if present, the value associated with the given key.
///
/// The key is first looked up in the fst index, which yields the offset
/// of the value; the value is then deserialized from that offset.
///
/// # Panics
///
/// Panics if the value found for `key` cannot be deserialized.
pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Option<V> {
self.fst_index
.get(key)
.map(|offset| self.read_value(offset))
.map(|offset| {
self.read_value(offset)
.expect("The fst is corrupted. Failed to deserialize a value.")
})
}
/// Streams every term of the map, in sorted order.
///
/// This is simply the unbounded range builder turned into a stream.
pub fn stream(&self) -> FstMapStreamer<V> {
    let unbounded_range = self.range();
    unbounded_range.into_stream()
}
/// Returns a stream of all the sorted terms in the given field.
///
/// The stream covers the half-open key interval that starts at the empty
/// text term of `field` (inclusive) and stops at the empty text term of
/// the field whose id is `field.0 + 1` (exclusive).
/// NOTE(review): this relies on term keys sorting by field id first —
/// confirm against the `Term` encoding.
///
/// When `field` carries the largest representable id, there is no next
/// field to use as the upper bound. In that case the stream is left
/// unbounded above rather than overflowing on `field.0 + 1`.
pub fn stream_field(&self, field: Field) -> FstMapStreamer<V> {
    let start_term = Term::from_field_text(field, "");
    let lower_bounded = self.range().ge(start_term.as_slice());
    match field.0.checked_add(1) {
        Some(next_field_id) => {
            let stop_term = Term::from_field_text(Field(next_field_id), "");
            lower_bounded.lt(stop_term.as_slice()).into_stream()
        }
        // Last possible field id: every remaining key belongs to `field`.
        None => lower_bounded.into_stream(),
    }
}
/// Creates a builder used to stream the terms lying within an interval.
///
/// The builder wraps the range builder of the underlying fst index.
pub fn range(&self) -> FstMapStreamerBuilder<V> {
    let fst_range = self.fst_index.range();
    FstMapStreamerBuilder::new(self, fst_range)
}

View File

@@ -136,7 +136,7 @@ impl<'a> From<&'a [SegmentReader]> for FstMerger<'a, TermInfo>
/// Builds an `FstMerger` out of a slice of segment readers.
///
/// One full term stream is opened per reader (from its term dictionary),
/// and the collected streams are handed to `FstMerger::new`.
fn from(segment_readers: &'a [SegmentReader]) -> FstMerger<'a, TermInfo> {
FstMerger::new(segment_readers
.iter()
.map(|reader| reader.term_infos().stream())
.map(|reader| reader.terms().stream())
.collect())
}
}

View File

@@ -99,6 +99,8 @@ impl<'a, V> FstMapStreamer<'a, V>
}
/// Deserializes and returns the value located at `self.offset` in the
/// underlying `FstMap`.
///
/// # Panics
///
/// Panics if the value cannot be deserialized.
pub fn value(&self) -> V {
self.fst_map.read_value(self.offset)
self.fst_map
.read_value(self.offset)
.expect("Fst data is corrupted. Failed to deserialize a value.")
}
}

View File

@@ -49,7 +49,7 @@ impl FieldType {
FieldType::Str(ref text_options) => {
match text_options.get_indexing_options() {
TextIndexingOptions::Untokenized |
TextIndexingOptions::TokenizedNoFreq => Some(SegmentPostingsOption::NoFreq),
TextIndexingOptions::TokenizedNoFreq => Some(SegmentPostingsOption::NoFreq),
TextIndexingOptions::TokenizedWithFreq => Some(SegmentPostingsOption::Freq),
TextIndexingOptions::TokenizedWithFreqAndPosition => {
Some(SegmentPostingsOption::FreqAndPositions)