Mirror of https://github.com/quickwit-oss/tantivy.git, synced 2026-01-04 16:22:55 +00:00

Better API
@@ -43,7 +43,7 @@ use schema::TextIndexingOptions;
 pub struct SegmentReader {
     segment_id: SegmentId,
     segment_meta: SegmentMeta,
-    term_infos: Arc<FstMap<TermInfo>>,
+    terms: Arc<FstMap<TermInfo>>,
     postings_data: ReadOnlySource,
     store_reader: StoreReader,
     fast_fields_reader: Arc<FastFieldsReader>,
@@ -135,7 +135,7 @@ impl SegmentReader {
     pub fn open(segment: Segment) -> Result<SegmentReader> {

         let source = try!(segment.open_read(SegmentComponent::TERMS));
-        let term_infos = try!(FstMap::from_source(source));
+        let terms = try!(FstMap::from_source(source));
         let store_reader = StoreReader::from(try!(segment.open_read(SegmentComponent::STORE)));
         let postings_shared_mmap = try!(segment.open_read(SegmentComponent::POSTINGS));

@@ -160,7 +160,7 @@ impl SegmentReader {
         Ok(SegmentReader {
             segment_meta: segment.meta().clone(),
             postings_data: postings_shared_mmap,
-            term_infos: Arc::new(term_infos),
+            terms: Arc::new(terms),
             segment_id: segment.id(),
             store_reader: store_reader,
             fast_fields_reader: Arc::new(fast_fields_reader),
@@ -172,8 +172,8 @@ impl SegmentReader {
     }

     /// Returns the term dictionary data structure.
-    pub fn term_infos(&self) -> &FstMap<TermInfo> {
-        &self.term_infos
+    pub fn terms(&self) -> &FstMap<TermInfo> {
+        &self.terms
     }

     /// Returns the document (or to be accurate, its stored field)
@@ -259,7 +259,7 @@ impl SegmentReader {

     /// Returns the term info associated with the term.
     pub fn get_term_info(&self, term: &Term) -> Option<TermInfo> {
-        self.term_infos.get(term.as_slice())
+        self.terms.get(term.as_slice())
     }

     /// Returns the segment id
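
For illustration, a minimal sketch of a lookup through the renamed accessor, assuming crate-internal scope where SegmentReader, Term, TermInfo and FstMap are visible; the helper name `lookup_term` is hypothetical and not part of this commit:

    // Hypothetical helper, written against the signatures shown in the hunks above.
    fn lookup_term(reader: &SegmentReader, term: &Term) -> Option<TermInfo> {
        // `terms()` replaces the old `term_infos()` accessor; the convenience
        // method `reader.get_term_info(term)` performs the same lookup.
        reader.terms().get(term.as_slice())
    }
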
@@ -5,6 +5,7 @@ use super::{FstMapStreamerBuilder, FstMapStreamer};
 use directory::ReadOnlySource;
 use common::BinarySerializable;
 use std::marker::PhantomData;
+use schema::{Field, Term};


 fn convert_fst_error(e: fst::Error) -> io::Error {
@@ -104,22 +105,47 @@ impl<V> FstMap<V>
         })
     }

-    pub(crate) fn read_value(&self, offset: u64) -> V {
+    /// In the `FstMap`, the dictionary itself associates
+    /// each key `&[u8]` with a `u64` that is in fact the address
+    /// of the value object in a data array.
+    ///
+    /// This method deserializes this object and returns it.
+    pub(crate) fn read_value(&self, offset: u64) -> io::Result<V> {
         let buffer = self.values_mmap.as_slice();
         let mut cursor = &buffer[(offset as usize)..];
-        V::deserialize(&mut cursor).expect("Data in FST is corrupted")
+        V::deserialize(&mut cursor)
     }

     /// Returns, if present, the value associated with a given key.
     pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Option<V> {
         self.fst_index
             .get(key)
-            .map(|offset| self.read_value(offset))
+            .map(|offset| {
+                self.read_value(offset)
+                    .expect("The fst is corrupted. Failed to deserialize a value.")
+            })
     }

+
+    /// Returns a stream of all the sorted terms.
+    pub fn stream(&self) -> FstMapStreamer<V> {
+        self.range().into_stream()
+    }
+
+
+    /// Returns a stream of all the sorted terms in the given field.
+    pub fn stream_field(&self, field: Field) -> FstMapStreamer<V> {
+        let start_term = Term::from_field_text(field, "");
+        let stop_term = Term::from_field_text(Field(field.0 + 1), "");
+        self.range()
+            .ge(start_term.as_slice())
+            .lt(stop_term.as_slice())
+            .into_stream()
+    }
+
     /// Returns a range builder, to stream all of the terms
     /// within an interval.
     pub fn range(&self) -> FstMapStreamerBuilder<V> {
         FstMapStreamerBuilder::new(self, self.fst_index.range())
     }

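
The streaming helpers rely on the term dictionary keys being serialized terms prefixed by their field id, so all terms of one field sit in the half-open key range between Term(field, "") and Term(field + 1, ""). A minimal usage sketch, assuming crate-internal scope; the helper name `field_terms` is hypothetical:

    // Hypothetical helper showing the new entry points.
    fn field_terms<'a>(terms: &'a FstMap<TermInfo>, field: Field) -> FstMapStreamer<'a, TermInfo> {
        // `stream_field(field)` is shorthand for this explicit range:
        let start_term = Term::from_field_text(field, "");
        let stop_term = Term::from_field_text(Field(field.0 + 1), "");
        terms.range()
            .ge(start_term.as_slice())   // first possible key of `field`
            .lt(stop_term.as_slice())    // first key of the next field, excluded
            .into_stream()
        // Equivalently: terms.stream_field(field).
        // terms.stream() walks the whole dictionary in sorted order.
    }
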
@@ -136,7 +136,7 @@ impl<'a> From<&'a [SegmentReader]> for FstMerger<'a, TermInfo>
     fn from(segment_readers: &'a [SegmentReader]) -> FstMerger<'a, TermInfo> {
         FstMerger::new(segment_readers
             .iter()
-            .map(|reader| reader.term_infos().stream())
+            .map(|reader| reader.terms().stream())
             .collect())
     }
 }
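
As a usage note, the `From` impl above lets callers build a merger directly from a slice of segment readers. A hedged sketch, assuming crate-internal scope; the helper name is hypothetical:

    // Hypothetical crate-internal usage of the `From` impl above: one sorted
    // stream per segment, built over the renamed `terms()` dictionary.
    fn merge_term_dictionaries<'a>(readers: &'a [SegmentReader]) -> FstMerger<'a, TermInfo> {
        FstMerger::from(readers)
    }
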
@@ -99,6 +99,8 @@ impl<'a, V> FstMapStreamer<'a, V>
     }

     pub fn value(&self) -> V {
-        self.fst_map.read_value(self.offset)
+        self.fst_map
+            .read_value(self.offset)
+            .expect("Fst data is corrupted. Failed to deserialize a value.")
     }
 }
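
The error handling is now split in two: `read_value` reports a corrupted value through `io::Result`, while the public `get` and `value` entry points keep their infallible signatures and panic with `expect`. A sketch of a crate-internal caller that propagates the error instead; the helper is hypothetical, and the `BinarySerializable` bound is assumed from the `V::deserialize` call above:

    // Hypothetical crate-internal helper: surfaces corruption as an io::Error
    // instead of panicking, by delegating to the now-fallible read_value.
    fn value_at<V: BinarySerializable>(map: &FstMap<V>, offset: u64) -> io::Result<V> {
        map.read_value(offset)
    }
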
@@ -49,7 +49,7 @@ impl FieldType {
             FieldType::Str(ref text_options) => {
                 match text_options.get_indexing_options() {
                     TextIndexingOptions::Untokenized |
-                    TextIndexingOptions::TokenizedNoFreq => Some(SegmentPostingsOption::NoFreq),
+                    TextIndexingOptions::TokenizedNoFreq => Some(SegmentPostingsOption::NoFreq),
                     TextIndexingOptions::TokenizedWithFreq => Some(SegmentPostingsOption::Freq),
                     TextIndexingOptions::TokenizedWithFreqAndPosition => {
                         Some(SegmentPostingsOption::FreqAndPositions)