Code cleaning

2026-01-04 16:22:55 +00:00 · 2017-05-18 23:06:02 +09:00
parent ca76fd5ba0
commit 0272167c2e
7 changed files with 58 additions and 43 deletions
--- a/src/core/segment_reader.rs
+++ b/src/core/segment_reader.rs
@@ -12,6 +12,7 @@ use schema::Document;
 use directory::ReadOnlySource;
 use DocId;
 use std::str;
+use std::cmp;
 use postings::TermInfo;
 use datastruct::fstmap::FstMap;
 use std::sync::Arc;
@@ -201,34 +202,16 @@ impl SegmentReader {
        let field = term.field();
        let field_entry = self.schema.get_field_entry(field);
        let term_info = get!(self.get_term_info(term));
-        let possible_option = match *field_entry.field_type() {
-            FieldType::Str(ref options) => {
-                let indexing_options = options.get_indexing_options();
-                match option {
-                    SegmentPostingsOption::NoFreq => SegmentPostingsOption::NoFreq,
-                    SegmentPostingsOption::Freq => {
-                        if indexing_options.is_termfreq_enabled() {
-                            SegmentPostingsOption::Freq
-                        } else {
-                            SegmentPostingsOption::NoFreq
-                        }
-                    }
-                    SegmentPostingsOption::FreqAndPositions => {
-                        if indexing_options == TextIndexingOptions::TokenizedWithFreqAndPosition {
-                            SegmentPostingsOption::FreqAndPositions
-                        } else if indexing_options.is_termfreq_enabled() {
-                            SegmentPostingsOption::Freq
-                        } else {
-                            SegmentPostingsOption::NoFreq
-                        }
-                    }
-                }
-            }
-            _ => { SegmentPostingsOption::NoFreq },
-        };
-        Some(self.read_postings_from_terminfo(&term_info, possible_option))
+        let maximum_option = get!(field_entry.field_type().get_segment_postings_option());
+        let best_effort_option = cmp::min(maximum_option, option);
+        Some(self.read_postings_from_terminfo(&term_info, best_effort_option))
    }

+
+    /// Returns a posting object given a `term_info`.
+    /// This method is for advanced usage only.
+    ///
+    /// Most users should prefer using `read_postings` instead.
    pub fn read_postings_from_terminfo(&self,
                         term_info: &TermInfo,
                         option: SegmentPostingsOption)
--- a/src/datastruct/fstmap/fstmap.rs
+++ b/src/datastruct/fstmap/fstmap.rs
@@ -129,7 +129,8 @@ mod tests {
    use super::*;
    use directory::{RAMDirectory, Directory};
    use std::path::PathBuf;
-
+    use fst::Streamer;
+    
    #[test]
    fn test_fstmap() {
        let mut directory = RAMDirectory::create();
@@ -146,10 +147,12 @@ mod tests {
        assert_eq!(fstmap.get("abc"), Some(34u32));
        assert_eq!(fstmap.get("abcd"), Some(346u32));
        let mut stream = fstmap.stream();
-        assert!(stream.advance());
+        assert_eq!(stream.next().unwrap(), "abc".as_bytes());
        assert_eq!(stream.key(), "abc".as_bytes());
-        assert!(stream.advance());
+        assert_eq!(stream.value(), 34u32);
+        assert_eq!(stream.next().unwrap(), "abcd".as_bytes());
        assert_eq!(stream.key(), "abcd".as_bytes());
+        assert_eq!(stream.value(), 346u32);
        assert!(!stream.advance());
    }

--- a/src/datastruct/fstmap/fstmerger.rs
+++ b/src/datastruct/fstmap/fstmerger.rs
@@ -142,7 +142,7 @@ impl<'a, V> Streamer<'a> for FstMerger<'a, V> where V: BinarySerializable {

 #[cfg(test)]
 mod tests {
-    use super::*;
+    
    use schema::{Term, SchemaBuilder, Document, TEXT};
    use core::Index;

--- a/src/datastruct/fstmap/streamer.rs
+++ b/src/datastruct/fstmap/streamer.rs
@@ -59,11 +59,11 @@ pub struct FstMapStreamer<'a, V> where V: 'a + BinarySerializable {
 }


-impl<'a, V> fst::Streamer<'a> for FstMapStreamer<'a, V> where V: 'a + BinarySerializable {
+impl<'a, 'b, V> fst::Streamer<'b> for FstMapStreamer<'a, V> where V: 'a + BinarySerializable {
    
-    type Item = &'a [u8];
+    type Item = &'b [u8];
    
-    fn next<'b>(&'b mut self) -> Option<&'b [u8]> {
+    fn next(&'b mut self) -> Option<&'b [u8]> {
        if self.advance() {
            Some(&self.buffer)
        }
--- a/src/indexer/merger.rs
+++ b/src/indexer/merger.rs
@@ -232,7 +232,10 @@ impl IndexMerger {
                // we reached a new field.
                let field_entry = self.schema.get_field_entry(current_field);
                 // ... set the segment postings option for the new field.
-                segment_postings_option = field_entry.field_type().get_segment_postings_option();
+                segment_postings_option = field_entry
+                    .field_type()
+                    .get_segment_postings_option()
+                    .expect("Encounterred a field that is not supposed to be indexed. Have you modified the index?");
                last_field = Some(current_field);
                need_to_call_new_field = true;
            }
--- a/src/postings/segment_postings_option.rs
+++ b/src/postings/segment_postings_option.rs
@@ -6,7 +6,7 @@
 /// avoid this extra cost when the information is not required.
 /// For instance, positions are useful when running phrase queries
 /// but useless in other queries.
-#[derive(Clone, Copy, Debug)]
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq)]
 pub enum SegmentPostingsOption {
    /// Only the doc ids are decoded
    NoFreq,
@@ -15,3 +15,15 @@ pub enum SegmentPostingsOption {
    /// DocIds, term frequencies and positions will be decoded.
    FreqAndPositions,
 }
+
+#[cfg(test)]
+mod tests {
+    
+    use super::SegmentPostingsOption;
+
+    #[test]
+    fn test_cmp_segment_postings_option() {
+        assert!(SegmentPostingsOption::FreqAndPositions > SegmentPostingsOption::Freq);
+        assert!(SegmentPostingsOption::Freq > SegmentPostingsOption::NoFreq);
+    }
+}
--- a/src/schema/field_type.rs
+++ b/src/schema/field_type.rs
@@ -40,23 +40,37 @@ impl FieldType {
        }
    }

-    pub fn get_segment_postings_option(&self) -> SegmentPostingsOption {
+    /// Given a field configuration, returns the maximal
+    /// `SegmentPostingsOption` available for that field.
+    ///
+    /// Returns `None` if the field is not indexed.
+    pub fn get_segment_postings_option(&self) -> Option<SegmentPostingsOption> {
        match *self {
            FieldType::Str(ref text_options) => {
                match text_options.get_indexing_options() {
+                    TextIndexingOptions::Untokenized =>
+                        Some(SegmentPostingsOption::NoFreq),
                    TextIndexingOptions::TokenizedNoFreq =>
-                        SegmentPostingsOption::NoFreq,
+                        Some(SegmentPostingsOption::NoFreq),
                    TextIndexingOptions::TokenizedWithFreq =>
-                        SegmentPostingsOption::Freq,
+                        Some(SegmentPostingsOption::Freq),
                    TextIndexingOptions::TokenizedWithFreqAndPosition =>
-                        SegmentPostingsOption::FreqAndPositions,
-                    _ => {
-                        SegmentPostingsOption::NoFreq
+                        Some(SegmentPostingsOption::FreqAndPositions),
+                    TextIndexingOptions::Unindexed => {
+                        None
                    }
                }
            }
-            FieldType::U64(_) |
-            FieldType::I64(_) => SegmentPostingsOption::NoFreq,
+            FieldType::U64(ref int_options) |
+            FieldType::I64(ref int_options) => {
+                if int_options.is_indexed() {
+                    Some(SegmentPostingsOption::NoFreq)
+                }
+                else {
+                    None
+                }
+                
+            },
        }
    }