Code cleaning

This commit is contained in:
Paul Masurel
2017-05-18 23:06:02 +09:00
parent ca76fd5ba0
commit 0272167c2e
7 changed files with 58 additions and 43 deletions

View File

@@ -12,6 +12,7 @@ use schema::Document;
use directory::ReadOnlySource;
use DocId;
use std::str;
use std::cmp;
use postings::TermInfo;
use datastruct::fstmap::FstMap;
use std::sync::Arc;
@@ -201,34 +202,16 @@ impl SegmentReader {
let field = term.field();
let field_entry = self.schema.get_field_entry(field);
let term_info = get!(self.get_term_info(term));
let possible_option = match *field_entry.field_type() {
FieldType::Str(ref options) => {
let indexing_options = options.get_indexing_options();
match option {
SegmentPostingsOption::NoFreq => SegmentPostingsOption::NoFreq,
SegmentPostingsOption::Freq => {
if indexing_options.is_termfreq_enabled() {
SegmentPostingsOption::Freq
} else {
SegmentPostingsOption::NoFreq
}
}
SegmentPostingsOption::FreqAndPositions => {
if indexing_options == TextIndexingOptions::TokenizedWithFreqAndPosition {
SegmentPostingsOption::FreqAndPositions
} else if indexing_options.is_termfreq_enabled() {
SegmentPostingsOption::Freq
} else {
SegmentPostingsOption::NoFreq
}
}
}
}
_ => { SegmentPostingsOption::NoFreq },
};
Some(self.read_postings_from_terminfo(&term_info, possible_option))
let maximum_option = get!(field_entry.field_type().get_segment_postings_option());
let best_effort_option = cmp::min(maximum_option, option);
Some(self.read_postings_from_terminfo(&term_info, best_effort_option))
}
/// Returns a posting object given a `term_info`.
/// This method is for advanced usage only.
///
/// Most users should prefer using `read_postings` instead.
pub fn read_postings_from_terminfo(&self,
term_info: &TermInfo,
option: SegmentPostingsOption)

View File

@@ -129,7 +129,8 @@ mod tests {
use super::*;
use directory::{RAMDirectory, Directory};
use std::path::PathBuf;
use fst::Streamer;
#[test]
fn test_fstmap() {
let mut directory = RAMDirectory::create();
@@ -146,10 +147,12 @@ mod tests {
assert_eq!(fstmap.get("abc"), Some(34u32));
assert_eq!(fstmap.get("abcd"), Some(346u32));
let mut stream = fstmap.stream();
assert!(stream.advance());
assert_eq!(stream.next().unwrap(), "abc".as_bytes());
assert_eq!(stream.key(), "abc".as_bytes());
assert!(stream.advance());
assert_eq!(stream.value(), 34u32);
assert_eq!(stream.next().unwrap(), "abcd".as_bytes());
assert_eq!(stream.key(), "abcd".as_bytes());
assert_eq!(stream.value(), 346u32);
assert!(!stream.advance());
}

View File

@@ -142,7 +142,7 @@ impl<'a, V> Streamer<'a> for FstMerger<'a, V> where V: BinarySerializable {
#[cfg(test)]
mod tests {
use super::*;
use schema::{Term, SchemaBuilder, Document, TEXT};
use core::Index;

View File

@@ -59,11 +59,11 @@ pub struct FstMapStreamer<'a, V> where V: 'a + BinarySerializable {
}
impl<'a, V> fst::Streamer<'a> for FstMapStreamer<'a, V> where V: 'a + BinarySerializable {
impl<'a, 'b, V> fst::Streamer<'b> for FstMapStreamer<'a, V> where V: 'a + BinarySerializable {
type Item = &'a [u8];
type Item = &'b [u8];
fn next<'b>(&'b mut self) -> Option<&'b [u8]> {
fn next(&'b mut self) -> Option<&'b [u8]> {
if self.advance() {
Some(&self.buffer)
}

View File

@@ -232,7 +232,10 @@ impl IndexMerger {
// we reached a new field.
let field_entry = self.schema.get_field_entry(current_field);
// ... set the segment postings option for the new field.
segment_postings_option = field_entry.field_type().get_segment_postings_option();
segment_postings_option = field_entry
.field_type()
.get_segment_postings_option()
.expect("Encounterred a field that is not supposed to be indexed. Have you modified the index?");
last_field = Some(current_field);
need_to_call_new_field = true;
}

View File

@@ -6,7 +6,7 @@
/// avoid this extra cost when the information is not required.
/// For instance, positions are useful when running phrase queries
/// but useless in other queries.
#[derive(Clone, Copy, Debug)]
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq)]
pub enum SegmentPostingsOption {
/// Only the doc ids are decoded
NoFreq,
@@ -15,3 +15,15 @@ pub enum SegmentPostingsOption {
/// DocIds, term frequencies and positions will be decoded.
FreqAndPositions,
}
#[cfg(test)]
mod tests {

    use super::SegmentPostingsOption;

    /// Checks the ordering of `SegmentPostingsOption` values: an option
    /// that decodes more information must compare as strictly greater
    /// than one that decodes less.
    #[test]
    fn test_cmp_segment_postings_option() {
        // Listed from the richest option down to the cheapest one.
        let richest_first = [SegmentPostingsOption::FreqAndPositions,
                             SegmentPostingsOption::Freq,
                             SegmentPostingsOption::NoFreq];
        // Every option must be strictly greater than the one that follows it.
        for pair in richest_first.windows(2) {
            assert!(pair[0] > pair[1]);
        }
    }
}

View File

@@ -40,23 +40,37 @@ impl FieldType {
}
}
pub fn get_segment_postings_option(&self) -> SegmentPostingsOption {
/// Given a field configuration, returns the maximal
/// `SegmentPostingsOption` available.
///
/// If the field is not indexed, returns `None`.
pub fn get_segment_postings_option(&self) -> Option<SegmentPostingsOption> {
match *self {
FieldType::Str(ref text_options) => {
match text_options.get_indexing_options() {
TextIndexingOptions::Untokenized =>
Some(SegmentPostingsOption::NoFreq),
TextIndexingOptions::TokenizedNoFreq =>
SegmentPostingsOption::NoFreq,
Some(SegmentPostingsOption::NoFreq),
TextIndexingOptions::TokenizedWithFreq =>
SegmentPostingsOption::Freq,
Some(SegmentPostingsOption::Freq),
TextIndexingOptions::TokenizedWithFreqAndPosition =>
SegmentPostingsOption::FreqAndPositions,
_ => {
SegmentPostingsOption::NoFreq
Some(SegmentPostingsOption::FreqAndPositions),
TextIndexingOptions::Unindexed => {
None
}
}
}
FieldType::U64(_) |
FieldType::I64(_) => SegmentPostingsOption::NoFreq,
FieldType::U64(ref int_options) |
FieldType::I64(ref int_options) => {
if int_options.is_indexed() {
Some(SegmentPostingsOption::NoFreq)
}
else {
None
}
},
}
}