mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-04 16:22:55 +00:00
Code cleaning
This commit is contained in:
@@ -12,6 +12,7 @@ use schema::Document;
|
||||
use directory::ReadOnlySource;
|
||||
use DocId;
|
||||
use std::str;
|
||||
use std::cmp;
|
||||
use postings::TermInfo;
|
||||
use datastruct::fstmap::FstMap;
|
||||
use std::sync::Arc;
|
||||
@@ -201,34 +202,16 @@ impl SegmentReader {
|
||||
let field = term.field();
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
let term_info = get!(self.get_term_info(term));
|
||||
let possible_option = match *field_entry.field_type() {
|
||||
FieldType::Str(ref options) => {
|
||||
let indexing_options = options.get_indexing_options();
|
||||
match option {
|
||||
SegmentPostingsOption::NoFreq => SegmentPostingsOption::NoFreq,
|
||||
SegmentPostingsOption::Freq => {
|
||||
if indexing_options.is_termfreq_enabled() {
|
||||
SegmentPostingsOption::Freq
|
||||
} else {
|
||||
SegmentPostingsOption::NoFreq
|
||||
}
|
||||
}
|
||||
SegmentPostingsOption::FreqAndPositions => {
|
||||
if indexing_options == TextIndexingOptions::TokenizedWithFreqAndPosition {
|
||||
SegmentPostingsOption::FreqAndPositions
|
||||
} else if indexing_options.is_termfreq_enabled() {
|
||||
SegmentPostingsOption::Freq
|
||||
} else {
|
||||
SegmentPostingsOption::NoFreq
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => { SegmentPostingsOption::NoFreq },
|
||||
};
|
||||
Some(self.read_postings_from_terminfo(&term_info, possible_option))
|
||||
let maximum_option = get!(field_entry.field_type().get_segment_postings_option());
|
||||
let best_effort_option = cmp::min(maximum_option, option);
|
||||
Some(self.read_postings_from_terminfo(&term_info, best_effort_option))
|
||||
}
|
||||
|
||||
|
||||
/// Returns a posting object given a `term_info`.
|
||||
/// This method is for advanced usage only.
|
||||
///
|
||||
/// Most users should prefer using `read_postings` instead.
|
||||
pub fn read_postings_from_terminfo(&self,
|
||||
term_info: &TermInfo,
|
||||
option: SegmentPostingsOption)
|
||||
|
||||
@@ -129,7 +129,8 @@ mod tests {
|
||||
use super::*;
|
||||
use directory::{RAMDirectory, Directory};
|
||||
use std::path::PathBuf;
|
||||
|
||||
use fst::Streamer;
|
||||
|
||||
#[test]
|
||||
fn test_fstmap() {
|
||||
let mut directory = RAMDirectory::create();
|
||||
@@ -146,10 +147,12 @@ mod tests {
|
||||
assert_eq!(fstmap.get("abc"), Some(34u32));
|
||||
assert_eq!(fstmap.get("abcd"), Some(346u32));
|
||||
let mut stream = fstmap.stream();
|
||||
assert!(stream.advance());
|
||||
assert_eq!(stream.next().unwrap(), "abc".as_bytes());
|
||||
assert_eq!(stream.key(), "abc".as_bytes());
|
||||
assert!(stream.advance());
|
||||
assert_eq!(stream.value(), 34u32);
|
||||
assert_eq!(stream.next().unwrap(), "abcd".as_bytes());
|
||||
assert_eq!(stream.key(), "abcd".as_bytes());
|
||||
assert_eq!(stream.value(), 346u32);
|
||||
assert!(!stream.advance());
|
||||
}
|
||||
|
||||
|
||||
@@ -142,7 +142,7 @@ impl<'a, V> Streamer<'a> for FstMerger<'a, V> where V: BinarySerializable {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
use schema::{Term, SchemaBuilder, Document, TEXT};
|
||||
use core::Index;
|
||||
|
||||
|
||||
@@ -59,11 +59,11 @@ pub struct FstMapStreamer<'a, V> where V: 'a + BinarySerializable {
|
||||
}
|
||||
|
||||
|
||||
impl<'a, V> fst::Streamer<'a> for FstMapStreamer<'a, V> where V: 'a + BinarySerializable {
|
||||
impl<'a, 'b, V> fst::Streamer<'b> for FstMapStreamer<'a, V> where V: 'a + BinarySerializable {
|
||||
|
||||
type Item = &'a [u8];
|
||||
type Item = &'b [u8];
|
||||
|
||||
fn next<'b>(&'b mut self) -> Option<&'b [u8]> {
|
||||
fn next(&'b mut self) -> Option<&'b [u8]> {
|
||||
if self.advance() {
|
||||
Some(&self.buffer)
|
||||
}
|
||||
|
||||
@@ -232,7 +232,10 @@ impl IndexMerger {
|
||||
// we reached a new field.
|
||||
let field_entry = self.schema.get_field_entry(current_field);
|
||||
// ... set the segment postings option for the new field.
|
||||
segment_postings_option = field_entry.field_type().get_segment_postings_option();
|
||||
segment_postings_option = field_entry
|
||||
.field_type()
|
||||
.get_segment_postings_option()
|
||||
.expect("Encounterred a field that is not supposed to be indexed. Have you modified the index?");
|
||||
last_field = Some(current_field);
|
||||
need_to_call_new_field = true;
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
/// avoid this extra cost when the information is not required.
|
||||
/// For instance, positions are useful when running phrase queries
|
||||
/// but useless in other queries.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq)]
|
||||
pub enum SegmentPostingsOption {
|
||||
/// Only the doc ids are decoded
|
||||
NoFreq,
|
||||
@@ -15,3 +15,15 @@ pub enum SegmentPostingsOption {
|
||||
/// DocIds, term frequencies and positions will be decoded.
|
||||
FreqAndPositions,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use super::SegmentPostingsOption;
|
||||
|
||||
#[test]
|
||||
fn test_cmp_segment_postings_option() {
|
||||
assert!(SegmentPostingsOption::FreqAndPositions > SegmentPostingsOption::Freq);
|
||||
assert!(SegmentPostingsOption::Freq > SegmentPostingsOption::NoFreq);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,23 +40,37 @@ impl FieldType {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_segment_postings_option(&self) -> SegmentPostingsOption {
|
||||
/// Given a field configuration, returns the maximal possible
|
||||
/// `SegmentPostingsOption` available.
|
||||
///
|
||||
/// If the field is not indexed, then returns `None`.
|
||||
pub fn get_segment_postings_option(&self) -> Option<SegmentPostingsOption> {
|
||||
match *self {
|
||||
FieldType::Str(ref text_options) => {
|
||||
match text_options.get_indexing_options() {
|
||||
TextIndexingOptions::Untokenized =>
|
||||
Some(SegmentPostingsOption::NoFreq),
|
||||
TextIndexingOptions::TokenizedNoFreq =>
|
||||
SegmentPostingsOption::NoFreq,
|
||||
Some(SegmentPostingsOption::NoFreq),
|
||||
TextIndexingOptions::TokenizedWithFreq =>
|
||||
SegmentPostingsOption::Freq,
|
||||
Some(SegmentPostingsOption::Freq),
|
||||
TextIndexingOptions::TokenizedWithFreqAndPosition =>
|
||||
SegmentPostingsOption::FreqAndPositions,
|
||||
_ => {
|
||||
SegmentPostingsOption::NoFreq
|
||||
Some(SegmentPostingsOption::FreqAndPositions),
|
||||
TextIndexingOptions::Unindexed => {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
FieldType::U64(_) |
|
||||
FieldType::I64(_) => SegmentPostingsOption::NoFreq,
|
||||
FieldType::U64(ref int_options) |
|
||||
FieldType::I64(ref int_options) => {
|
||||
if int_options.is_indexed() {
|
||||
Some(SegmentPostingsOption::NoFreq)
|
||||
}
|
||||
else {
|
||||
None
|
||||
}
|
||||
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user