mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-26 05:00:41 +00:00
Callers can now request more or less functionality when opening segment postings.
This commit is contained in:
@@ -92,7 +92,7 @@ impl<'a> PostingsMerger<'a> {
|
||||
|
||||
let offset = self.doc_offsets[heap_item.segment_ord];
|
||||
let reader = &self.readers[heap_item.segment_ord];
|
||||
let segment_postings = reader.read_postings(&heap_item.term).unwrap();
|
||||
let segment_postings = reader.read_postings_all_info(&heap_item.term).unwrap();
|
||||
let offset_postings = OffsetPostings::new(segment_postings, offset);
|
||||
segment_postings_list.push(offset_postings);
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ use std::fmt;
|
||||
use rustc_serialize::json;
|
||||
use core::index::SegmentInfo;
|
||||
use schema::Field;
|
||||
use postings::SegmentPostingsOption;
|
||||
use postings::SegmentPostings;
|
||||
use fastfield::{U32FastFieldsReader, U32FastFieldReader};
|
||||
use schema::FieldEntry;
|
||||
@@ -104,7 +105,9 @@ impl SegmentReader {
|
||||
let fieldnorms_data = try!(segment.open_read(SegmentComponent::FIELDNORMS));
|
||||
let fieldnorms_reader = try!(U32FastFieldsReader::open(fieldnorms_data));
|
||||
|
||||
let positions_data = try!(segment.open_read(SegmentComponent::POSITIONS));
|
||||
let positions_data = segment
|
||||
.open_read(SegmentComponent::POSITIONS)
|
||||
.unwrap_or(ReadOnlySource::Anonymous(Vec::new()));
|
||||
|
||||
let schema = segment.schema();
|
||||
Ok(SegmentReader {
|
||||
@@ -132,7 +135,11 @@ impl SegmentReader {
|
||||
self.store_reader.get(doc_id)
|
||||
}
|
||||
|
||||
pub fn read_postings(&self, term: &Term) -> Option<SegmentPostings> {
|
||||
|
||||
// TODO: returning `None` is quite ambiguous here.
|
||||
// Is it because the term is not here, or because the
|
||||
// field does not handle this functionality?
|
||||
pub fn read_postings(&self, term: &Term, option: SegmentPostingsOption) -> Option<SegmentPostings> {
|
||||
let field = term.get_field();
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
let term_info = get!(self.get_term_info(&term));
|
||||
@@ -141,26 +148,51 @@ impl SegmentReader {
|
||||
let freq_handler = match field_entry {
|
||||
&FieldEntry::Text(_, ref options) => {
|
||||
let indexing_options = options.get_indexing_options();
|
||||
match indexing_options {
|
||||
TextIndexingOptions::TokenizedWithFreq => {
|
||||
FreqHandler::new_with_freq()
|
||||
}
|
||||
TextIndexingOptions::TokenizedWithFreqAndPosition => {
|
||||
let offseted_position_data = &self.positions_data[term_info.positions_offset as usize ..];
|
||||
FreqHandler::new_with_freq_and_position(offseted_position_data)
|
||||
}
|
||||
_ => {
|
||||
match option {
|
||||
SegmentPostingsOption::NoFreq => {
|
||||
FreqHandler::new()
|
||||
}
|
||||
SegmentPostingsOption::Freq => {
|
||||
if indexing_options.is_termfreq_enabled() {
|
||||
FreqHandler::new_with_freq()
|
||||
}
|
||||
else {
|
||||
FreqHandler::new()
|
||||
}
|
||||
}
|
||||
SegmentPostingsOption::FreqAndPositions => {
|
||||
if indexing_options == TextIndexingOptions::TokenizedWithFreqAndPosition {
|
||||
let offseted_position_data = &self.positions_data[term_info.positions_offset as usize ..];
|
||||
FreqHandler::new_with_freq_and_position(offseted_position_data)
|
||||
}
|
||||
else {
|
||||
FreqHandler::new_with_freq()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
panic!("Expected text field, got {:?}", field_entry);
|
||||
FreqHandler::new()
|
||||
}
|
||||
};
|
||||
Some(SegmentPostings::from_data(term_info.doc_freq, &postings_data, freq_handler))
|
||||
}
|
||||
|
||||
|
||||
pub fn read_postings_all_info(&self, term: &Term) -> Option<SegmentPostings> {
|
||||
let field_entry = self.schema.get_field_entry(term.get_field());
|
||||
let segment_posting_option = match field_entry {
|
||||
&FieldEntry::Text(_, ref text_options) => {
|
||||
match text_options.get_indexing_options() {
|
||||
TextIndexingOptions::TokenizedWithFreq => SegmentPostingsOption::Freq,
|
||||
TextIndexingOptions::TokenizedWithFreqAndPosition => SegmentPostingsOption::FreqAndPositions,
|
||||
_ => SegmentPostingsOption::NoFreq,
|
||||
}
|
||||
}
|
||||
&FieldEntry::U32(_, _) => SegmentPostingsOption::NoFreq
|
||||
};
|
||||
self.read_postings(term, segment_posting_option)
|
||||
}
|
||||
|
||||
/// Looks `term` up in this segment's `term_infos`, keyed by `term.as_slice()`,
/// and returns the result of that lookup unchanged.
pub fn get_term_info<'a>(&'a self, term: &Term) -> Option<TermInfo> {
    let key = term.as_slice();
    self.term_infos.get(key)
}
|
||||
|
||||
@@ -75,7 +75,7 @@ impl Directory for MmapDirectory {
|
||||
let new_mmap = try!(
|
||||
MmapReadOnly::open_path(full_path.clone())
|
||||
.map_err(|err| {
|
||||
if err.kind() == io::ErrorKind::AlreadyExists {
|
||||
if err.kind() == io::ErrorKind::NotFound {
|
||||
OpenError::FileDoesNotExist(PathBuf::from(&full_path))
|
||||
}
|
||||
else {
|
||||
|
||||
@@ -3,19 +3,13 @@ use std::io::Cursor;
|
||||
use common::VInt;
|
||||
use common::BinarySerializable;
|
||||
use compression::CompositeDecoder;
|
||||
use postings::SegmentPostingsOption;
|
||||
use compression::NUM_DOCS_PER_BLOCK;
|
||||
|
||||
|
||||
enum Option {
|
||||
NoFreq,
|
||||
Freq,
|
||||
FreqAndPositions,
|
||||
}
|
||||
|
||||
pub struct FreqHandler {
|
||||
freq_decoder: SIMDBlockDecoder,
|
||||
positions: Vec<u32>,
|
||||
option: Option,
|
||||
option: SegmentPostingsOption,
|
||||
positions_offsets: [usize; NUM_DOCS_PER_BLOCK + 1],
|
||||
}
|
||||
|
||||
@@ -38,7 +32,7 @@ impl FreqHandler {
|
||||
FreqHandler {
|
||||
freq_decoder: SIMDBlockDecoder::with_val(1u32),
|
||||
positions: Vec::new(),
|
||||
option: Option::NoFreq,
|
||||
option: SegmentPostingsOption::NoFreq,
|
||||
positions_offsets: [0; NUM_DOCS_PER_BLOCK + 1],
|
||||
}
|
||||
}
|
||||
@@ -47,7 +41,7 @@ impl FreqHandler {
|
||||
FreqHandler {
|
||||
freq_decoder: SIMDBlockDecoder::new(),
|
||||
positions: Vec::new(),
|
||||
option: Option::Freq,
|
||||
option: SegmentPostingsOption::Freq,
|
||||
positions_offsets: [0; NUM_DOCS_PER_BLOCK + 1],
|
||||
}
|
||||
}
|
||||
@@ -57,7 +51,7 @@ impl FreqHandler {
|
||||
FreqHandler {
|
||||
freq_decoder: SIMDBlockDecoder::new(),
|
||||
positions: positions,
|
||||
option: Option::FreqAndPositions,
|
||||
option: SegmentPostingsOption::FreqAndPositions,
|
||||
positions_offsets: [0; NUM_DOCS_PER_BLOCK + 1],
|
||||
}
|
||||
}
|
||||
@@ -88,13 +82,13 @@ impl FreqHandler {
|
||||
|
||||
pub fn read_freq_block<'a>(&mut self, data: &'a [u8]) -> &'a [u8] {
|
||||
match self.option {
|
||||
Option::NoFreq => {
|
||||
SegmentPostingsOption::NoFreq => {
|
||||
data
|
||||
}
|
||||
Option::Freq => {
|
||||
SegmentPostingsOption::Freq => {
|
||||
self.freq_decoder.uncompress_block_unsorted(data)
|
||||
}
|
||||
Option::FreqAndPositions => {
|
||||
SegmentPostingsOption::FreqAndPositions => {
|
||||
let remaining: &'a [u8] = self.freq_decoder.uncompress_block_unsorted(data);
|
||||
self.fill_positions_offset();
|
||||
remaining
|
||||
@@ -104,11 +98,11 @@ impl FreqHandler {
|
||||
|
||||
pub fn read_freq_vint(&mut self, data: &[u8], num_els: usize) {
|
||||
match self.option {
|
||||
Option::NoFreq => {}
|
||||
Option::Freq => {
|
||||
SegmentPostingsOption::NoFreq => {}
|
||||
SegmentPostingsOption::Freq => {
|
||||
self.freq_decoder.uncompress_vint_unsorted(data, num_els);
|
||||
}
|
||||
Option::FreqAndPositions => {
|
||||
SegmentPostingsOption::FreqAndPositions => {
|
||||
self.freq_decoder.uncompress_vint_unsorted(data, num_els);
|
||||
self.fill_positions_offset();
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ mod freq_handler;
|
||||
mod union_postings;
|
||||
mod docset;
|
||||
mod scored_docset;
|
||||
mod segment_postings_option;
|
||||
|
||||
pub use self::docset::{SkipResult, DocSet};
|
||||
pub use self::union_postings::UnionPostings;
|
||||
@@ -30,7 +31,7 @@ pub use self::intersection::IntersectionDocSet;
|
||||
pub use self::freq_handler::FreqHandler;
|
||||
pub use self::scored_docset::ScoredDocSet;
|
||||
pub use self::postings::HasLen;
|
||||
|
||||
pub use self::segment_postings_option::SegmentPostingsOption;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
5
src/postings/segment_postings_option.rs
Normal file
5
src/postings/segment_postings_option.rs
Normal file
@@ -0,0 +1,5 @@
|
||||
/// Level of detail requested when opening the postings of a segment.
///
/// The value is passed by value to the postings reader and stored in the
/// frequency handler, so the enum derives `Clone`/`Copy`; `Debug`,
/// `PartialEq` and `Eq` make it usable in assertions and log messages.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SegmentPostingsOption {
    /// Only document ids are needed; term frequencies are not decoded.
    NoFreq,
    /// Term frequencies are decoded along with the document ids.
    Freq,
    /// Term frequencies are decoded and term positions are made available.
    FreqAndPositions,
}
|
||||
@@ -20,6 +20,7 @@ use query::MultiTermAccumulator;
|
||||
use DocAddress;
|
||||
use query::Explanation;
|
||||
use query::occur::Occur;
|
||||
use postings::SegmentPostingsOption;
|
||||
|
||||
|
||||
#[derive(Eq, PartialEq, Debug)]
|
||||
@@ -73,7 +74,7 @@ impl MultiTermQuery {
|
||||
let mut decode_timer = timer.open("decode_all");
|
||||
for &(occur, ref term) in &self.occur_terms {
|
||||
let _decode_one_timer = decode_timer.open("decode_one");
|
||||
match reader.read_postings(&term) {
|
||||
match reader.read_postings(&term, SegmentPostingsOption::Freq) {
|
||||
Some(postings) => {
|
||||
let field = term.get_field();
|
||||
let fieldnorm_reader = try!(reader.get_fieldnorms_reader(field));
|
||||
|
||||
Reference in New Issue
Block a user