Allow callers to request more or less functionality when opening segment postings.

This commit is contained in:
Paul Masurel
2016-08-13 14:15:28 +09:00
parent 9a8f153d2b
commit c3e3715cbd
7 changed files with 67 additions and 34 deletions

View File

@@ -92,7 +92,7 @@ impl<'a> PostingsMerger<'a> {
let offset = self.doc_offsets[heap_item.segment_ord];
let reader = &self.readers[heap_item.segment_ord];
let segment_postings = reader.read_postings(&heap_item.term).unwrap();
let segment_postings = reader.read_postings_all_info(&heap_item.term).unwrap();
let offset_postings = OffsetPostings::new(segment_postings, offset);
segment_postings_list.push(offset_postings);
}

View File

@@ -15,6 +15,7 @@ use std::fmt;
use rustc_serialize::json;
use core::index::SegmentInfo;
use schema::Field;
use postings::SegmentPostingsOption;
use postings::SegmentPostings;
use fastfield::{U32FastFieldsReader, U32FastFieldReader};
use schema::FieldEntry;
@@ -104,7 +105,9 @@ impl SegmentReader {
let fieldnorms_data = try!(segment.open_read(SegmentComponent::FIELDNORMS));
let fieldnorms_reader = try!(U32FastFieldsReader::open(fieldnorms_data));
let positions_data = try!(segment.open_read(SegmentComponent::POSITIONS));
let positions_data = segment
.open_read(SegmentComponent::POSITIONS)
.unwrap_or(ReadOnlySource::Anonymous(Vec::new()));
let schema = segment.schema();
Ok(SegmentReader {
@@ -132,7 +135,11 @@ impl SegmentReader {
self.store_reader.get(doc_id)
}
pub fn read_postings(&self, term: &Term) -> Option<SegmentPostings> {
// TODO: returning `None` is ambiguous here. Is it because the term
// is not present, or because the field does not support the
// requested functionality?
pub fn read_postings(&self, term: &Term, option: SegmentPostingsOption) -> Option<SegmentPostings> {
let field = term.get_field();
let field_entry = self.schema.get_field_entry(field);
let term_info = get!(self.get_term_info(&term));
@@ -141,26 +148,51 @@ impl SegmentReader {
let freq_handler = match field_entry {
&FieldEntry::Text(_, ref options) => {
let indexing_options = options.get_indexing_options();
match indexing_options {
TextIndexingOptions::TokenizedWithFreq => {
FreqHandler::new_with_freq()
}
TextIndexingOptions::TokenizedWithFreqAndPosition => {
let offseted_position_data = &self.positions_data[term_info.positions_offset as usize ..];
FreqHandler::new_with_freq_and_position(offseted_position_data)
}
_ => {
match option {
SegmentPostingsOption::NoFreq => {
FreqHandler::new()
}
SegmentPostingsOption::Freq => {
if indexing_options.is_termfreq_enabled() {
FreqHandler::new_with_freq()
}
else {
FreqHandler::new()
}
}
SegmentPostingsOption::FreqAndPositions => {
if indexing_options == TextIndexingOptions::TokenizedWithFreqAndPosition {
let offseted_position_data = &self.positions_data[term_info.positions_offset as usize ..];
FreqHandler::new_with_freq_and_position(offseted_position_data)
}
else {
FreqHandler::new_with_freq()
}
}
}
}
_ => {
panic!("Expected text field, got {:?}", field_entry);
FreqHandler::new()
}
};
Some(SegmentPostings::from_data(term_info.doc_freq, &postings_data, freq_handler))
}
/// Opens the postings of `term`, asking for the richest
/// `SegmentPostingsOption` that matches the indexing options of the
/// term's field.
///
/// Text fields tokenized with frequencies map to `Freq`, those tokenized
/// with frequencies and positions map to `FreqAndPositions`, and every
/// other text indexing option, as well as `U32` fields, maps to `NoFreq`.
/// The chosen option is then forwarded to `read_postings`, whose result
/// is returned unchanged.
pub fn read_postings_all_info(&self, term: &Term) -> Option<SegmentPostings> {
    let field = term.get_field();
    let segment_posting_option = match *self.schema.get_field_entry(field) {
        FieldEntry::Text(_, ref text_options) => {
            match text_options.get_indexing_options() {
                TextIndexingOptions::TokenizedWithFreqAndPosition => {
                    SegmentPostingsOption::FreqAndPositions
                }
                TextIndexingOptions::TokenizedWithFreq => SegmentPostingsOption::Freq,
                _ => SegmentPostingsOption::NoFreq,
            }
        }
        FieldEntry::U32(_, _) => SegmentPostingsOption::NoFreq,
    };
    self.read_postings(term, segment_posting_option)
}
/// Looks `term` up in this reader's `term_infos`, using the term's
/// `as_slice()` representation as the key, and returns the result of
/// that lookup as-is.
pub fn get_term_info<'a>(&'a self, term: &Term) -> Option<TermInfo> {
    let term_key = term.as_slice();
    self.term_infos.get(term_key)
}

View File

@@ -75,7 +75,7 @@ impl Directory for MmapDirectory {
let new_mmap = try!(
MmapReadOnly::open_path(full_path.clone())
.map_err(|err| {
if err.kind() == io::ErrorKind::AlreadyExists {
if err.kind() == io::ErrorKind::NotFound {
OpenError::FileDoesNotExist(PathBuf::from(&full_path))
}
else {

View File

@@ -3,19 +3,13 @@ use std::io::Cursor;
use common::VInt;
use common::BinarySerializable;
use compression::CompositeDecoder;
use postings::SegmentPostingsOption;
use compression::NUM_DOCS_PER_BLOCK;
/// Selects how much per-document information a `FreqHandler` decodes.
// NOTE(review): inside this module the name shadows the prelude's `Option`.
enum Option {
// Frequency data is not decoded; `read_freq_block` hands the input back untouched.
NoFreq,
// Term frequencies are decoded.
Freq,
// Term frequencies are decoded and the positions offsets are filled in.
FreqAndPositions,
}
pub struct FreqHandler {
freq_decoder: SIMDBlockDecoder,
positions: Vec<u32>,
option: Option,
option: SegmentPostingsOption,
positions_offsets: [usize; NUM_DOCS_PER_BLOCK + 1],
}
@@ -38,7 +32,7 @@ impl FreqHandler {
FreqHandler {
freq_decoder: SIMDBlockDecoder::with_val(1u32),
positions: Vec::new(),
option: Option::NoFreq,
option: SegmentPostingsOption::NoFreq,
positions_offsets: [0; NUM_DOCS_PER_BLOCK + 1],
}
}
@@ -47,7 +41,7 @@ impl FreqHandler {
FreqHandler {
freq_decoder: SIMDBlockDecoder::new(),
positions: Vec::new(),
option: Option::Freq,
option: SegmentPostingsOption::Freq,
positions_offsets: [0; NUM_DOCS_PER_BLOCK + 1],
}
}
@@ -57,7 +51,7 @@ impl FreqHandler {
FreqHandler {
freq_decoder: SIMDBlockDecoder::new(),
positions: positions,
option: Option::FreqAndPositions,
option: SegmentPostingsOption::FreqAndPositions,
positions_offsets: [0; NUM_DOCS_PER_BLOCK + 1],
}
}
@@ -88,13 +82,13 @@ impl FreqHandler {
pub fn read_freq_block<'a>(&mut self, data: &'a [u8]) -> &'a [u8] {
match self.option {
Option::NoFreq => {
SegmentPostingsOption::NoFreq => {
data
}
Option::Freq => {
SegmentPostingsOption::Freq => {
self.freq_decoder.uncompress_block_unsorted(data)
}
Option::FreqAndPositions => {
SegmentPostingsOption::FreqAndPositions => {
let remaining: &'a [u8] = self.freq_decoder.uncompress_block_unsorted(data);
self.fill_positions_offset();
remaining
@@ -104,11 +98,11 @@ impl FreqHandler {
pub fn read_freq_vint(&mut self, data: &[u8], num_els: usize) {
match self.option {
Option::NoFreq => {}
Option::Freq => {
SegmentPostingsOption::NoFreq => {}
SegmentPostingsOption::Freq => {
self.freq_decoder.uncompress_vint_unsorted(data, num_els);
}
Option::FreqAndPositions => {
SegmentPostingsOption::FreqAndPositions => {
self.freq_decoder.uncompress_vint_unsorted(data, num_els);
self.fill_positions_offset();
}

View File

@@ -12,6 +12,7 @@ mod freq_handler;
mod union_postings;
mod docset;
mod scored_docset;
mod segment_postings_option;
pub use self::docset::{SkipResult, DocSet};
pub use self::union_postings::UnionPostings;
@@ -30,7 +31,7 @@ pub use self::intersection::IntersectionDocSet;
pub use self::freq_handler::FreqHandler;
pub use self::scored_docset::ScoredDocSet;
pub use self::postings::HasLen;
pub use self::segment_postings_option::SegmentPostingsOption;
#[cfg(test)]
mod tests {

View File

@@ -0,0 +1,5 @@
/// Level of detail a caller requests when opening the postings of a term.
///
/// The variants are listed from the least to the most information. When a
/// text field was not indexed with the requested level of detail,
/// `SegmentReader::read_postings` falls back to a handler that decodes
/// less information.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SegmentPostingsOption {
    /// Only document ids are needed; frequency data is left undecoded.
    NoFreq,
    /// Document ids and term frequencies are decoded.
    Freq,
    /// Document ids, term frequencies and term positions are decoded.
    FreqAndPositions,
}

View File

@@ -20,6 +20,7 @@ use query::MultiTermAccumulator;
use DocAddress;
use query::Explanation;
use query::occur::Occur;
use postings::SegmentPostingsOption;
#[derive(Eq, PartialEq, Debug)]
@@ -73,7 +74,7 @@ impl MultiTermQuery {
let mut decode_timer = timer.open("decode_all");
for &(occur, ref term) in &self.occur_terms {
let _decode_one_timer = decode_timer.open("decode_one");
match reader.read_postings(&term) {
match reader.read_postings(&term, SegmentPostingsOption::Freq) {
Some(postings) => {
let field = term.get_field();
let fieldnorm_reader = try!(reader.get_fieldnorms_reader(field));