diff --git a/src/directory/file_slice.rs b/src/directory/file_slice.rs index eb3bb8cab..7b11377e0 100644 --- a/src/directory/file_slice.rs +++ b/src/directory/file_slice.rs @@ -86,7 +86,7 @@ impl FileSlice { /// Creates an empty FileSlice pub fn empty() -> FileSlice { - const EMPTY_SLICE: &'static [u8] = &[]; + const EMPTY_SLICE: &[u8] = &[]; FileSlice::from(EMPTY_SLICE) } diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index 36e46a0c5..49dde8237 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -160,7 +160,8 @@ impl MultiFieldPostingsWriter { FieldType::Bytes(_) => {} } - let postings_writer = &self.per_field_postings_writers[field.field_id() as usize]; + let postings_writer = + self.per_field_postings_writers[field.field_id() as usize].as_ref(); let fieldnorm_reader = fieldnorm_readers.get_field(field)?; let mut field_serializer = serializer.new_field( field, diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index 003d6ffa2..a2f2b911d 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -72,7 +72,7 @@ impl SegmentPostings { let mut buffer = Vec::new(); { let mut postings_serializer = - PostingsSerializer::new(&mut buffer, 0.0, false, false, None); + PostingsSerializer::new(&mut buffer, 0.0, IndexRecordOption::Basic, None); postings_serializer.new_term(docs.len() as u32); for &doc in docs { postings_serializer.write_doc(doc, 1u32); @@ -116,8 +116,7 @@ impl SegmentPostings { let mut postings_serializer = PostingsSerializer::new( &mut buffer, average_field_norm, - true, - false, + IndexRecordOption::WithFreqs, fieldnorm_reader, ); postings_serializer.new_term(doc_and_tfs.len() as u32); diff --git a/src/postings/serializer.rs b/src/postings/serializer.rs index e694528ec..f6745e64e 100644 --- a/src/postings/serializer.rs +++ b/src/postings/serializer.rs @@ -8,8 +8,8 @@ use crate::positions::PositionSerializer; use crate::postings::compression::{BlockEncoder, VIntEncoder, COMPRESSION_BLOCK_SIZE}; use crate::postings::skip::SkipSerializer; use crate::query::BM25Weight; -use crate::schema::Schema; use crate::schema::{Field, FieldEntry, FieldType}; +use crate::schema::{IndexRecordOption, Schema}; use crate::termdict::{TermDictionaryBuilder, TermOrdinal}; use crate::{DocId, Score}; use std::cmp::Ordering; @@ -143,30 +143,24 @@ impl<'a> FieldSerializer<'a> { fieldnorm_reader: Option, ) -> io::Result> { total_num_tokens.serialize(postings_write)?; - let (term_freq_enabled, position_enabled): (bool, bool) = match field_type { + let mode = match field_type { FieldType::Str(ref text_options) => { if let Some(text_indexing_options) = text_options.get_indexing_options() { - let index_option = text_indexing_options.index_option(); - (index_option.has_freq(), index_option.has_positions()) + text_indexing_options.index_option() } else { - (false, false) + IndexRecordOption::Basic } } - _ => (false, false), + _ => IndexRecordOption::Basic, }; let term_dictionary_builder = TermDictionaryBuilder::create(term_dictionary_write)?; let average_fieldnorm = fieldnorm_reader .as_ref() .map(|ff_reader| (total_num_tokens as Score / ff_reader.num_docs() as Score)) .unwrap_or(0.0); - let postings_serializer = PostingsSerializer::new( - postings_write, - average_fieldnorm, - term_freq_enabled, - position_enabled, - fieldnorm_reader, - ); - let positions_serializer_opt = if position_enabled { + let postings_serializer = + PostingsSerializer::new(postings_write, average_fieldnorm, mode, fieldnorm_reader); + let positions_serializer_opt = if mode.has_positions() { Some(PositionSerializer::new(positions_write, positionsidx_write)) } else { None @@ -323,8 +317,7 @@ pub struct PostingsSerializer { postings_write: Vec, skip_write: SkipSerializer, - termfreq_enabled: bool, - termfreq_sum_enabled: bool, + mode: IndexRecordOption, fieldnorm_reader: Option, bm25_weight: Option, @@ -338,8 +331,7 @@ impl PostingsSerializer { pub fn new( write: W, avg_fieldnorm: Score, - termfreq_enabled: bool, - termfreq_sum_enabled: bool, + mode: IndexRecordOption, fieldnorm_reader: Option, ) -> PostingsSerializer { let num_docs = fieldnorm_reader @@ -356,8 +348,7 @@ impl PostingsSerializer { skip_write: SkipSerializer::new(), last_doc_id_encoded: 0u32, - termfreq_enabled, - termfreq_sum_enabled, + mode, fieldnorm_reader, bm25_weight: None, @@ -368,7 +359,7 @@ impl PostingsSerializer { } pub fn new_term(&mut self, term_doc_freq: u32) { - if self.termfreq_enabled && self.num_docs > 0 { + if self.mode.has_freq() && self.num_docs > 0 { let bm25_weight = BM25Weight::for_one_term( term_doc_freq as u64, self.num_docs as u64, @@ -390,13 +381,15 @@ impl PostingsSerializer { // last el block 0, offset block 1, self.postings_write.extend(block_encoded); } - if self.termfreq_enabled { + if self.mode.has_freq() { let (num_bits, block_encoded): (u8, &[u8]) = self .block_encoder .compress_block_unsorted(&self.block.term_freqs()); self.postings_write.extend(block_encoded); self.skip_write.write_term_freq(num_bits); - if self.termfreq_sum_enabled { + if self.mode.has_positions() { + // We serialize the sum of term freqs within the skip information + // in order to navigate through positions. let sum_freq = self.block.term_freqs().iter().cloned().sum(); self.skip_write.write_total_term_freq(sum_freq); } @@ -455,7 +448,7 @@ impl PostingsSerializer { self.postings_write.write_all(block_encoded)?; } // ... Idem for term frequencies - if self.termfreq_enabled { + if self.mode.has_freq() { let block_encoded = self .block_encoder .compress_vint_unsorted(self.block.term_freqs());