Mirror of https://github.com/quickwit-oss/tantivy.git
Synced 2026-01-06 01:02:55 +00:00

Compare commits: raphael_op...fieldnorm_ (1 commit)
Commit SHA1: 063ed30f66
@@ -8,7 +8,7 @@ use crate::directory::ReadOnlySource;
 use crate::fastfield::DeleteBitSet;
 use crate::fastfield::FacetReader;
 use crate::fastfield::FastFieldReaders;
-use crate::fieldnorm::FieldNormReader;
+use crate::fieldnorm::{FieldNormReader, FieldNormReaders};
 use crate::schema::Field;
 use crate::schema::FieldType;
 use crate::schema::Schema;
@@ -48,7 +48,7 @@ pub struct SegmentReader {
     positions_composite: CompositeFile,
     positions_idx_composite: CompositeFile,
     fast_fields_readers: Arc<FastFieldReaders>,
-    fieldnorms_composite: CompositeFile,
+    fieldnorm_readers: FieldNormReaders,

     store_source: ReadOnlySource,
     delete_bitset_opt: Option<DeleteBitSet>,
@@ -126,8 +126,8 @@ impl SegmentReader {
    /// They are simply stored as a fast field, serialized in
    /// the `.fieldnorm` file of the segment.
    pub fn get_fieldnorms_reader(&self, field: Field) -> FieldNormReader {
-        if let Some(fieldnorm_source) = self.fieldnorms_composite.open_read(field) {
-            FieldNormReader::open(fieldnorm_source)
+        if let Some(fieldnorm_reader) = self.fieldnorm_readers.get_field(field) {
+            fieldnorm_reader
         } else {
             let field_name = self.schema.get_field_name(field);
             let err_msg = format!(
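The public accessor keeps its signature; only the lookup path changes, from opening the composite file directly to going through the cached `FieldNormReaders`. A minimal end-to-end sketch of the unchanged caller-side API (assuming the tantivy public API of this vintage; the index-building boilerplate is incidental):

```rust
use tantivy::schema::{Schema, TEXT};
use tantivy::{doc, Index};

fn main() -> tantivy::Result<()> {
    // Build a one-document index so there is a segment to inspect.
    let mut schema_builder = Schema::builder();
    let body = schema_builder.add_text_field("body", TEXT);
    let schema = schema_builder.build();
    let index = Index::create_in_ram(schema);
    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(body => "hello happy tax payer"));
    writer.commit()?;

    let searcher = index.reader()?.searcher();
    let segment_reader = searcher.segment_reader(0);
    // Same call as before this change; it now goes through FieldNormReaders.
    let fieldnorm_reader = segment_reader.get_fieldnorms_reader(body);
    println!("fieldnorm of doc 0: {}", fieldnorm_reader.fieldnorm(0));
    Ok(())
}
```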
@@ -178,8 +178,8 @@ impl SegmentReader {
         let fast_field_readers =
             Arc::new(FastFieldReaders::load_all(&schema, &fast_fields_composite)?);

-        let fieldnorms_data = segment.open_read(SegmentComponent::FIELDNORMS)?;
-        let fieldnorms_composite = CompositeFile::open(&fieldnorms_data)?;
+        let fieldnorm_data = segment.open_read(SegmentComponent::FIELDNORMS)?;
+        let fieldnorm_readers = FieldNormReaders::new(fieldnorm_data)?;

         let delete_bitset_opt = if segment.meta().has_deletes() {
             let delete_data = segment.open_read(SegmentComponent::DELETE)?;
@@ -195,7 +195,7 @@ impl SegmentReader {
             termdict_composite,
             postings_composite,
             fast_fields_readers: fast_field_readers,
-            fieldnorms_composite,
+            fieldnorm_readers,
             segment_id: segment.id(),
             store_source,
             delete_bitset_opt,
@@ -308,7 +308,7 @@ impl SegmentReader {
             self.positions_composite.space_usage(),
             self.positions_idx_composite.space_usage(),
             self.fast_fields_readers.space_usage(),
-            self.fieldnorms_composite.space_usage(),
+            self.fieldnorm_readers.space_usage(),
             self.get_store_reader().space_usage(),
             self.delete_bitset_opt
                 .as_ref()
@@ -21,7 +21,7 @@ mod reader;
 mod serializer;
 mod writer;

-pub use self::reader::FieldNormReader;
+pub use self::reader::{FieldNormReader, FieldNormReaders};
 pub use self::serializer::FieldNormsSerializer;
 pub use self::writer::FieldNormsWriter;
@@ -1,6 +1,41 @@
 use super::{fieldnorm_to_id, id_to_fieldnorm};
+use crate::common::CompositeFile;
 use crate::directory::ReadOnlySource;
+use crate::schema::Field;
+use crate::space_usage::PerFieldSpaceUsage;
 use crate::DocId;
+use std::sync::Arc;

+/// Reader for the fieldnorm (for each document, the number of tokens indexed in the
+/// field) of all indexed fields in the index.
+///
+/// Each fieldnorm is approximately compressed over one byte. We refer to this byte as
+/// `fieldnorm_id`.
+/// The mapping from `fieldnorm` to `fieldnorm_id` is monotonic.
+#[derive(Clone)]
+pub struct FieldNormReaders {
+    data: Arc<CompositeFile>,
+}
+
+impl FieldNormReaders {
+    /// Creates a field norm reader.
+    pub fn new(source: ReadOnlySource) -> crate::Result<FieldNormReaders> {
+        let data = CompositeFile::open(&source)?;
+        Ok(FieldNormReaders {
+            data: Arc::new(data),
+        })
+    }
+
+    /// Returns the `FieldNormReader` for a specific field.
+    pub fn get_field(&self, field: Field) -> Option<FieldNormReader> {
+        self.data.open_read(field).map(FieldNormReader::open)
+    }
+
+    /// Returns a breakdown of the space usage per field.
+    pub fn space_usage(&self) -> PerFieldSpaceUsage {
+        self.data.space_usage()
+    }
+}
+
 /// Reads the fieldnorm associated to a document.
 /// The fieldnorm represents the length associated to
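`FieldNormReaders` is a thin, cloneable wrapper around the `.fieldnorm` composite file; `get_field` hands out one `FieldNormReader` per field. The doc comment's "monotonic" claim can be checked directly against the module-level helpers `fieldnorm_to_id` / `id_to_fieldnorm` imported at the top of this file. A hypothetical crate-internal test sketch:

```rust
// Hypothetical unit test for this module: the one-byte fieldnorm_id
// encoding is lossy (decoding under-approximates) but monotonic.
#[test]
fn fieldnorm_id_encoding_is_lossy_but_monotonic() {
    let mut previous_id = 0u8;
    for &fieldnorm in &[0u32, 1, 2, 3, 10, 300, 1_000_000] {
        let id = fieldnorm_to_id(fieldnorm);
        // Decoding gives back at most the original token count.
        assert!(id_to_fieldnorm(id) <= fieldnorm);
        // Larger fieldnorms never map to a smaller fieldnorm_id.
        assert!(id >= previous_id);
        previous_id = id;
    }
}
```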
@@ -19,6 +54,7 @@ use crate::DocId;
 /// Apart from compression, this scale also makes it possible to
 /// precompute computationally expensive functions of the fieldnorm
 /// in a very short array.
+#[derive(Clone)]
 pub struct FieldNormReader {
     data: ReadOnlySource,
 }
@@ -29,6 +65,11 @@ impl FieldNormReader {
         FieldNormReader { data }
     }

+    /// Returns the number of documents in this segment.
+    pub fn num_docs(&self) -> u32 {
+        self.data.len() as u32
+    }
+
     /// Returns the `fieldnorm` associated to a doc id.
     /// The fieldnorm is a value approximating the number
     /// of tokens in a given field of the `doc_id`.
@@ -65,10 +106,11 @@ impl FieldNormReader {
 }

 #[cfg(test)]
-impl From<Vec<u32>> for FieldNormReader {
-    fn from(field_norms: Vec<u32>) -> FieldNormReader {
+impl From<&[u32]> for FieldNormReader {
+    fn from(field_norms: &[u32]) -> FieldNormReader {
         let field_norms_id = field_norms
-            .into_iter()
+            .iter()
+            .cloned()
             .map(FieldNormReader::fieldnorm_to_id)
             .collect::<Vec<u8>>();
         let field_norms_data = ReadOnlySource::from(field_norms_id);
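Switching the test-only conversion from `Vec<u32>` to `&[u32]` lets tests build a reader from a borrowed slice without allocating a fresh `Vec` per call site. Together with the new `num_docs`, a hypothetical crate-internal test could look like this (the `From` impl is `#[cfg(test)]`, so it only compiles inside tantivy's own test builds):

```rust
#[test]
fn fieldnorm_reader_from_slice() {
    // Build a reader from per-document token counts.
    let reader = FieldNormReader::from(&[1u32, 2, 3, 500][..]);
    assert_eq!(reader.num_docs(), 4);
    // Small fieldnorms are encoded exactly; larger ones only approximately.
    assert_eq!(reader.fieldnorm(0), 1);
    assert!(reader.fieldnorm(3) <= 500);
}
```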
@@ -78,11 +78,12 @@ impl FieldNormsWriter {
     }

     /// Serialize the seen fieldnorm values to the serializer for all fields.
-    pub fn serialize(&self, fieldnorms_serializer: &mut FieldNormsSerializer) -> io::Result<()> {
+    pub fn serialize(&self, mut fieldnorms_serializer: FieldNormsSerializer) -> io::Result<()> {
         for &field in self.fields.iter() {
             let fieldnorm_values: &[u8] = &self.fieldnorms_buffer[field.field_id() as usize][..];
             fieldnorms_serializer.serialize_field(field, fieldnorm_values)?;
         }
+        fieldnorms_serializer.close()?;
         Ok(())
     }
 }
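Taking the serializer by value (instead of `&mut`) makes `serialize` the final owner: it can call `close()` itself, and the compiler guarantees nobody touches the serializer afterwards. A self-contained sketch of the pattern with hypothetical stand-in types (not tantivy APIs):

```rust
struct Serializer;

impl Serializer {
    fn serialize_field(&mut self, _bytes: &[u8]) { /* write one field */ }
    fn close(self) { /* flush and finalize; consumes the serializer */ }
}

fn serialize_all(mut serializer: Serializer, buffers: &[Vec<u8>]) {
    for buffer in buffers {
        serializer.serialize_field(buffer);
    }
    serializer.close(); // moved here, so...
    // serializer.serialize_field(&[]); // ...this line would not compile.
}

fn main() {
    serialize_all(Serializer, &[vec![1, 2, 3]]);
}
```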
@@ -167,7 +167,7 @@ impl IndexMerger {

    fn write_fieldnorms(
        &self,
-        fieldnorms_serializer: &mut FieldNormsSerializer,
+        mut fieldnorms_serializer: FieldNormsSerializer,
    ) -> crate::Result<()> {
        let fields = FieldNormsWriter::fields_with_fieldnorm(&self.schema);
        let mut fieldnorms_data = Vec::with_capacity(self.max_doc as usize);
@@ -182,6 +182,7 @@ impl IndexMerger {
             }
             fieldnorms_serializer.serialize_field(field, &fieldnorms_data[..])?;
         }
+        fieldnorms_serializer.close()?;
         Ok(())
     }
@@ -668,8 +669,10 @@ impl IndexMerger {

 impl SerializableSegment for IndexMerger {
     fn write(&self, mut serializer: SegmentSerializer) -> crate::Result<u32> {
+        if let Some(fieldnorms_serializer) = serializer.extract_fieldnorms_serializer() {
+            self.write_fieldnorms(fieldnorms_serializer)?;
+        }
         let term_ord_mappings = self.write_postings(serializer.get_postings_serializer())?;
-        self.write_fieldnorms(serializer.get_fieldnorms_serializer())?;
         self.write_fast_fields(serializer.get_fast_field_serializer(), term_ord_mappings)?;
         self.write_storable_fields(serializer.get_store_writer())?;
         serializer.close()?;
@@ -1504,12 +1507,9 @@ mod tests {
         for i in 0..100 {
             let mut doc = Document::new();
             doc.add_f64(field, 42.0);
-
             doc.add_f64(multi_field, 0.24);
             doc.add_f64(multi_field, 0.27);
-
             writer.add_document(doc);
-
             if i % 5 == 0 {
                 writer.commit()?;
             }
@@ -1521,7 +1521,6 @@ mod tests {
         // If a merging thread fails, we should end up with more
         // than one segment here
         assert_eq!(1, index.searchable_segments()?.len());
-
         Ok(())
     }
 }
@@ -8,15 +8,16 @@ use crate::store::StoreWriter;
 /// Segment serializer is in charge of laying out on disk
 /// the data accumulated and sorted by the `SegmentWriter`.
 pub struct SegmentSerializer {
+    segment: Segment,
     store_writer: StoreWriter,
     fast_field_serializer: FastFieldSerializer,
-    fieldnorms_serializer: FieldNormsSerializer,
+    fieldnorms_serializer: Option<FieldNormsSerializer>,
     postings_serializer: InvertedIndexSerializer,
 }

 impl SegmentSerializer {
     /// Creates a new `SegmentSerializer`.
-    pub fn for_segment(segment: &mut Segment) -> crate::Result<SegmentSerializer> {
+    pub fn for_segment(mut segment: Segment) -> crate::Result<SegmentSerializer> {
         let store_write = segment.open_write(SegmentComponent::STORE)?;

         let fast_field_write = segment.open_write(SegmentComponent::FASTFIELDS)?;
@@ -25,15 +26,21 @@ impl SegmentSerializer {
         let fieldnorms_write = segment.open_write(SegmentComponent::FIELDNORMS)?;
         let fieldnorms_serializer = FieldNormsSerializer::from_write(fieldnorms_write)?;

-        let postings_serializer = InvertedIndexSerializer::open(segment)?;
+        let postings_serializer = InvertedIndexSerializer::open(&mut segment)?;
         Ok(SegmentSerializer {
+            segment,
             store_writer: StoreWriter::new(store_write),
             fast_field_serializer,
-            fieldnorms_serializer,
+            fieldnorms_serializer: Some(fieldnorms_serializer),
             postings_serializer,
         })
     }

+    #[allow(dead_code)]
+    pub fn segment(&self) -> &Segment {
+        &self.segment
+    }
+
     /// Accessor to the `PostingsSerializer`.
     pub fn get_postings_serializer(&mut self) -> &mut InvertedIndexSerializer {
         &mut self.postings_serializer
@@ -44,9 +51,11 @@ impl SegmentSerializer {
         &mut self.fast_field_serializer
     }

-    /// Accessor to the field norm serializer.
-    pub fn get_fieldnorms_serializer(&mut self) -> &mut FieldNormsSerializer {
-        &mut self.fieldnorms_serializer
+    /// Extracts the field norm serializer.
+    ///
+    /// Note the fieldnorms serializer can only be extracted once.
+    pub fn extract_fieldnorms_serializer(&mut self) -> Option<FieldNormsSerializer> {
+        self.fieldnorms_serializer.take()
     }

     /// Accessor to the `StoreWriter`.
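Replacing the `&mut` accessor with an extractor built on `Option::take` enforces the "only once" note at runtime: the first call moves the serializer out, every later call sees `None`. A self-contained sketch with a stand-in type (names hypothetical):

```rust
// Stand-in for a struct holding Option<FieldNormsSerializer>.
struct Holder {
    fieldnorms_serializer: Option<String>,
}

impl Holder {
    fn extract_fieldnorms_serializer(&mut self) -> Option<String> {
        self.fieldnorms_serializer.take() // moves the value out, leaves None
    }
}

fn main() {
    let mut holder = Holder {
        fieldnorms_serializer: Some("fieldnorms".to_string()),
    };
    assert!(holder.extract_fieldnorms_serializer().is_some()); // first: moved out
    assert!(holder.extract_fieldnorms_serializer().is_none()); // second: gone
}
```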
@@ -55,11 +64,13 @@ impl SegmentSerializer {
     }

     /// Finalize the segment serialization.
-    pub fn close(self) -> crate::Result<()> {
+    pub fn close(mut self) -> crate::Result<()> {
+        if let Some(fieldnorms_serializer) = self.extract_fieldnorms_serializer() {
+            fieldnorms_serializer.close()?;
+        }
         self.fast_field_serializer.close()?;
         self.postings_serializer.close()?;
         self.store_writer.close()?;
-        self.fieldnorms_serializer.close()?;
         Ok(())
     }
 }
@@ -112,7 +112,7 @@ fn merge(
     target_opstamp: Opstamp,
 ) -> crate::Result<SegmentEntry> {
     // first we need to apply deletes to our segment.
-    let mut merged_segment = index.new_segment();
+    let merged_segment = index.new_segment();

     // First we apply all of the deletes to the merged segment, up to the target opstamp.
     for segment_entry in &mut segment_entries {
@@ -131,12 +131,13 @@ fn merge(
     let merger: IndexMerger = IndexMerger::open(index.schema(), &segments[..])?;

     // ... we just serialize this index merger in our new segment to merge the two segments.
-    let segment_serializer = SegmentSerializer::for_segment(&mut merged_segment)?;
+    let segment_serializer = SegmentSerializer::for_segment(merged_segment.clone())?;

     let num_docs = merger.write(segment_serializer)?;

-    let segment_meta = index.new_segment_meta(merged_segment.id(), num_docs);
+    let merged_segment_id = merged_segment.id();
+
+    let segment_meta = index.new_segment_meta(merged_segment_id, num_docs);
     Ok(SegmentEntry::new(segment_meta, delete_cursor, None))
 }
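`for_segment` now consumes its `Segment`, so the merge path clones the handle it still needs for `merged_segment.id()`. The clone is visible in the diff itself; the premise, presumably, is that a `Segment` is a cheap, reference-counted handle rather than the segment's data. A sketch of that handle pattern with hypothetical types:

```rust
use std::sync::Arc;

// Hypothetical stand-in: cloning bumps a reference count,
// it does not copy any index data.
#[derive(Clone)]
struct Segment {
    index: Arc<str>, // stand-in for the shared Index handle
}

fn consume(_segment: Segment) { /* e.g. a serializer taking it by value */ }

fn main() {
    let merged_segment = Segment { index: Arc::from("index") };
    consume(merged_segment.clone()); // cheap: one Arc increment
    let _still_usable = merged_segment; // the original handle remains valid
}
```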
@@ -62,11 +62,12 @@ impl SegmentWriter {
     /// - schema
     pub fn for_segment(
         memory_budget: usize,
-        mut segment: Segment,
+        segment: Segment,
         schema: &Schema,
     ) -> crate::Result<SegmentWriter> {
+        let tokenizer_manager = segment.index().tokenizers().clone();
         let table_num_bits = initial_table_size(memory_budget)?;
-        let segment_serializer = SegmentSerializer::for_segment(&mut segment)?;
+        let segment_serializer = SegmentSerializer::for_segment(segment)?;
         let multifield_postings = MultiFieldPostingsWriter::new(schema, table_num_bits);
         let tokenizers = schema
             .fields()
@@ -76,7 +77,7 @@ impl SegmentWriter {
                     .get_indexing_options()
                     .and_then(|text_index_option| {
                         let tokenizer_name = &text_index_option.tokenizer();
-                        segment.index().tokenizers().get(tokenizer_name)
+                        tokenizer_manager.get(tokenizer_name)
                     }),
                 _ => None,
             },
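Because the `Segment` is now moved into `SegmentSerializer::for_segment` a few lines earlier, the closure can no longer borrow the tokenizers through `segment.index()`; cloning the tokenizer manager up front sidesteps the move. A minimal sketch of the compile error this avoids (hypothetical types, not the tantivy API):

```rust
#[derive(Clone)]
struct TokenizerManager;

struct Segment {
    tokenizers: TokenizerManager,
}

fn for_segment(_segment: Segment) { /* consumes the segment */ }

fn main() {
    let segment = Segment { tokenizers: TokenizerManager };
    // Clone what is still needed *before* giving the segment away.
    let tokenizer_manager = segment.tokenizers.clone();
    for_segment(segment); // `segment` is moved here...
    // let _ = segment.tokenizers; // ...so this would no longer compile.
    let _ = tokenizer_manager; // the clone remains usable.
}
```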
@@ -280,9 +281,11 @@ fn write(
     fieldnorms_writer: &FieldNormsWriter,
     mut serializer: SegmentSerializer,
 ) -> crate::Result<()> {
+    if let Some(fieldnorms_serializer) = serializer.extract_fieldnorms_serializer() {
+        fieldnorms_writer.serialize(fieldnorms_serializer)?;
+    }
     let term_ord_map = multifield_postings.serialize(serializer.get_postings_serializer())?;
     fast_field_writers.serialize(serializer.get_fast_field_serializer(), &term_ord_map)?;
-    fieldnorms_writer.serialize(serializer.get_fieldnorms_serializer())?;
     serializer.close()?;
     Ok(())
 }
@@ -14,7 +14,7 @@ use std::fmt;
 /// - a field name
 /// - a field type, itself wrapping up options describing
 /// how the field should be indexed.
-#[derive(Clone, Debug, Eq, PartialEq)]
+#[derive(Clone, Debug, PartialEq)]
 pub struct FieldEntry {
     name: String,
     field_type: FieldType,
@@ -48,7 +48,7 @@ pub enum Type {

 /// A `FieldType` describes the type (text, u64) of a field as well as
 /// how it should be handled by tantivy.
-#[derive(Clone, Debug, Eq, PartialEq)]
+#[derive(Clone, Debug, PartialEq)]
 pub enum FieldType {
     /// String field type configuration
     Str(TextOptions),
@@ -6,7 +6,7 @@ use std::borrow::Cow;
 use std::ops::BitOr;

 /// Define how a text field should be handled by tantivy.
-#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub struct TextOptions {
     indexing: Option<TextFieldIndexing>,
     stored: bool,
@@ -51,7 +51,7 @@ impl Default for TextOptions {
 /// - the amount of information that should be stored about the presence of a term in a document.
 /// Essentially, should we store the term frequency and/or the positions (See [`IndexRecordOption`](./enum.IndexRecordOption.html)).
 /// - the name of the `Tokenizer` that should be used to process the field.
-#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)]
+#[derive(Clone, PartialEq, Debug, Serialize, Deserialize)]
 pub struct TextFieldIndexing {
     record: IndexRecordOption,
     tokenizer: Cow<'static, str>,
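The `Eq` removals on `FieldEntry`, `FieldType`, `TextOptions`, and `TextFieldIndexing` look unrelated but line up with the new `add_f64` test above: once a schema type (transitively) carries an `f64`, it can only derive `PartialEq`, because `f64` itself is not `Eq` (`NaN != NaN` breaks reflexivity). A minimal illustration, with a hypothetical options type:

```rust
// f64 implements PartialEq but not Eq, so deriving Eq here would fail to compile.
#[derive(Debug, PartialEq)]
struct NumericOptions {
    default_value: f64, // hypothetical field, for illustration only
}

fn main() {
    let nan = NumericOptions { default_value: f64::NAN };
    // Equality is not even reflexive once NaN is involved:
    assert_ne!(nan, NumericOptions { default_value: f64::NAN });
}
```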