diff --git a/src/core/index_meta.rs b/src/core/index_meta.rs index c58b830ee..8291cf032 100644 --- a/src/core/index_meta.rs +++ b/src/core/index_meta.rs @@ -401,7 +401,7 @@ mod tests { let json = serde_json::ser::to_string(&index_metas).expect("serialization failed"); assert_eq!( json, - r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"lz4"},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false}}],"opstamp":0}"# + r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"lz4"},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"# ); let deser_meta: UntrackedIndexMeta = serde_json::from_str(&json).unwrap(); diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index ab443b781..b4f95ebc2 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -196,6 +196,26 @@ fn value_to_u64(value: &Value) -> u64 { } } +/// The fast field type +pub enum FastFieldType { + /// Numeric type, e.g. f64. + Numeric, + /// Fast field stores string ids. + String, + /// Fast field stores string ids for facets. + Facet, +} + +impl FastFieldType { + fn is_storing_term_ids(&self) -> bool { + matches!(self, FastFieldType::String | FastFieldType::Facet) + } + + fn is_facet(&self) -> bool { + matches!(self, FastFieldType::Facet) + } +} + #[cfg(test)] mod tests { diff --git a/src/fastfield/multivalued/writer.rs b/src/fastfield/multivalued/writer.rs index 9efa06a4c..7adbfb0bb 100644 --- a/src/fastfield/multivalued/writer.rs +++ b/src/fastfield/multivalued/writer.rs @@ -4,7 +4,7 @@ use fnv::FnvHashMap; use tantivy_bitpacker::minmax; use crate::fastfield::serializer::BitpackedFastFieldSerializerLegacy; -use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer}; +use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer, FastFieldType}; use crate::indexer::doc_id_mapping::DocIdMapping; use crate::postings::UnorderedTermId; use crate::schema::{Document, Field}; @@ -38,20 +38,17 @@ pub struct MultiValuedFastFieldWriter { field: Field, vals: Vec, doc_index: Vec, - is_storing_term_ids: bool, - is_facet: bool, + fast_field_type: FastFieldType, } impl MultiValuedFastFieldWriter { /// Creates a new `MultiValuedFastFieldWriter` - pub(crate) fn new(field: Field, mut is_storing_term_ids: bool, is_facet: bool) -> Self { - is_storing_term_ids |= is_facet; + pub(crate) fn new(field: Field, fast_field_type: FastFieldType) -> Self { MultiValuedFastFieldWriter { field, vals: Vec::new(), doc_index: Vec::new(), - is_storing_term_ids, - is_facet, + fast_field_type, } } @@ -81,7 +78,7 @@ impl MultiValuedFastFieldWriter { pub fn add_document(&mut self, doc: &Document) { self.next_doc(); // facets/texts are indexed in the `SegmentWriter` as we encode their unordered id. - if self.is_storing_term_ids { + if self.fast_field_type.is_storing_term_ids() { return; } for field_value in doc.field_values() { @@ -170,7 +167,7 @@ impl MultiValuedFastFieldWriter { 1, )?; - if self.is_facet { + if self.fast_field_type.is_facet() { let mut doc_vals: Vec = Vec::with_capacity(100); for vals in self.get_ordered_values(doc_id_map) { doc_vals.clear(); diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index 7c7069183..a28bf732c 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -7,7 +7,7 @@ use tantivy_bitpacker::BlockedBitpacker; use super::multivalued::MultiValuedFastFieldWriter; use super::serializer::FastFieldStats; -use super::FastFieldDataAccess; +use super::{FastFieldDataAccess, FastFieldType}; use crate::fastfield::{BytesFastFieldWriter, CompositeFastFieldSerializer}; use crate::indexer::doc_id_mapping::DocIdMapping; use crate::postings::UnorderedTermId; @@ -53,18 +53,20 @@ impl FastFieldsWriter { } Some(Cardinality::MultiValues) => { let fast_field_writer = - MultiValuedFastFieldWriter::new(field, false, false); + MultiValuedFastFieldWriter::new(field, FastFieldType::Numeric); multi_values_writers.push(fast_field_writer); } None => {} } } FieldType::Facet(_) => { - let fast_field_writer = MultiValuedFastFieldWriter::new(field, true, true); + let fast_field_writer = + MultiValuedFastFieldWriter::new(field, FastFieldType::Facet); term_id_writers.push(fast_field_writer); } FieldType::Str(_) if field_entry.is_fast() => { - let fast_field_writer = MultiValuedFastFieldWriter::new(field, true, false); + let fast_field_writer = + MultiValuedFastFieldWriter::new(field, FastFieldType::String); term_id_writers.push(fast_field_writer); } FieldType::Bytes(bytes_option) => { diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs index 1f6a03448..b49016219 100644 --- a/src/schema/field_entry.rs +++ b/src/schema/field_entry.rs @@ -138,7 +138,8 @@ mod tests { "fieldnorms": true, "tokenizer": "default" }, - "stored": false + "stored": false, + "fast": false } }"#; let field_value_json = serde_json::to_string_pretty(&field_value).unwrap(); diff --git a/src/schema/schema.rs b/src/schema/schema.rs index ff38a6b67..235d0412d 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -417,6 +417,7 @@ mod tests { use std::collections::BTreeMap; use matches::{assert_matches, matches}; + use pretty_assertions::assert_eq; use serde_json; use crate::schema::field_type::ValueParsingError; @@ -469,7 +470,8 @@ mod tests { "fieldnorms": true, "tokenizer": "default" }, - "stored": false + "stored": false, + "fast": false } }, { @@ -481,7 +483,8 @@ mod tests { "fieldnorms": false, "tokenizer": "raw" }, - "stored": false + "stored": false, + "fast": false } }, { @@ -784,7 +787,8 @@ mod tests { "fieldnorms": true, "tokenizer": "default" }, - "stored": false + "stored": false, + "fast": false } }, { @@ -816,7 +820,8 @@ mod tests { "fieldnorms": true, "tokenizer": "raw" }, - "stored": true + "stored": true, + "fast": false } }, { @@ -838,7 +843,8 @@ mod tests { "fieldnorms": true, "tokenizer": "default" }, - "stored": false + "stored": false, + "fast": false } }, { diff --git a/src/schema/text_options.rs b/src/schema/text_options.rs index 1be267b2f..b164ada31 100644 --- a/src/schema/text_options.rs +++ b/src/schema/text_options.rs @@ -16,14 +16,9 @@ pub struct TextOptions { #[serde(default)] stored: bool, #[serde(default)] - #[serde(skip_serializing_if = "is_false")] fast: bool, } -fn is_false(val: &bool) -> bool { - !(*val) -} - impl TextOptions { /// Returns the indexing options. pub fn get_indexing_options(&self) -> Option<&TextFieldIndexing> {